├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── google_drive_to_sqlite ├── __init__.py ├── cli.py └── utils.py ├── setup.py └── tests ├── folder-and-children.json └── test_google_drive_to_sqlite.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - uses: actions/cache@v2 20 | name: Configure pip caching 21 | with: 22 | path: ~/.cache/pip 23 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 24 | restore-keys: | 25 | ${{ runner.os }}-pip- 26 | - name: Install dependencies 27 | run: | 28 | pip install -e '.[test]' 29 | - name: Run tests 30 | run: | 31 | pytest 32 | deploy: 33 | runs-on: ubuntu-latest 34 | needs: [test] 35 | steps: 36 | - uses: actions/checkout@v2 37 | - name: Set up Python 38 | uses: actions/setup-python@v2 39 | with: 40 | python-version: "3.10" 41 | - uses: actions/cache@v2 42 | name: Configure pip caching 43 | with: 44 | path: ~/.cache/pip 45 | key: ${{ runner.os }}-publish-pip-${{ hashFiles('**/setup.py') }} 46 | restore-keys: | 47 | ${{ runner.os }}-publish-pip- 48 | - name: Install dependencies 49 | run: | 50 | pip install setuptools wheel twine build 51 | - name: Publish 52 | env: 53 | TWINE_USERNAME: __token__ 54 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 55 | run: | 56 | python -m build 57 | twine upload dist/* 58 | 59 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: 
-------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - uses: actions/cache@v2 18 | name: Configure pip caching 19 | with: 20 | path: ~/.cache/pip 21 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 22 | restore-keys: | 23 | ${{ runner.os }}-pip- 24 | - name: Install dependencies 25 | run: | 26 | pip install -e '.[test]' 27 | - name: Run tests 28 | run: | 29 | pytest 30 | - name: Check if cog needs to be run 31 | run: | 32 | cog --check README.md 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | venv 6 | .eggs 7 | .pytest_cache 8 | *.egg-info 9 | .DS_Store 10 | auth.json 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # google-drive-to-sqlite 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/google-drive-to-sqlite.svg)](https://pypi.org/project/google-drive-to-sqlite/) 4 | [![Changelog](https://img.shields.io/github/v/release/simonw/google-drive-to-sqlite?include_prereleases&label=changelog)](https://github.com/simonw/google-drive-to-sqlite/releases) 5 | [![Tests](https://github.com/simonw/google-drive-to-sqlite/workflows/Test/badge.svg)](https://github.com/simonw/google-drive-to-sqlite/actions?query=workflow%3ATest) 6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/google-drive-to-sqlite/blob/master/LICENSE) 7 | 8 | > [!WARNING] 9 | > This tool no longer works due to Google deprecating the authentication mechanism it uses. See [issue #40](https://github.com/simonw/google-drive-to-sqlite/issues/40). 
10 | 11 | Create a SQLite database containing metadata from [Google Drive](https://www.google.com/drive) 12 | 13 | For background on this project, see [Google Drive to SQLite](https://simonwillison.net/2022/Feb/20/google-drive-to-sqlite/) on my blog. 14 | 15 | If you use Google Drive, and especially if you have shared drives with other people there's a good chance you have hundreds or even thousands of files that you may not be fully aware of. 16 | 17 | This tool can download metadata about those files - their names, sizes, folders, content types, permissions, creation dates and more - and store them in a SQLite database. 18 | 19 | This lets you use SQL to analyze your Google Drive contents, using [Datasette](https://datasette.io/) or the SQLite command-line tool or any other SQLite database browsing software. 20 | 21 | ## Installation 22 | 23 | Install this tool using `pip`: 24 | 25 | pip install google-drive-to-sqlite 26 | 27 | ## Quickstart 28 | 29 | Authenticate with Google Drive by running: 30 | 31 | google-drive-to-sqlite auth 32 | 33 | Now create a SQLite database with metadata about all of the files you have starred using: 34 | 35 | google-drive-to-sqlite files starred.db --starred 36 | 37 | You can explore the resulting database using [Datasette](https://datasette.io/): 38 | 39 | $ pip install datasette 40 | $ datasette starred.db 41 | INFO: Started server process [24661] 42 | INFO: Uvicorn running on http://127.0.0.1:8001 43 | 44 | ## Authentication 45 | 46 | > :warning: **This application has not yet been verified by Google** - you may find you are unable to authenticate until that verification is complete. [#10](https://github.com/simonw/google-drive-to-sqlite/issues/10) 47 | > 48 | > You can work around this issue by [creating your own OAuth client ID key](https://til.simonwillison.net/googlecloud/google-oauth-cli-application) and passing it to the `auth` command using `--google-client-id` and `--google-client-secret`. 
49 | 50 | First, authenticate with Google Drive using the `auth` command: 51 | 52 | $ google-drive-to-sqlite auth 53 | Visit the following URL to authenticate with Google Drive 54 | 55 | https://accounts.google.com/o/oauth2/v2/auth?... 56 | 57 | Then return here and paste in the resulting code: 58 | Paste code here: 59 | 60 | Follow the link, sign in with Google Drive and then copy and paste the resulting code back into the tool. 61 | 62 | This will save an authentication token to the file called `auth.json` in the current directory. 63 | 64 | To specify a different location for that file, use the `--auth` option: 65 | 66 | google-drive-to-sqlite auth --auth ~/google-drive-auth.json 67 | 68 | The `auth` command also provides options for using a different scope, Google client ID and Google client secret. You can use these to create your own custom authentication tokens that can work with other Google APIs, see [issue #5](https://github.com/simonw/google-drive-to-sqlite/issues/5) for details. 69 | 70 | Full `--help`: 71 | 72 | 83 | ``` 84 | Usage: google-drive-to-sqlite auth [OPTIONS] 85 | 86 | Authenticate user and save credentials 87 | 88 | Options: 89 | -a, --auth FILE Path to save token, defaults to auth.json 90 | --google-client-id TEXT Custom Google client ID 91 | --google-client-secret TEXT Custom Google client secret 92 | --scope TEXT Custom token scope 93 | --help Show this message and exit. 94 | 95 | ``` 96 | 97 | 98 | To revoke the token that is stored in `auth.json`, such that it cannot be used to access Google Drive in the future, run the `revoke` command: 99 | 100 | google-drive-to-sqlite revoke 101 | 102 | Or if your token is stored in another location: 103 | 104 | google-drive-to-sqlite revoke -a ~/google-drive-auth.json 105 | 106 | You will need to obtain a fresh token using the `auth` command in order to continue using this tool. 
107 | 108 | ## google-drive-to-sqlite files 109 | 110 | To retrieve metadata about the files in your Google Drive, or a folder or search within it, use the `google-drive-to-sqlite files` command. 111 | 112 | This will default to writing details about every file in your Google Drive to a SQLite database: 113 | 114 | google-drive-to-sqlite files files.db 115 | 116 | Files and folders will be written to databases tables, which will be created if they do not yet exist. The database schema is [shown below](#database-schema). 117 | 118 | If a file or folder already exists, based on a matching `id`, it will be replaced with fresh data. 119 | 120 | Instead of writing to SQLite you can use `--json` to output as JSON, or `--nl` to output as newline-delimited JSON: 121 | 122 | google-drive-to-sqlite files --nl 123 | 124 | Use `--folder ID` to retrieve everything in a specified folder and its sub-folders: 125 | 126 | google-drive-to-sqlite files files.db --folder 1E6Zg2X2bjjtPzVfX8YqdXZDCoB3AVA7i 127 | 128 | Use `--q QUERY` to use a [custom search query](https://developers.google.com/drive/api/v3/reference/query-ref): 129 | 130 | google-drive-to-sqlite files files.db -q "viewedByMeTime > '2022-01-01'" 131 | 132 | The following shortcut options help build queries: 133 | 134 | - `--full-text TEXT` to search for files where the full text matches a search term 135 | - `--starred` for files and folders you have starred 136 | - `--trashed` for files and folders in the trash 137 | - `--shared-with-me` for files and folders that have been shared with you 138 | - `--apps` for Google Apps documents, spreadsheets, presentations and drawings (equivalent to setting all of the next four options) 139 | - `--docs` for Google Apps documents 140 | - `--sheets` for Google Apps spreadsheets 141 | - `--presentations` for Google Apps presentations 142 | - `--drawings` for Google Apps drawings 143 | 144 | You can combine these - for example, this returns all files that you have starred and that were 
shared with you: 145 | 146 | google-drive-to-sqlite files highlights.db \ 147 | --starred --shared-with-me 148 | 149 | Multiple options are treated as AND, with the exception of the Google Apps options which are treated as OR - so the following would retrieve all spreadsheets and presentations that have also been starred: 150 | 151 | google-drive-to-sqlite files highlights.db \ 152 | --starred --sheets --presentations 153 | 154 | You can use `--stop-after X` to stop after retrieving X files, useful for trying out a new search pattern and seeing results straight away. 155 | 156 | The `--import-json` and `--import-nl` options are mainly useful for testing and developing this tool. They allow you to replay the JSON or newline-delimited JSON that was previously fetched using `--json` or `--nl` and use it to create a fresh SQLite database, without needing to make any outbound API calls: 157 | 158 | # Fetch all starred files from the API, write to starred.json 159 | google-drive-to-sqlite files -q 'starred = true' --json > starred.json 160 | # Now import that data into a new SQLite database file 161 | google-drive-to-sqlite files starred.db --import-json starred.json 162 | 163 | Full `--help`: 164 | 165 | 172 | ``` 173 | Usage: google-drive-to-sqlite files [OPTIONS] [DATABASE] 174 | 175 | Retrieve metadata for files in Google Drive, and write to a SQLite database or 176 | output as JSON. 
177 | 178 | google-drive-to-sqlite files files.db 179 | 180 | Use --json to output JSON, --nl for newline-delimited JSON: 181 | 182 | google-drive-to-sqlite files files.db --json 183 | 184 | Use a folder ID to recursively fetch every file in that folder and its sub- 185 | folders: 186 | 187 | google-drive-to-sqlite files files.db --folder 188 | 1E6Zg2X2bjjtPzVfX8YqdXZDCoB3AVA7i 189 | 190 | Fetch files you have starred: 191 | 192 | google-drive-to-sqlite files starred.db --starred 193 | 194 | Options: 195 | -a, --auth FILE Path to auth.json token file 196 | --folder TEXT Files in this folder ID and its sub-folders 197 | -q TEXT Files matching this query 198 | --full-text TEXT Search for files with text match 199 | --starred Files you have starred 200 | --trashed Files in the trash 201 | --shared-with-me Files that have been shared with you 202 | --apps Google Apps docs, spreadsheets, presentations and 203 | drawings 204 | --docs Google Apps docs 205 | --sheets Google Apps spreadsheets 206 | --presentations Google Apps presentations 207 | --drawings Google Apps drawings 208 | --json Output JSON rather than write to DB 209 | --nl Output newline-delimited JSON rather than write to DB 210 | --stop-after INTEGER Stop paginating after X results 211 | --import-json FILE Import from this JSON file instead of the API 212 | --import-nl FILE Import from this newline-delimited JSON file 213 | -v, --verbose Send verbose output to stderr 214 | --help Show this message and exit. 215 | 216 | ``` 217 | 218 | 219 | ## google-drive-to-sqlite download FILE_ID 220 | 221 | The `download` command can be used to download files from Google Drive. 222 | 223 | You'll need one or more file IDs, which look something like `0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB`. 
224 | 225 | To download the file, run this: 226 | 227 | google-drive-to-sqlite download 0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB 228 | 229 | This will detect the content type of the file and use that as the extension - so if this file is a JPEG the file would be downloaded as: 230 | 231 | 0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB.jpeg 232 | 233 | You can pass multiple file IDs to the command at once. 234 | 235 | To hide the progress bar and filename output, use `-s` or `--silent`. 236 | 237 | If you are downloading a single file you can use the `-o` output to specify a filename and location: 238 | 239 | google-drive-to-sqlite download 0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB \ 240 | -o my-image.jpeg 241 | 242 | Use `-o -` to write the file contents to standard output: 243 | 244 | google-drive-to-sqlite download 0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB \ 245 | -o - > my-image.jpeg 246 | 247 | Full `--help`: 248 | 249 | 256 | ``` 257 | Usage: google-drive-to-sqlite download [OPTIONS] FILE_IDS... 258 | 259 | Download one or more files to disk, based on their file IDs. 260 | 261 | The file content will be saved to a file with the name: 262 | 263 | FILE_ID.ext 264 | 265 | Where the extension is automatically picked based on the type of file. 266 | 267 | If you are downloading a single file you can specify a filename with -o: 268 | 269 | google-drive-to-sqlite download MY_FILE_ID -o myfile.txt 270 | 271 | Options: 272 | -a, --auth FILE Path to auth.json token file 273 | -o, --output FILE File to write to, or - for standard output 274 | -s, --silent Hide progress bar and filename 275 | --help Show this message and exit. 276 | 277 | ``` 278 | 279 | 280 | ## google-drive-to-sqlite export FORMAT FILE_ID 281 | 282 | The `export` command can be used to export Google Docs documents, spreadsheets and presentations in a number of different formats. 
283 | 284 | You'll need one or more document IDs, which look something like `10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU`. You can find these by looking at the URL of your document on the Google Docs site. 285 | 286 | To export that document as PDF, run this: 287 | 288 | google-drive-to-sqlite export pdf 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU 289 | 290 | The file will be exported as: 291 | 292 | 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU-export.pdf 293 | 294 | You can pass multiple file IDs to the command at once. 295 | 296 | For the `FORMAT` option you can use any of the mime type options listed [on this page](https://developers.google.com/drive/api/v3/ref-export-formats) - for example, to export as an Open Office document you could use: 297 | 298 | google-drive-to-sqlite export \ 299 | application/vnd.oasis.opendocument.text \ 300 | 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU 301 | 302 | For convenience the following shortcuts for common file formats are provided: 303 | 304 | - Google Docs: `html`, `txt`, `rtf`, `pdf`, `doc`, `zip`, `epub` 305 | - Google Sheets: `xls`, `pdf`, `csv`, `tsv`, `zip` 306 | - Presentations: `ppt`, `pdf`, `txt` 307 | - Drawings: `jpeg`, `png`, `svg` 308 | 309 | The `zip` option returns a zip file of HTML. `txt` returns plain text. The others should be self-evident. 310 | 311 | To hide the filename output, use `-s` or `--silent`. 312 | 313 | If you are exporting a single file you can use the `-o` output to specify a filename and location: 314 | 315 | google-drive-to-sqlite export pdf 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU \ 316 | -o my-document.pdf 317 | 318 | Use `-o -` to write the file contents to standard output: 319 | 320 | google-drive-to-sqlite export pdf 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU \ 321 | -o - > my-document.pdf 322 | 323 | Full `--help`: 324 | 325 | 332 | ``` 333 | Usage: google-drive-to-sqlite export [OPTIONS] FORMAT FILE_IDS... 334 | 335 | Export one or more files to the specified format. 
336 | 337 | Usage: 338 | 339 | google-drive-to-sqlite export pdf FILE_ID_1 FILE_ID_2 340 | 341 | The file content will be saved to a file with the name: 342 | 343 | FILE_ID-export.ext 344 | 345 | Where the extension is based on the format you specified. 346 | 347 | Available export formats can be seen here: 348 | https://developers.google.com/drive/api/v3/ref-export-formats 349 | 350 | Or you can use one of the following shortcuts: 351 | 352 | - Google Docs: html, txt, rtf, pdf, doc, zip, epub 353 | - Google Sheets: xls, pdf, csv, tsv, zip 354 | - Presentations: ppt, pdf, txt 355 | - Drawings: jpeg, png, svg 356 | 357 | "zip" returns a zip file of HTML. 358 | 359 | If you are exporting a single file you can specify a filename with -o: 360 | 361 | google-drive-to-sqlite export zip MY_FILE_ID -o myfile.zip 362 | 363 | Options: 364 | -a, --auth FILE Path to auth.json token file 365 | -o, --output FILE File to write to, or - for standard output 366 | -s, --silent Hide progress bar and filename 367 | --help Show this message and exit. 368 | 369 | ``` 370 | 371 | 372 | ## google-drive-to-sqlite get URL 373 | 374 | The `get` command makes authenticated requests to the specified URL, using credentials derived from the `auth.json` file. 375 | 376 | For example: 377 | 378 | $ google-drive-to-sqlite get 'https://www.googleapis.com/drive/v3/about?fields=*' 379 | { 380 | "kind": "drive#about", 381 | "user": { 382 | "kind": "drive#user", 383 | "displayName": "Simon Willison", 384 | # ... 385 | 386 | If the resource you are fetching supports pagination you can use `--paginate key` to paginate through all of the rows in a specified key. 
For example, the following API has a `nextPageToken` key and a `files` list, suggesting it supports pagination: 387 | 388 | $ google-drive-to-sqlite get https://www.googleapis.com/drive/v3/files 389 | { 390 | "kind": "drive#fileList", 391 | "nextPageToken": "~!!~AI9...wogHHYlc=", 392 | "incompleteSearch": false, 393 | "files": [ 394 | { 395 | "kind": "drive#file", 396 | "id": "1YEsITp_X8PtDUJWHGM0osT-TXAU1nr0e7RSWRM2Jpyg", 397 | "name": "Title of a spreadsheet", 398 | "mimeType": "application/vnd.google-apps.spreadsheet" 399 | }, 400 | 401 | To paginate through everything in the `files` list you would use `--paginate files` like this: 402 | 403 | $ google-drive-to-sqlite get https://www.googleapis.com/drive/v3/files --paginate files 404 | [ 405 | { 406 | "kind": "drive#file", 407 | "id": "1YEsITp_X8PtDUJWHGM0osT-TXAU1nr0e7RSWRM2Jpyg", 408 | "name": "Title of a spreadsheet", 409 | "mimeType": "application/vnd.google-apps.spreadsheet" 410 | }, 411 | # ... 412 | 413 | Add `--nl` to stream paginated data as newline-delimited JSON: 414 | 415 | $ google-drive-to-sqlite get https://www.googleapis.com/drive/v3/files --paginate files --nl 416 | {"kind": "drive#file", "id": "1YEsITp_X8PtDUJWHGM0osT-TXAU1nr0e7RSWRM2Jpyg", "name": "Title of a spreadsheet", "mimeType": "application/vnd.google-apps.spreadsheet"} 417 | {"kind": "drive#file", "id": "1E6Zg2X2bjjtPzVfX8YqdXZDCoB3AVA7i", "name": "Subfolder", "mimeType": "application/vnd.google-apps.folder"} 418 | 419 | Add `--stop-after 5` to stop after 5 records - useful for testing. 
420 | 421 | Full `--help`: 422 | 423 | 430 | ``` 431 | Usage: google-drive-to-sqlite get [OPTIONS] URL 432 | 433 | Make an authenticated HTTP GET to the specified URL 434 | 435 | Options: 436 | -a, --auth FILE Path to auth.json token file 437 | --paginate TEXT Paginate through all results in this key 438 | --nl Output paginated data as newline-delimited JSON 439 | --stop-after INTEGER Stop paginating after X results 440 | -v, --verbose Send verbose output to stderr 441 | --help Show this message and exit. 442 | 443 | ``` 444 | 445 | 446 | 447 | ## Database schema 448 | 449 | The database created by this tool has the following schema: 450 | 451 | 467 | ```sql 468 | CREATE TABLE [drive_users] ( 469 | [permissionId] TEXT PRIMARY KEY, 470 | [kind] TEXT, 471 | [displayName] TEXT, 472 | [photoLink] TEXT, 473 | [me] INTEGER, 474 | [emailAddress] TEXT 475 | ); 476 | CREATE TABLE [drive_folders] ( 477 | [id] TEXT PRIMARY KEY, 478 | [_parent] TEXT, 479 | [_owner] TEXT, 480 | [lastModifyingUser] TEXT, 481 | [kind] TEXT, 482 | [name] TEXT, 483 | [mimeType] TEXT, 484 | [starred] INTEGER, 485 | [trashed] INTEGER, 486 | [explicitlyTrashed] INTEGER, 487 | [parents] TEXT, 488 | [spaces] TEXT, 489 | [version] TEXT, 490 | [webViewLink] TEXT, 491 | [iconLink] TEXT, 492 | [hasThumbnail] INTEGER, 493 | [thumbnailVersion] TEXT, 494 | [viewedByMe] INTEGER, 495 | [createdTime] TEXT, 496 | [modifiedTime] TEXT, 497 | [modifiedByMe] INTEGER, 498 | [shared] INTEGER, 499 | [ownedByMe] INTEGER, 500 | [viewersCanCopyContent] INTEGER, 501 | [copyRequiresWriterPermission] INTEGER, 502 | [writersCanShare] INTEGER, 503 | [folderColorRgb] TEXT, 504 | [quotaBytesUsed] TEXT, 505 | [isAppAuthorized] INTEGER, 506 | [linkShareMetadata] TEXT, 507 | FOREIGN KEY([_parent]) REFERENCES [drive_folders]([id]), 508 | FOREIGN KEY([_owner]) REFERENCES [drive_users]([permissionId]), 509 | FOREIGN KEY([lastModifyingUser]) REFERENCES [drive_users]([permissionId]) 510 | ); 511 | CREATE TABLE [drive_files] ( 512 | [id] 
TEXT PRIMARY KEY, 513 | [_parent] TEXT, 514 | [_owner] TEXT, 515 | [lastModifyingUser] TEXT, 516 | [kind] TEXT, 517 | [name] TEXT, 518 | [mimeType] TEXT, 519 | [starred] INTEGER, 520 | [trashed] INTEGER, 521 | [explicitlyTrashed] INTEGER, 522 | [parents] TEXT, 523 | [spaces] TEXT, 524 | [version] TEXT, 525 | [webViewLink] TEXT, 526 | [iconLink] TEXT, 527 | [hasThumbnail] INTEGER, 528 | [thumbnailVersion] TEXT, 529 | [viewedByMe] INTEGER, 530 | [createdTime] TEXT, 531 | [modifiedTime] TEXT, 532 | [modifiedByMe] INTEGER, 533 | [shared] INTEGER, 534 | [ownedByMe] INTEGER, 535 | [viewersCanCopyContent] INTEGER, 536 | [copyRequiresWriterPermission] INTEGER, 537 | [writersCanShare] INTEGER, 538 | [quotaBytesUsed] TEXT, 539 | [isAppAuthorized] INTEGER, 540 | [linkShareMetadata] TEXT, 541 | FOREIGN KEY([_parent]) REFERENCES [drive_folders]([id]), 542 | FOREIGN KEY([_owner]) REFERENCES [drive_users]([permissionId]), 543 | FOREIGN KEY([lastModifyingUser]) REFERENCES [drive_users]([permissionId]) 544 | ); 545 | ``` 546 | 547 | 548 | ## Thumbnails 549 | 550 | You can construct a thumbnail image for a known file ID using the following URL: 551 | 552 | https://drive.google.com/thumbnail?sz=w800-h800&id=FILE_ID 553 | 554 | Users who are signed into Google Drive and have permission to view a file will be redirected to a thumbnail version of that file. You can tweak the `w800` and `h800` parameters to request different thumbnail sizes. 555 | 556 | ## Privacy policy 557 | 558 | This tool requests access to your Google Drive account in order to retrieve metadata about your files there. It also offers a feature that can download the content of those files. 559 | 560 | The credentials used to access your account are stored in the `auth.json` file on your computer. The metadata and content retrieved from Google Drive is also stored only on your own personal computer. 561 | 562 | At no point do the developers of this tool gain access to any of your data. 
563 | 564 | ## Development 565 | 566 | To contribute to this tool, first checkout the code. Then create a new virtual environment: 567 | 568 | cd google-drive-to-sqlite 569 | python -m venv venv 570 | source venv/bin/activate 571 | 572 | Or if you are using `pipenv`: 573 | 574 | pipenv shell 575 | 576 | Now install the dependencies and test dependencies: 577 | 578 | pip install -e '.[test]' 579 | 580 | To run the tests: 581 | 582 | pytest 583 | -------------------------------------------------------------------------------- /google_drive_to_sqlite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simonw/google-drive-to-sqlite/60c96e5c909285afb32978e16ec8d6a4aeb851db/google_drive_to_sqlite/__init__.py -------------------------------------------------------------------------------- /google_drive_to_sqlite/cli.py: -------------------------------------------------------------------------------- 1 | from os import access 2 | import click 3 | import httpx 4 | import itertools 5 | import json 6 | import pathlib 7 | import sqlite_utils 8 | import sys 9 | import textwrap 10 | import urllib.parse 11 | from .utils import ( 12 | APIClient, 13 | get_file, 14 | files_in_folder_recursive, 15 | paginate_files, 16 | save_files_and_folders, 17 | ) 18 | 19 | # https://github.com/simonw/google-drive-to-sqlite/issues/2 20 | GOOGLE_CLIENT_ID = ( 21 | "148933860554-98i3hter1bsn24sa6fcq1tcrhcrujrnl.apps.googleusercontent.com" 22 | ) 23 | # It's OK to publish this secret in application source code 24 | GOOGLE_CLIENT_SECRET = "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" 25 | DEFAULT_SCOPE = "https://www.googleapis.com/auth/drive.readonly" 26 | 27 | FORMAT_SHORTCUTS = { 28 | "html": "text/html", 29 | "txt": "text/plain", 30 | "rtf": "application/rtf", 31 | "pdf": "application/pdf", 32 | "doc": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", 33 | "zip": "application/zip", 34 | "epub": 
"application/epub+zip", 35 | "xls": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", 36 | "csv": "text/csv", 37 | "tsv": "text/tab-separated-values", 38 | "ppt": "application/vnd.openxmlformats-officedocument.presentationml.presentation", 39 | "jpeg": "image/jpeg", 40 | "png": "image/png", 41 | "svg": "image/svg+xml", 42 | } 43 | # .ext defaults to the bit after the / - e.g. "application/pdf" becomes "pdf", 44 | # unless there is an explicit override here: 45 | FILE_EXTENSIONS = { 46 | "image/svg+xml": "svg", 47 | "application/epub+zip": "epub", 48 | "text/plain": "txt", 49 | "text/tab-separated-values": "tsv", 50 | "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "doc", 51 | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xls", 52 | "application/vnd.openxmlformats-officedocument.presentationml.presentation": "ppt", 53 | } 54 | 55 | 56 | def start_auth_url(google_client_id, scope): 57 | return "https://accounts.google.com/o/oauth2/v2/auth?" 
+ urllib.parse.urlencode( 58 | { 59 | "access_type": "offline", 60 | "client_id": google_client_id, 61 | "redirect_uri": "urn:ietf:wg:oauth:2.0:oob", 62 | "response_type": "code", 63 | "scope": scope, 64 | } 65 | ) 66 | 67 | 68 | DEFAULT_FIELDS = [ 69 | "kind", 70 | "id", 71 | "name", 72 | "mimeType", 73 | "starred", 74 | "trashed", 75 | "explicitlyTrashed", 76 | "parents", 77 | "spaces", 78 | "version", 79 | "webViewLink", 80 | "iconLink", 81 | "hasThumbnail", 82 | "thumbnailVersion", 83 | "viewedByMe", 84 | "createdTime", 85 | "modifiedTime", 86 | "modifiedByMe", 87 | "owners", 88 | "lastModifyingUser", 89 | "shared", 90 | "ownedByMe", 91 | "viewersCanCopyContent", 92 | "copyRequiresWriterPermission", 93 | "writersCanShare", 94 | "folderColorRgb", 95 | "quotaBytesUsed", 96 | "isAppAuthorized", 97 | "linkShareMetadata", 98 | ] 99 | 100 | 101 | @click.group() 102 | @click.version_option() 103 | def cli(): 104 | "Create a SQLite database of metadata from a Google Drive folder" 105 | 106 | 107 | @cli.command() 108 | @click.option( 109 | "-a", 110 | "--auth", 111 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 112 | default="auth.json", 113 | help="Path to save token, defaults to auth.json", 114 | ) 115 | @click.option("--google-client-id", help="Custom Google client ID") 116 | @click.option("--google-client-secret", help="Custom Google client secret") 117 | @click.option("--scope", help="Custom token scope") 118 | def auth(auth, google_client_id, google_client_secret, scope): 119 | "Authenticate user and save credentials" 120 | if google_client_id is None: 121 | google_client_id = GOOGLE_CLIENT_ID 122 | if google_client_secret is None: 123 | google_client_secret = GOOGLE_CLIENT_SECRET 124 | if scope is None: 125 | scope = DEFAULT_SCOPE 126 | click.echo("Visit the following URL to authenticate with Google Drive") 127 | click.echo("") 128 | click.echo(start_auth_url(google_client_id, scope)) 129 | click.echo("") 130 | click.echo("Then return here 
and paste in the resulting code:") 131 | copied_code = click.prompt("Paste code here", hide_input=True) 132 | response = httpx.post( 133 | "https://www.googleapis.com/oauth2/v4/token", 134 | data={ 135 | "code": copied_code, 136 | "client_id": google_client_id, 137 | "client_secret": google_client_secret, 138 | "redirect_uri": "urn:ietf:wg:oauth:2.0:oob", 139 | "grant_type": "authorization_code", 140 | }, 141 | ) 142 | tokens = response.json() 143 | if "error" in tokens: 144 | message = "{error}: {error_description}".format(**tokens) 145 | raise click.ClickException(message) 146 | if "refresh_token" not in tokens: 147 | raise click.ClickException("No refresh_token in response") 148 | # Read existing file and add refresh_token to it 149 | try: 150 | auth_data = json.load(open(auth)) 151 | except (ValueError, FileNotFoundError): 152 | auth_data = {} 153 | info = {"refresh_token": tokens["refresh_token"]} 154 | if google_client_id != GOOGLE_CLIENT_ID: 155 | info["google_client_id"] = google_client_id 156 | if google_client_secret != GOOGLE_CLIENT_SECRET: 157 | info["google_client_secret"] = google_client_secret 158 | if scope != DEFAULT_SCOPE: 159 | info["scope"] = scope 160 | auth_data["google-drive-to-sqlite"] = info 161 | with open(auth, "w") as fp: 162 | fp.write(json.dumps(auth_data, indent=4)) 163 | # chmod 600 to avoid other users on the shared machine reading it 164 | pathlib.Path(auth).chmod(0o600) 165 | 166 | 167 | @cli.command() 168 | @click.option( 169 | "-a", 170 | "--auth", 171 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 172 | default="auth.json", 173 | help="Path to load token, defaults to auth.json", 174 | ) 175 | def revoke(auth): 176 | "Revoke the token stored in auth.json" 177 | tokens = load_tokens(auth) 178 | response = httpx.get( 179 | "https://accounts.google.com/o/oauth2/revoke", 180 | params={ 181 | "token": tokens["refresh_token"], 182 | }, 183 | ) 184 | if "error" in response.json(): 185 | raise 
click.ClickException(response.json()["error"]) 186 | 187 | 188 | @cli.command() 189 | @click.argument("url") 190 | @click.option( 191 | "-a", 192 | "--auth", 193 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 194 | default="auth.json", 195 | help="Path to auth.json token file", 196 | ) 197 | @click.option("--paginate", help="Paginate through all results in this key") 198 | @click.option( 199 | "--nl", is_flag=True, help="Output paginated data as newline-delimited JSON" 200 | ) 201 | @click.option("--stop-after", type=int, help="Stop paginating after X results") 202 | @click.option( 203 | "-v", 204 | "--verbose", 205 | is_flag=True, 206 | help="Send verbose output to stderr", 207 | ) 208 | def get(url, auth, paginate, nl, stop_after, verbose): 209 | "Make an authenticated HTTP GET to the specified URL" 210 | if not url.startswith("https://www.googleapis.com/"): 211 | if url.startswith("/"): 212 | url = "https://www.googleapis.com" + url 213 | else: 214 | raise click.ClickException( 215 | "url must start with / or https://www.googleapis.com/" 216 | ) 217 | 218 | kwargs = load_tokens(auth) 219 | if verbose: 220 | kwargs["logger"] = lambda s: click.echo(s, err=True) 221 | client = APIClient(**kwargs) 222 | 223 | if not paginate: 224 | response = client.get(url) 225 | if verbose: 226 | click.echo( 227 | "{}, headers: {}".format(response.status_code, repr(response.headers)) 228 | ) 229 | if response.status_code != 200: 230 | raise click.ClickException( 231 | "{}: {}\n\n{}".format(response.url, response.status_code, response.text) 232 | ) 233 | if "json" in response.headers.get("content-type", ""): 234 | click.echo(json.dumps(response.json(), indent=4)) 235 | else: 236 | click.echo(response.text) 237 | 238 | else: 239 | 240 | def paginate_all(): 241 | i = 0 242 | next_page_token = None 243 | while True: 244 | params = {} 245 | if next_page_token is not None: 246 | params["pageToken"] = next_page_token 247 | response = client.get( 248 | url, 249 | 
params=params, 250 | ) 251 | data = response.json() 252 | if response.status_code != 200: 253 | raise click.ClickException(json.dumps(data, indent=4)) 254 | # Paginate using the specified key and nextPageToken 255 | if paginate not in data: 256 | raise click.ClickException( 257 | "paginate key {} not found in {}".format( 258 | repr(paginate), repr(list(data.keys())) 259 | ) 260 | ) 261 | for item in data[paginate]: 262 | yield item 263 | i += 1 264 | if stop_after is not None and i >= stop_after: 265 | return 266 | 267 | next_page_token = data.get("nextPageToken") 268 | if not next_page_token: 269 | break 270 | 271 | if nl: 272 | for item in paginate_all(): 273 | click.echo(json.dumps(item)) 274 | else: 275 | for line in stream_indented_json(paginate_all()): 276 | click.echo(line) 277 | 278 | 279 | @cli.command() 280 | @click.argument( 281 | "database", 282 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 283 | required=False, 284 | ) 285 | @click.option( 286 | "-a", 287 | "--auth", 288 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 289 | default="auth.json", 290 | help="Path to auth.json token file", 291 | ) 292 | @click.option("--folder", help="Files in this folder ID and its sub-folders") 293 | @click.option("-q", help="Files matching this query") 294 | @click.option("--full-text", help="Search for files with text match") 295 | @click.option("--starred", is_flag=True, help="Files you have starred") 296 | @click.option("--trashed", is_flag=True, help="Files in the trash") 297 | @click.option( 298 | "--shared-with-me", is_flag=True, help="Files that have been shared with you" 299 | ) 300 | @click.option( 301 | "--apps", 302 | is_flag=True, 303 | help="Google Apps docs, spreadsheets, presentations and drawings", 304 | ) 305 | @click.option("--docs", is_flag=True, help="Google Apps docs") 306 | @click.option("--sheets", is_flag=True, help="Google Apps spreadsheets") 307 | @click.option("--presentations", is_flag=True, 
help="Google Apps presentations") 308 | @click.option("--drawings", is_flag=True, help="Google Apps drawings") 309 | @click.option( 310 | "json_", "--json", is_flag=True, help="Output JSON rather than write to DB" 311 | ) 312 | @click.option( 313 | "--nl", is_flag=True, help="Output newline-delimited JSON rather than write to DB" 314 | ) 315 | @click.option("--stop-after", type=int, help="Stop paginating after X results") 316 | @click.option( 317 | "--import-json", 318 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 319 | help="Import from this JSON file instead of the API", 320 | ) 321 | @click.option( 322 | "--import-nl", 323 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 324 | help="Import from this newline-delimited JSON file", 325 | ) 326 | @click.option( 327 | "-v", 328 | "--verbose", 329 | is_flag=True, 330 | help="Send verbose output to stderr", 331 | ) 332 | def files( 333 | database, 334 | auth, 335 | folder, 336 | q, 337 | full_text, 338 | starred, 339 | trashed, 340 | shared_with_me, 341 | apps, 342 | docs, 343 | sheets, 344 | presentations, 345 | drawings, 346 | json_, 347 | nl, 348 | stop_after, 349 | import_json, 350 | import_nl, 351 | verbose, 352 | ): 353 | """ 354 | Retrieve metadata for files in Google Drive, and write to a SQLite database 355 | or output as JSON. 
356 | 357 | google-drive-to-sqlite files files.db 358 | 359 | Use --json to output JSON, --nl for newline-delimited JSON: 360 | 361 | google-drive-to-sqlite files files.db --json 362 | 363 | Use a folder ID to recursively fetch every file in that folder and its 364 | sub-folders: 365 | 366 | google-drive-to-sqlite files files.db --folder 1E6Zg2X2bjjtPzVfX8YqdXZDCoB3AVA7i 367 | 368 | Fetch files you have starred: 369 | 370 | google-drive-to-sqlite files starred.db --starred 371 | """ 372 | if not database and not json_ and not nl: 373 | raise click.ClickException("Must either provide database or use --json or --nl") 374 | q_bits = [] 375 | if q: 376 | q_bits.append(q) 377 | if full_text: 378 | q_bits.append("fullText contains '{}'".format(full_text.replace("'", ""))) 379 | if starred: 380 | q_bits.append("starred = true") 381 | if trashed: 382 | q_bits.append("trashed = true") 383 | if shared_with_me: 384 | q_bits.append("sharedWithMe = true") 385 | 386 | mime_types = [] 387 | if apps: 388 | docs = True 389 | sheets = True 390 | presentations = True 391 | drawings = True 392 | if docs: 393 | mime_types.append("application/vnd.google-apps.document") 394 | if sheets: 395 | mime_types.append("application/vnd.google-apps.spreadsheet") 396 | if presentations: 397 | mime_types.append("application/vnd.google-apps.presentation") 398 | if drawings: 399 | mime_types.append("application/vnd.google-apps.drawing") 400 | if mime_types: 401 | q_bits.append( 402 | "({})".format( 403 | " or ".join( 404 | "mimeType = '{}'".format(mime_type) for mime_type in mime_types 405 | ) 406 | ) 407 | ) 408 | 409 | q = " and ".join(q_bits) 410 | 411 | if q and verbose: 412 | click.echo("?q= query: {}".format(q), err=True) 413 | 414 | client = None 415 | if not (import_json or import_nl): 416 | kwargs = load_tokens(auth) 417 | if verbose: 418 | kwargs["logger"] = lambda s: click.echo(s, err=True) 419 | client = APIClient(**kwargs) 420 | 421 | if import_json or import_nl: 422 | if "-" in 
def load_tokens(auth):
    """Load stored credentials from the auth JSON file.

    Returns a dict with "refresh_token", "client_id" and "client_secret"
    keys, suitable for passing to APIClient(**kwargs). Falls back to the
    default client ID/secret when no custom ones were saved by `auth`.

    Raises click.ClickException if the file is missing, is not valid JSON,
    or does not contain a "google-drive-to-sqlite" key.
    """
    try:
        # Use a context manager so the file handle is always closed
        with open(auth) as fp:
            token_info = json.load(fp)["google-drive-to-sqlite"]
    except (KeyError, ValueError, FileNotFoundError):
        # ValueError also covers json.JSONDecodeError for a corrupt file,
        # matching the (ValueError, FileNotFoundError) handling in auth()
        raise click.ClickException("Could not find google-drive-to-sqlite in auth.json")
    return {
        "refresh_token": token_info["refresh_token"],
        "client_id": token_info.get("google_client_id", GOOGLE_CLIENT_ID),
        "client_secret": token_info.get("google_client_secret", GOOGLE_CLIENT_SECRET),
    }
help="Path to auth.json token file", 499 | ) 500 | @click.option( 501 | "-o", 502 | "--output", 503 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, writable=True), 504 | help="File to write to, or - for standard output", 505 | ) 506 | @click.option( 507 | "-s", 508 | "--silent", 509 | is_flag=True, 510 | help="Hide progress bar and filename", 511 | ) 512 | def download(file_ids, auth, output, silent): 513 | """ 514 | Download one or more files to disk, based on their file IDs. 515 | 516 | The file content will be saved to a file with the name: 517 | 518 | FILE_ID.ext 519 | 520 | Where the extension is automatically picked based on the type of file. 521 | 522 | If you are downloading a single file you can specify a filename with -o: 523 | 524 | google-drive-to-sqlite download MY_FILE_ID -o myfile.txt 525 | """ 526 | if output: 527 | if len(file_ids) != 1: 528 | raise click.ClickException("--output option only works with a single file") 529 | tokens = load_tokens(auth) 530 | client = APIClient(**tokens) 531 | for file_id in file_ids: 532 | with client.stream( 533 | "GET", 534 | "https://www.googleapis.com/drive/v3/files/{}?alt=media".format(file_id), 535 | ) as response: 536 | streaming_download(response, file_id, output, silent) 537 | 538 | 539 | @cli.command() 540 | @click.argument("format") 541 | @click.argument("file_ids", nargs=-1, required=True) 542 | @click.option( 543 | "-a", 544 | "--auth", 545 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 546 | default="auth.json", 547 | help="Path to auth.json token file", 548 | ) 549 | @click.option( 550 | "-o", 551 | "--output", 552 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, writable=True), 553 | help="File to write to, or - for standard output", 554 | ) 555 | @click.option( 556 | "-s", 557 | "--silent", 558 | is_flag=True, 559 | help="Hide progress bar and filename", 560 | ) 561 | def export(format, file_ids, auth, output, silent): 562 | """ 563 | Export 
def streaming_download(response, filestem, output, silent):
    """Stream an httpx response body to disk (or stdout).

    response: an open streaming httpx response
    filestem: base name used when no explicit output path was given
    output: target path, "-" for standard output, or None to derive FILE_ID.ext
    silent: suppress the progress bar and filename message

    Raises click.ClickException on a non-200 response.
    Fix over the original: the output file handle is now always closed
    (the original leaked it); sys.stdout.buffer is deliberately left open.
    """
    if response.status_code != 200:
        raise click.ClickException(response.read().decode("utf-8"))
    close_fp = False
    if output:
        filename = pathlib.Path(output).name
        if output == "-":
            # Binary stdout: never close it, and emit no progress noise
            fp = sys.stdout.buffer
            silent = True
        else:
            fp = open(output, "wb")
            close_fp = True
    else:
        # Derive the extension from the response content-type, with
        # explicit overrides in FILE_EXTENSIONS (e.g. image/svg+xml -> svg)
        ext = response.headers.get("content-type", "/bin")
        if ext in FILE_EXTENSIONS:
            ext = FILE_EXTENSIONS[ext]
        else:
            ext = ext.split("/")[-1]
        filename = "{}.{}".format(filestem, ext)
        fp = open(filename, "wb")
        close_fp = True
    try:
        length = int(response.headers.get("content-length", "0"))
        if not silent:
            click.echo(
                "Writing {}to {}".format(
                    "{:,} bytes ".format(length) if length else "", filename
                ),
                err=True,
            )
        if length and not silent:
            # Known length: show a progress bar while streaming
            with click.progressbar(length=length, label="Downloading") as bar:
                for data in response.iter_bytes():
                    fp.write(data)
                    bar.update(len(data))
        else:
            for data in response.iter_bytes():
                fp.write(data)
    finally:
        if close_fp:
            fp.close()


def stream_indented_json(iterator, indent=2):
    """Yield chunks of a pretty-printed JSON array without materializing the iterator.

    Iterates two-at-a-time (via itertools.tee) so each yielded chunk knows
    whether it is the last item and can omit the trailing comma / append the
    closing bracket. Yields "[]" for an empty iterator.
    """
    current_iter, next_iter = itertools.tee(iterator, 2)
    next(next_iter, None)
    first = True
    for item, next_item in itertools.zip_longest(current_iter, next_iter):
        is_last = next_item is None
        yield "{first}{serialized}{separator}{last}".format(
            first="[\n" if first else "",
            serialized=textwrap.indent(
                json.dumps(item, indent=indent, default=repr), " " * indent
            ),
            separator="," if not is_last else "",
            last="\n]" if is_last else "",
        )
        first = False
    if first:
        # We didn't output anything, so yield the empty list
        yield "[]"
"https://www.googleapis.com/drive/v3/files/{}".format(file_id) 14 | params = {} 15 | if fields is not None: 16 | params["fields"] = ",".join(fields) 17 | return client.get( 18 | file_url, 19 | params=params, 20 | ).json() 21 | 22 | 23 | def paginate_files(client, *, corpora=None, q=None, fields=None): 24 | pageToken = None 25 | files_url = "https://www.googleapis.com/drive/v3/files" 26 | params = {} 27 | if corpora is not None: 28 | params["corpora"] = corpora 29 | if fields is not None: 30 | params["fields"] = "nextPageToken, files({})".format(",".join(fields)) 31 | if q: 32 | params["q"] = q 33 | while True: 34 | if pageToken is not None: 35 | params["pageToken"] = pageToken 36 | else: 37 | params.pop("pageToken", None) 38 | data = client.get( 39 | files_url, 40 | params=params, 41 | ).json() 42 | if "error" in data: 43 | raise FilesError(data) 44 | yield from data["files"] 45 | pageToken = data.get("nextPageToken", None) 46 | if pageToken is None: 47 | break 48 | 49 | 50 | def files_in_folder_recursive(client, folder_id, fields): 51 | for file in paginate_files( 52 | client, q='"{}" in parents'.format(folder_id), fields=fields 53 | ): 54 | yield file 55 | if file["mimeType"] == "application/vnd.google-apps.folder": 56 | yield from files_in_folder_recursive(client, file["id"], fields) 57 | 58 | 59 | class APIClient: 60 | class Error(click.ClickException): 61 | pass 62 | 63 | timeout = 30.0 64 | 65 | def __init__(self, refresh_token, client_id, client_secret, logger=None): 66 | self.refresh_token = refresh_token 67 | self.access_token = None 68 | self.client_id = client_id 69 | self.client_secret = client_secret 70 | self.log = logger or (lambda s: None) 71 | 72 | def get_access_token(self, force_refresh=False): 73 | if self.access_token and not force_refresh: 74 | return self.access_token 75 | url = "https://www.googleapis.com/oauth2/v4/token" 76 | self.log("POST {}".format(url)) 77 | data = httpx.post( 78 | url, 79 | data={ 80 | "grant_type": "refresh_token", 81 
| "refresh_token": self.refresh_token, 82 | "client_id": self.client_id, 83 | "client_secret": self.client_secret, 84 | }, 85 | timeout=self.timeout, 86 | ).json() 87 | if "error" in data: 88 | raise self.Error(str(data)) 89 | self.access_token = data["access_token"] 90 | return self.access_token 91 | 92 | def get( 93 | self, 94 | url, 95 | params=None, 96 | headers=None, 97 | allow_token_refresh=True, 98 | transport_retries=2, 99 | ): 100 | headers = headers or {} 101 | headers["Authorization"] = "Bearer {}".format(self.get_access_token()) 102 | self.log("GET: {} {}".format(url, params or "").strip()) 103 | try: 104 | response = httpx.get( 105 | url, params=params, headers=headers, timeout=self.timeout 106 | ) 107 | except httpx.TransportError as ex: 108 | if transport_retries: 109 | sleep(2) 110 | self.log(" Got {}, retrying".format(ex.__class__.__name__)) 111 | return self.get( 112 | url, 113 | params, 114 | headers, 115 | allow_token_refresh=allow_token_refresh, 116 | transport_retries=transport_retries - 1, 117 | ) 118 | else: 119 | raise 120 | 121 | if response.status_code == 401 and allow_token_refresh: 122 | # Try again after refreshing the token 123 | self.get_access_token(force_refresh=True) 124 | return self.get(url, params, headers, allow_token_refresh=False) 125 | return response 126 | 127 | def post(self, url, data=None, headers=None, allow_token_refresh=True): 128 | headers = headers or {} 129 | headers["Authorization"] = "Bearer {}".format(self.get_access_token()) 130 | self.log("POST: {}".format(url)) 131 | response = httpx.post(url, data=data, headers=headers, timeout=self.timeout) 132 | if response.status_code == 403 and allow_token_refresh: 133 | self.get_access_token(force_refresh=True) 134 | return self.post(url, data, headers, allow_token_refresh=False) 135 | return response 136 | 137 | @contextmanager 138 | def stream(self, method, url, params=None): 139 | with httpx.stream( 140 | method, 141 | url, 142 | params=params, 143 | 
headers={"Authorization": "Bearer {}".format(self.get_access_token())}, 144 | ) as stream: 145 | yield stream 146 | 147 | 148 | def save_files_and_folders(db, all): 149 | # Ensure tables with foreign keys exist 150 | with db.conn: 151 | if not db["drive_users"].exists(): 152 | db["drive_users"].create({"permissionId": str}, pk="permissionId") 153 | for table in ("drive_folders", "drive_files"): 154 | if not db[table].exists(): 155 | db[table].create( 156 | { 157 | "id": str, 158 | "_parent": str, 159 | "_owner": str, 160 | "lastModifyingUser": str, 161 | }, 162 | pk="id", 163 | ) 164 | # Gotta add foreign key after table is created, to avoid 165 | # AlterError: No such column: drive_folders.id 166 | db.add_foreign_keys( 167 | ( 168 | (table, "_parent", "drive_folders", "id"), 169 | (table, "_owner", "drive_users", "permissionId"), 170 | (table, "lastModifyingUser", "drive_users", "permissionId"), 171 | ) 172 | ) 173 | 174 | # Commit every 100 records 175 | users_seen = set() 176 | for chunk in chunks(all, 100): 177 | # Add `_parent` columns 178 | files = [] 179 | folders = [] 180 | for file in chunk: 181 | file["_parent"] = file["parents"][0] if file.get("parents") else None 182 | if file.get("mimeType") == "application/vnd.google-apps.folder": 183 | folders.append(file) 184 | else: 185 | files.append(file) 186 | # Convert "lastModifyingUser" JSON into a foreign key reference to drive_users 187 | drive_folders_owners_to_insert = [] 188 | drive_files_owners_to_insert = [] 189 | for to_insert_list, sequence in ( 190 | (drive_folders_owners_to_insert, folders), 191 | (drive_files_owners_to_insert, files), 192 | ): 193 | for file in sequence: 194 | last_modifying_user = file.get("lastModifyingUser") 195 | # This can be {'displayName': '', 'kind': 'drive#user', 'me': False} 196 | if last_modifying_user and last_modifying_user.get("permissionId"): 197 | user_id = last_modifying_user["permissionId"] 198 | if user_id not in users_seen: 199 | db["drive_users"].insert( 200 | 
def chunks(sequence, size):
    """Lazily split an iterable into consecutive chunks of up to `size` items.

    Each yielded chunk is itself a lazy iterator over the shared underlying
    iterator, so a chunk must be fully consumed before advancing to the next
    one (the caller in save_files_and_folders does exactly that).
    """
    it = iter(sequence)
    while True:
        try:
            head = next(it)
        except StopIteration:
            return
        yield itertools.chain([head], itertools.islice(it, size - 1))
long_description_content_type="text/markdown", 20 | author="Simon Willison", 21 | url="https://github.com/simonw/google-drive-to-sqlite", 22 | project_urls={ 23 | "Issues": "https://github.com/simonw/google-drive-to-sqlite/issues", 24 | "CI": "https://github.com/simonw/google-drive-to-sqlite/actions", 25 | "Changelog": "https://github.com/simonw/google-drive-to-sqlite/releases", 26 | }, 27 | license="Apache License, Version 2.0", 28 | version=VERSION, 29 | packages=["google_drive_to_sqlite"], 30 | entry_points=""" 31 | [console_scripts] 32 | google-drive-to-sqlite=google_drive_to_sqlite.cli:cli 33 | """, 34 | install_requires=["click", "httpx", "sqlite-utils"], 35 | extras_require={"test": ["pytest", "pytest-httpx", "pytest-mock", "cogapp"]}, 36 | python_requires=">=3.6", 37 | ) 38 | -------------------------------------------------------------------------------- /tests/folder-and-children.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "kind": "drive#file", 4 | "id": "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j", 5 | "name": "test-folder", 6 | "mimeType": "application/vnd.google-apps.folder", 7 | "starred": false, 8 | "trashed": false, 9 | "explicitlyTrashed": false, 10 | "parents": [ 11 | "0AK1CICIR8ECDUk9PVA" 12 | ], 13 | "spaces": [ 14 | "drive" 15 | ], 16 | "version": "4", 17 | "webViewLink": "https://drive.google.com/drive/folders/1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j", 18 | "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder", 19 | "hasThumbnail": false, 20 | "thumbnailVersion": "0", 21 | "viewedByMe": true, 22 | "createdTime": "2022-02-19T04:22:24.589Z", 23 | "modifiedTime": "2022-02-19T04:22:24.589Z", 24 | "modifiedByMe": true, 25 | "owners": [ 26 | { 27 | "kind": "drive#user", 28 | "displayName": "Simon Willison", 29 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 30 | "me": true, 31 | "permissionId": 
"16974643384157631322", 32 | "emailAddress": "...@gmail.com" 33 | } 34 | ], 35 | "lastModifyingUser": { 36 | "kind": "drive#user", 37 | "displayName": "Simon Willison", 38 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 39 | "me": true, 40 | "permissionId": "16974643384157631322", 41 | "emailAddress": "...@gmail.com" 42 | }, 43 | "shared": false, 44 | "ownedByMe": true, 45 | "viewersCanCopyContent": true, 46 | "copyRequiresWriterPermission": false, 47 | "writersCanShare": true, 48 | "folderColorRgb": "#8f8f8f", 49 | "quotaBytesUsed": "0", 50 | "isAppAuthorized": false, 51 | "linkShareMetadata": { 52 | "securityUpdateEligible": false, 53 | "securityUpdateEnabled": true 54 | } 55 | }, 56 | { 57 | "kind": "drive#file", 58 | "id": "1FYLDMMXi1-gGjxg8dLmvbiixDuR8-FZ3", 59 | "name": "two", 60 | "mimeType": "application/vnd.google-apps.folder", 61 | "starred": false, 62 | "trashed": false, 63 | "explicitlyTrashed": false, 64 | "parents": [ 65 | "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j" 66 | ], 67 | "spaces": [ 68 | "drive" 69 | ], 70 | "version": "1", 71 | "webViewLink": "https://drive.google.com/drive/folders/1FYLDMMXi1-gGjxg8dLmvbiixDuR8-FZ3", 72 | "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder", 73 | "hasThumbnail": false, 74 | "thumbnailVersion": "0", 75 | "viewedByMe": true, 76 | "createdTime": "2022-02-19T04:22:38.714Z", 77 | "modifiedTime": "2022-02-19T04:22:38.714Z", 78 | "modifiedByMe": true, 79 | "owners": [ 80 | { 81 | "kind": "drive#user", 82 | "displayName": "Simon Willison", 83 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 84 | "me": true, 85 | "permissionId": "16974643384157631322", 86 | "emailAddress": "...@gmail.com" 87 | } 88 | ], 89 | "lastModifyingUser": { 90 | "kind": "drive#user", 91 | "displayName": "Simon Willison", 92 | "photoLink": 
"https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 93 | "me": true, 94 | "permissionId": "16974643384157631322", 95 | "emailAddress": "...@gmail.com" 96 | }, 97 | "shared": false, 98 | "ownedByMe": true, 99 | "viewersCanCopyContent": true, 100 | "copyRequiresWriterPermission": false, 101 | "writersCanShare": true, 102 | "folderColorRgb": "#8f8f8f", 103 | "quotaBytesUsed": "0", 104 | "isAppAuthorized": false, 105 | "linkShareMetadata": { 106 | "securityUpdateEligible": false, 107 | "securityUpdateEnabled": true 108 | } 109 | }, 110 | { 111 | "kind": "drive#file", 112 | "id": "113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN", 113 | "name": "one", 114 | "mimeType": "application/vnd.google-apps.folder", 115 | "starred": false, 116 | "trashed": false, 117 | "explicitlyTrashed": false, 118 | "parents": [ 119 | "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j" 120 | ], 121 | "spaces": [ 122 | "drive" 123 | ], 124 | "version": "2", 125 | "webViewLink": "https://drive.google.com/drive/folders/113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN", 126 | "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder", 127 | "hasThumbnail": false, 128 | "thumbnailVersion": "0", 129 | "viewedByMe": true, 130 | "createdTime": "2022-02-19T04:22:33.581Z", 131 | "modifiedTime": "2022-02-19T04:22:33.581Z", 132 | "modifiedByMe": true, 133 | "owners": [ 134 | { 135 | "kind": "drive#user", 136 | "displayName": "Simon Willison", 137 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 138 | "me": true, 139 | "permissionId": "16974643384157631322", 140 | "emailAddress": "...@gmail.com" 141 | } 142 | ], 143 | "lastModifyingUser": { 144 | "kind": "drive#user", 145 | "displayName": "Simon Willison", 146 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 147 | "me": true, 148 | "permissionId": "16974643384157631322", 149 | "emailAddress": 
"...@gmail.com" 150 | }, 151 | "shared": false, 152 | "ownedByMe": true, 153 | "viewersCanCopyContent": true, 154 | "copyRequiresWriterPermission": false, 155 | "writersCanShare": true, 156 | "folderColorRgb": "#8f8f8f", 157 | "quotaBytesUsed": "0", 158 | "isAppAuthorized": false, 159 | "linkShareMetadata": { 160 | "securityUpdateEligible": false, 161 | "securityUpdateEnabled": true 162 | } 163 | }, 164 | { 165 | "kind": "drive#file", 166 | "id": "1Xdqfeoi8B8YJJR0y-_oQlHYpjHHzD5a-", 167 | "name": "sample.csv", 168 | "mimeType": "text/csv", 169 | "starred": false, 170 | "trashed": false, 171 | "explicitlyTrashed": false, 172 | "parents": [ 173 | "113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN" 174 | ], 175 | "spaces": [ 176 | "drive" 177 | ], 178 | "version": "2", 179 | "webViewLink": "https://drive.google.com/file/d/1Xdqfeoi8B8YJJR0y-_oQlHYpjHHzD5a-/view?usp=drivesdk", 180 | "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/text/csv", 181 | "hasThumbnail": false, 182 | "thumbnailVersion": "0", 183 | "viewedByMe": true, 184 | "createdTime": "2022-02-19T04:25:16.517Z", 185 | "modifiedTime": "2020-11-11T18:10:31.000Z", 186 | "modifiedByMe": true, 187 | "owners": [ 188 | { 189 | "kind": "drive#user", 190 | "displayName": "Simon Willison", 191 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 192 | "me": true, 193 | "permissionId": "16974643384157631322", 194 | "emailAddress": "...@gmail.com" 195 | } 196 | ], 197 | "lastModifyingUser": {"displayName": "", "kind": "drive#user", "me": false}, 198 | "shared": false, 199 | "ownedByMe": true, 200 | "viewersCanCopyContent": true, 201 | "copyRequiresWriterPermission": false, 202 | "writersCanShare": true, 203 | "quotaBytesUsed": "1070506", 204 | "isAppAuthorized": false, 205 | "linkShareMetadata": { 206 | "securityUpdateEligible": false, 207 | "securityUpdateEnabled": true 208 | } 209 | } 210 | ] 211 | 
# Expected body of the OAuth token-exchange POST: the CLI's baked-in
# client_id/client_secret plus the refresh token stored in auth.json.
TOKEN_REQUEST_CONTENT = (
    b"grant_type=refresh_token&"
    b"refresh_token=rtoken&"
    b"client_id=148933860554-98i3hter1bsn24sa6fcq1tcrhcrujrnl.apps.googleusercontent.com&"
    b"client_secret=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"
)

# Minimal auth.json content written by most tests before invoking the CLI
AUTH_JSON = {"google-drive-to-sqlite": {"refresh_token": "rtoken"}}
FOLDER_AND_CHILDREN_JSON_PATH = (
    pathlib.Path(__file__).parent / "folder-and-children.json"
)


@pytest.mark.parametrize(
    "response,expected_error",
    (
        ({"refresh_token": "rtoken"}, None),
        (
            {"error": "bad_error", "error_description": "description"},
            "Error: bad_error: description",
        ),
        (
            {"unexpected": "error"},
            "Error: No refresh_token in response",
        ),
    ),
)
def test_auth(httpx_mock, response, expected_error):
    """The auth command writes auth.json on success and surfaces API errors."""
    httpx_mock.add_response(json=response)
    runner = CliRunner()
    with runner.isolated_filesystem():
        result = runner.invoke(cli, ["auth"], input="my-token")
        if expected_error:
            assert result.exit_code == 1
            assert result.output.strip().endswith(expected_error)
        else:
            assert result.exit_code == 0
            auth = json.load(open("auth.json"))
            assert auth == {"google-drive-to-sqlite": {"refresh_token": "rtoken"}}
            # Should be chmod 600 - the file holds a credential
            st_mode = pathlib.Path("auth.json").stat().st_mode
            assert stat.filemode(st_mode) == "-rw-------"


@pytest.mark.parametrize(
    "auth_file_exists,revoke_response,expected_error",
    (
        (False, None, "Error: Could not find google-drive-to-sqlite in auth.json"),
        (True, {}, None),
        (True, {"error": "invalid_token"}, "Error: invalid_token"),
    ),
)
def test_revoke(httpx_mock, auth_file_exists, revoke_response, expected_error):
    """The revoke command hits Google's revoke endpoint with the stored token."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        if auth_file_exists:
            open("auth.json", "w").write(json.dumps(AUTH_JSON))
        httpx_mock.add_response(json=revoke_response)
        result = runner.invoke(cli, ["revoke"])
        if auth_file_exists:
            # Only one request is expected - the revoke call itself
            request = httpx_mock.get_request()
            assert (
                request.url
                == "https://accounts.google.com/o/oauth2/revoke?token=rtoken"
            )
        if expected_error:
            assert result.exit_code == 1
            assert result.output.strip().endswith(expected_error)
        else:
            assert result.exit_code == 0


@pytest.mark.parametrize(
    "opts,expected_content",
    (
        ([], {"refresh_token": "rtoken"}),
        (
            ["--google-client-id", "x", "--google-client-secret", "y"],
            {
                "refresh_token": "rtoken",
                "google_client_id": "x",
                "google_client_secret": "y",
            },
        ),
        (
            ["--scope", "SCOPE"],
            {
                "refresh_token": "rtoken",
                "scope": "SCOPE",
            },
        ),
    ),
)
def test_auth_custom_client(httpx_mock, opts, expected_content):
    """Custom client/scope options are persisted alongside the refresh token."""
    httpx_mock.add_response(json={"refresh_token": "rtoken"})
    runner = CliRunner()
    with runner.isolated_filesystem():
        result = runner.invoke(cli, ["auth"] + opts, input="my-token")
        assert result.exit_code == 0
        auth = json.load(open("auth.json"))
        assert auth == {"google-drive-to-sqlite": expected_content}


def test_get_single(httpx_mock):
    """A non-paginated `get` exchanges the refresh token, then fetches the URL
    with the resulting Bearer token and pretty-prints the JSON response."""
    about_data = {
        "kind": "drive#about",
        "user": {"kind": "drive#user", "displayName": "User"},
    }
    httpx_mock.add_response(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        url="https://www.googleapis.com/drive/v3/about?fields=*",
        method="GET",
        json=about_data,
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(
            cli, ["get", "https://www.googleapis.com/drive/v3/about?fields=*"]
        )
        token_request, about_request = httpx_mock.get_requests()
        assert token_request.content == TOKEN_REQUEST_CONTENT
        assert about_request.url == "https://www.googleapis.com/drive/v3/about?fields=*"
        assert about_request.headers["authorization"] == "Bearer atoken"
        assert result.exit_code == 0
        assert result.output.strip() == json.dumps(about_data, indent=4)


def test_get_plain_text(httpx_mock):
    """Non-JSON responses from `get` are echoed verbatim rather than formatted."""
    url = "https://www.googleapis.com/drive/v3/files/123/export?mimeType=text/plain"
    httpx_mock.add_response(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        url=url,
        method="GET",
        content="This is plain text",
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["get", url])
        token_request, export_request = httpx_mock.get_requests()
        assert token_request.content == TOKEN_REQUEST_CONTENT
        assert export_request.url == url
        assert export_request.headers["authorization"] == "Bearer atoken"
        assert result.exit_code == 0
        assert result.output.strip() == "This is plain text"


@pytest.mark.parametrize(
    "opts,expected_output",
    (
        (
            [],
            '[\n    {\n        "id": 1\n    },\n    {\n        "id": 2\n    },\n    '
            '{\n        "id": 3\n    },\n    {\n        "id": 4\n    }\n]\n',
        ),
        (
            ["--nl"],
            '{"id": 1}\n{"id": 2}\n{"id": 3}\n{"id": 4}\n',
        ),
    ),
)
def test_get_paginated(httpx_mock, opts, expected_output):
    """`get --paginate files` follows nextPageToken and merges the pages;
    --nl switches output to newline-delimited JSON."""
    httpx_mock.add_response(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        url="https://www.googleapis.com/page",
        json={"nextPageToken": "next", "files": [{"id": 1}, {"id": 2}]},
    )
    httpx_mock.add_response(
        url="https://www.googleapis.com/page?pageToken=next",
        json={"nextPageToken": None, "files": [{"id": 3}, {"id": 4}]},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(
            cli,
            ["get", "https://www.googleapis.com/page", "--paginate", "files"] + opts,
        )
        # First request is the token exchange; the next two are the pages
        _, page1_request, page2_request = httpx_mock.get_requests()
        for request in (page1_request, page2_request):
            assert request.headers["authorization"] == "Bearer atoken"
        assert page2_request.url == "https://www.googleapis.com/page?pageToken=next"
        assert result.exit_code == 0
        assert result.output == expected_output
"&q=%28mimeType+%3D+%27application%2Fvnd.google-apps.document%27%29", 230 | ), 231 | ( 232 | ["--sheets"], 233 | "&q=%28mimeType+%3D+%27application%2Fvnd.google-apps.spreadsheet%27%29", 234 | ), 235 | ( 236 | ["--presentations"], 237 | "&q=%28mimeType+%3D+%27application%2Fvnd.google-apps.presentation%27%29", 238 | ), 239 | ( 240 | ["--drawings"], 241 | "&q=%28mimeType+%3D+%27application%2Fvnd.google-apps.drawing%27%29", 242 | ), 243 | ), 244 | ) 245 | @pytest.mark.parametrize("use_db", (True, False)) 246 | def test_files_basic(httpx_mock, opts, extra_qs, use_db): 247 | httpx_mock.add_response( 248 | method="POST", 249 | json={"access_token": "atoken"}, 250 | ) 251 | httpx_mock.add_response( 252 | json={"nextPageToken": "next", "files": [{"id": 1}, {"id": 2}]}, 253 | ) 254 | httpx_mock.add_response( 255 | json={"nextPageToken": None, "files": [{"id": 3}, {"id": 4}]}, 256 | ) 257 | runner = CliRunner() 258 | with runner.isolated_filesystem(): 259 | open("auth.json", "w").write(json.dumps(AUTH_JSON)) 260 | args = ["files"] 261 | if use_db: 262 | args.append("test.db") 263 | else: 264 | args.append("--json") 265 | result = runner.invoke(cli, args + opts, catch_exceptions=False) 266 | assert result.exit_code == 0 267 | token_request, page1_request, page2_request = httpx_mock.get_requests() 268 | assert token_request.content == TOKEN_REQUEST_CONTENT 269 | assert page1_request.url == ( 270 | "https://www.googleapis.com/drive/v3/files?fields=" 271 | + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS)) 272 | + extra_qs 273 | ) 274 | assert page2_request.url == ( 275 | "https://www.googleapis.com/drive/v3/files?fields=" 276 | + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS)) 277 | + extra_qs 278 | + "&pageToken=next" 279 | ) 280 | if use_db: 281 | rows = list(sqlite_utils.Database("test.db")["drive_files"].rows) 282 | assert rows == [ 283 | {"id": "1", "_parent": None, "_owner": None, "lastModifyingUser": None}, 284 | {"id": "2", 
"_parent": None, "_owner": None, "lastModifyingUser": None}, 285 | {"id": "3", "_parent": None, "_owner": None, "lastModifyingUser": None}, 286 | {"id": "4", "_parent": None, "_owner": None, "lastModifyingUser": None}, 287 | ] 288 | else: 289 | results = json.loads(result.output) 290 | assert results == [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}] 291 | 292 | 293 | @pytest.mark.parametrize("verbosity_arg", ("-v", "--verbose")) 294 | def test_files_basic_stop_after_also_test_verbose(httpx_mock, verbosity_arg): 295 | httpx_mock.add_response( 296 | method="POST", 297 | json={"access_token": "atoken"}, 298 | ) 299 | httpx_mock.add_response( 300 | json={"nextPageToken": None, "files": [{"id": 3}, {"id": 4}]}, 301 | ) 302 | runner = CliRunner(mix_stderr=False) 303 | with runner.isolated_filesystem(): 304 | open("auth.json", "w").write(json.dumps(AUTH_JSON)) 305 | args = ["files", "--json", "--stop-after", "1", verbosity_arg] 306 | result = runner.invoke(cli, args) 307 | assert ( 308 | result.stderr == "POST https://www.googleapis.com/oauth2/v4/token\n" 309 | "GET: https://www.googleapis.com/drive/v3/files " 310 | "{'fields': 'nextPageToken, files(kind,id,name,mimeType,starred,trashed," 311 | "explicitlyTrashed,parents,spaces,version,webViewLink,iconLink,hasThumbnail," 312 | "thumbnailVersion,viewedByMe,createdTime,modifiedTime,modifiedByMe,owners," 313 | "lastModifyingUser,shared,ownedByMe,viewersCanCopyContent," 314 | "copyRequiresWriterPermission,writersCanShare,folderColorRgb,quotaBytesUsed," 315 | "isAppAuthorized,linkShareMetadata)'}\n" 316 | ) 317 | token_request, page1_request = httpx_mock.get_requests() 318 | assert token_request.content == TOKEN_REQUEST_CONTENT 319 | assert page1_request.url == ( 320 | "https://www.googleapis.com/drive/v3/files?fields=" 321 | + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS)) 322 | ) 323 | results = json.loads(result.output) 324 | assert results == [{"id": 3}] 325 | 326 | 327 | def 
def test_files_folder(httpx_mock):
    """files --folder fetches the folder's own record, then recursively lists
    children, issuing one listing request per nested folder it discovers."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    # Details of the starting folder itself
    httpx_mock.add_response(
        json={"id": "folder1", "mimeType": "application/vnd.google-apps.folder"},
    )
    # Children of folder1: one document and one nested folder
    httpx_mock.add_response(
        json={
            "nextPageToken": None,
            "files": [
                {"id": "doc1", "mimeType": "doc"},
                {"id": "folder2", "mimeType": "application/vnd.google-apps.folder"},
            ],
        }
    )
    # Children of the nested folder2 - matched by URL so ordering of the
    # recursive request does not matter
    httpx_mock.add_response(
        url=re.compile(".*folder2.*"),
        json={
            "nextPageToken": None,
            "files": [
                {"id": "doc2", "mimeType": "doc"},
            ],
        },
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        args = ["files", "--folder", "folder1", "--json"]
        result = runner.invoke(cli, args)
        (
            token_request,
            folder_details_request,
            folder1_request,
            folder2_request,
        ) = httpx_mock.get_requests()
        assert token_request.content == TOKEN_REQUEST_CONTENT
        assert folder_details_request.url == (
            "https://www.googleapis.com/drive/v3/files/folder1?fields="
            + "%2C".join(DEFAULT_FIELDS)
        )
        assert folder1_request.url == (
            "https://www.googleapis.com/drive/v3/files?fields="
            + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS))
            + "&q=%22folder1%22+in+parents"
        )
        assert folder2_request.url == (
            "https://www.googleapis.com/drive/v3/files?fields="
            + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS))
            + "&q=%22folder2%22+in+parents"
        )
        # Output includes the folder itself followed by everything beneath it
        results = json.loads(result.output)
        assert results == [
            {"id": "folder1", "mimeType": "application/vnd.google-apps.folder"},
            {"id": "doc1", "mimeType": "doc"},
            {"id": "folder2", "mimeType": "application/vnd.google-apps.folder"},
            {"id": "doc2", "mimeType": "doc"},
        ]
def test_download_two_files(httpx_mock):
    """download writes each file to disk, deriving the extension from the
    response's content-type header."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        content="this is text",
        headers={"content-type": "text/plain"},
    )
    httpx_mock.add_response(
        content="this is gif",
        headers={"content-type": "image/gif"},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["download", "file1", "file2"])
        assert result.exit_code == 0
        # Content types map to extensions: text/plain -> file1.txt,
        # image/gif -> file2.gif
        assert open("file1.txt").read() == "this is text"
        assert open("file2.gif").read() == "this is gif"
        _, file1_request, file2_request = httpx_mock.get_requests()
        assert (
            file1_request.url == "https://www.googleapis.com/drive/v3/files/file1?alt=media"
        )
        assert (
            file2_request.url == "https://www.googleapis.com/drive/v3/files/file2?alt=media"
        )


def test_download_output_two_files_error():
    """-o cannot be combined with multiple file IDs."""
    runner = CliRunner()
    result = runner.invoke(cli, ["download", "file1", "file2", "-o", "out.txt"])
    assert result.exit_code == 1
    assert result.output == "Error: --output option only works with a single file\n"


def test_download_output_stdout(httpx_mock):
    """-o - streams the downloaded content to standard output."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        content="this is text",
        headers={"content-type": "text/plain"},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["download", "file1", "-o", "-"])
        assert result.exit_code == 0
        assert result.output == "this is text"


def test_download_output_path(httpx_mock):
    """-o PATH writes the downloaded content to that path verbatim."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        content="this is text",
        headers={"content-type": "text/plain"},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["download", "file1", "-o", "out.txt"])
        assert result.exit_code == 0
        assert open("out.txt").read() == "this is text"


def test_export_two_files(httpx_mock):
    """export FORMAT hits the /export endpoint and writes ID-export.EXT files."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        content="this is pdf",
        headers={"content-type": "application/pdf"},
    )
    httpx_mock.add_response(
        content="this is also pdf",
        headers={"content-type": "application/pdf"},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["export", "pdf", "file1", "file2"])
        assert result.exit_code == 0
        assert open("file1-export.pdf").read() == "this is pdf"
        assert open("file2-export.pdf").read() == "this is also pdf"
        _, file1_request, file2_request = httpx_mock.get_requests()
        assert (
            file1_request.url
            == "https://www.googleapis.com/drive/v3/files/file1/export?mimeType=application%2Fpdf"
        )
        assert (
            file2_request.url
            == "https://www.googleapis.com/drive/v3/files/file2/export?mimeType=application%2Fpdf"
        )


def test_refresh_access_token_once_if_it_expires(httpx_mock):
    """A 401 authError response triggers exactly one token refresh followed
    by a retry of the original request with the new Bearer token."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    # First attempt is rejected with Google's standard 401 authError payload
    httpx_mock.add_response(
        url="https://www.googleapis.com/drive/v3/about?fields=*",
        json={
            "error": {
                "errors": [
                    {
                        "domain": "global",
                        "reason": "authError",
                        "message": "Invalid Credentials",
                        "locationType": "header",
                        "location": "Authorization",
                    }
                ],
                "code": 401,
                "message": "Invalid Credentials",
            }
        },
        status_code=401,
    )
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken2"},
    )
    about_data = {
        "kind": "drive#about",
        "user": {"kind": "drive#user", "displayName": "User"},
    }
    httpx_mock.add_response(
        url="https://www.googleapis.com/drive/v3/about?fields=*",
        method="GET",
        json=about_data,
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(
            cli, ["get", "https://www.googleapis.com/drive/v3/about?fields=*"]
        )
        assert result.exit_code == 0

        assert json.loads(result.output) == about_data

    # Expected sequence: token, denied GET, second token, successful GET
    token1, about_denied, token2, about_success = httpx_mock.get_requests()
    for request in (token1, token2):
        assert request.method == "POST"
        assert request.url == "https://www.googleapis.com/oauth2/v4/token"
    for request2 in (about_denied, about_success):
        assert request2.method == "GET"
        assert request2.url == "https://www.googleapis.com/drive/v3/about?fields=*"
    assert about_denied.headers["Authorization"] == "Bearer atoken"
    assert about_success.headers["Authorization"] == "Bearer atoken2"
def test_files_input_real_example(httpx_mock):
    """End-to-end import of the folder-and-children.json fixture: verifies
    the resulting schema (including foreign keys) and the exact rows of the
    drive_files, drive_folders and drive_users tables. No HTTP requests
    should be made when importing from a file.

    NOTE(review): the exact whitespace inside the expected schema string
    below follows sqlite-utils' generated CREATE TABLE formatting - confirm
    against the pinned sqlite-utils version if this assertion drifts.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        result = runner.invoke(
            cli, ["files", "test.db", "--import-json", FOLDER_AND_CHILDREN_JSON_PATH]
        )
        assert len(httpx_mock.get_requests()) == 0
        assert result.exit_code == 0
        db = sqlite_utils.Database("test.db")
        assert set(db.table_names()) == {
            "drive_folders",
            "drive_files",
            "drive_users",
        }
        schema = db.schema
        assert (
            schema
            == "CREATE TABLE [drive_users] (\n   [permissionId] TEXT PRIMARY KEY\n,"
            " [kind] TEXT, [displayName] TEXT, [photoLink] TEXT, [me] INTEGER,"
            " [emailAddress] TEXT);\nCREATE TABLE [drive_folders] (\n   [id] TEXT"
            " PRIMARY KEY,\n   [_parent] TEXT,\n   [_owner] TEXT,\n  "
            " [lastModifyingUser] TEXT, [kind] TEXT, [name] TEXT, [mimeType] TEXT,"
            " [starred] INTEGER, [trashed] INTEGER, [explicitlyTrashed] INTEGER,"
            " [parents] TEXT, [spaces] TEXT, [version] TEXT, [webViewLink] TEXT,"
            " [iconLink] TEXT, [hasThumbnail] INTEGER, [thumbnailVersion] TEXT,"
            " [viewedByMe] INTEGER, [createdTime] TEXT, [modifiedTime] TEXT,"
            " [modifiedByMe] INTEGER, [shared] INTEGER, [ownedByMe] INTEGER,"
            " [viewersCanCopyContent] INTEGER, [copyRequiresWriterPermission]"
            " INTEGER, [writersCanShare] INTEGER, [folderColorRgb] TEXT,"
            " [quotaBytesUsed] TEXT, [isAppAuthorized] INTEGER, [linkShareMetadata]"
            " TEXT,\n   FOREIGN KEY([_parent]) REFERENCES [drive_folders]([id]),\n  "
            " FOREIGN KEY([_owner]) REFERENCES [drive_users]([permissionId]),\n  "
            " FOREIGN KEY([lastModifyingUser]) REFERENCES"
            " [drive_users]([permissionId])\n);\nCREATE TABLE [drive_files] (\n  "
            " [id] TEXT PRIMARY KEY,\n   [_parent] TEXT,\n   [_owner] TEXT,\n  "
            " [lastModifyingUser] TEXT, [kind] TEXT, [name] TEXT, [mimeType] TEXT,"
            " [starred] INTEGER, [trashed] INTEGER, [explicitlyTrashed] INTEGER,"
            " [parents] TEXT, [spaces] TEXT, [version] TEXT, [webViewLink] TEXT,"
            " [iconLink] TEXT, [hasThumbnail] INTEGER, [thumbnailVersion] TEXT,"
            " [viewedByMe] INTEGER, [createdTime] TEXT, [modifiedTime] TEXT,"
            " [modifiedByMe] INTEGER, [shared] INTEGER, [ownedByMe] INTEGER,"
            " [viewersCanCopyContent] INTEGER, [copyRequiresWriterPermission]"
            " INTEGER, [writersCanShare] INTEGER, [quotaBytesUsed] TEXT,"
            " [isAppAuthorized] INTEGER, [linkShareMetadata] TEXT,\n   FOREIGN"
            " KEY([_parent]) REFERENCES [drive_folders]([id]),\n   FOREIGN"
            " KEY([_owner]) REFERENCES [drive_users]([permissionId]),\n   FOREIGN"
            " KEY([lastModifyingUser]) REFERENCES [drive_users]([permissionId])\n);"
        )
        files_rows = list(db["drive_files"].rows)
        folders_rows = list(db["drive_folders"].rows)
        users_rows = list(db["drive_users"].rows)
        # sample.csv's lastModifyingUser in the fixture lacks a permissionId,
        # so it is stored as NULL rather than a drive_users reference
        assert files_rows == [
            {
                "id": "1Xdqfeoi8B8YJJR0y-_oQlHYpjHHzD5a-",
                "_parent": "113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN",
                "_owner": "16974643384157631322",
                "lastModifyingUser": None,
                "kind": "drive#file",
                "name": "sample.csv",
                "mimeType": "text/csv",
                "starred": 0,
                "trashed": 0,
                "explicitlyTrashed": 0,
                "parents": '["113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN"]',
                "spaces": '["drive"]',
                "version": "2",
                "webViewLink": "https://drive.google.com/file/d/1Xdqfeoi8B8YJJR0y-_oQlHYpjHHzD5a-/view?usp=drivesdk",
                "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/text/csv",
                "hasThumbnail": 0,
                "thumbnailVersion": "0",
                "viewedByMe": 1,
                "createdTime": "2022-02-19T04:25:16.517Z",
                "modifiedTime": "2020-11-11T18:10:31.000Z",
                "modifiedByMe": 1,
                "shared": 0,
                "ownedByMe": 1,
                "viewersCanCopyContent": 1,
                "copyRequiresWriterPermission": 0,
                "writersCanShare": 1,
                "quotaBytesUsed": "1070506",
                "isAppAuthorized": 0,
                "linkShareMetadata": '{"securityUpdateEligible": false, "securityUpdateEnabled": true}',
            }
        ]
        assert folders_rows == [
            {
                "id": "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j",
                "_parent": "0AK1CICIR8ECDUk9PVA",
                "_owner": "16974643384157631322",
                "lastModifyingUser": "16974643384157631322",
                "kind": "drive#file",
                "name": "test-folder",
                "mimeType": "application/vnd.google-apps.folder",
                "starred": 0,
                "trashed": 0,
                "explicitlyTrashed": 0,
                "parents": '["0AK1CICIR8ECDUk9PVA"]',
                "spaces": '["drive"]',
                "version": "4",
                "webViewLink": "https://drive.google.com/drive/folders/1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j",
                "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder",
                "hasThumbnail": 0,
                "thumbnailVersion": "0",
                "viewedByMe": 1,
                "createdTime": "2022-02-19T04:22:24.589Z",
                "modifiedTime": "2022-02-19T04:22:24.589Z",
                "modifiedByMe": 1,
                "shared": 0,
                "ownedByMe": 1,
                "viewersCanCopyContent": 1,
                "copyRequiresWriterPermission": 0,
                "writersCanShare": 1,
                "folderColorRgb": "#8f8f8f",
                "quotaBytesUsed": "0",
                "isAppAuthorized": 0,
                "linkShareMetadata": '{"securityUpdateEligible": false, "securityUpdateEnabled": true}',
            },
            {
                "id": "1FYLDMMXi1-gGjxg8dLmvbiixDuR8-FZ3",
                "_parent": "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j",
                "_owner": "16974643384157631322",
                "lastModifyingUser": "16974643384157631322",
                "kind": "drive#file",
                "name": "two",
                "mimeType": "application/vnd.google-apps.folder",
                "starred": 0,
                "trashed": 0,
                "explicitlyTrashed": 0,
                "parents": '["1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j"]',
                "spaces": '["drive"]',
                "version": "1",
                "webViewLink": "https://drive.google.com/drive/folders/1FYLDMMXi1-gGjxg8dLmvbiixDuR8-FZ3",
                "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder",
                "hasThumbnail": 0,
                "thumbnailVersion": "0",
                "viewedByMe": 1,
                "createdTime": "2022-02-19T04:22:38.714Z",
                "modifiedTime": "2022-02-19T04:22:38.714Z",
                "modifiedByMe": 1,
                "shared": 0,
                "ownedByMe": 1,
                "viewersCanCopyContent": 1,
                "copyRequiresWriterPermission": 0,
                "writersCanShare": 1,
                "folderColorRgb": "#8f8f8f",
                "quotaBytesUsed": "0",
                "isAppAuthorized": 0,
                "linkShareMetadata": '{"securityUpdateEligible": false, "securityUpdateEnabled": true}',
            },
            {
                "id": "113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN",
                "_parent": "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j",
                "_owner": "16974643384157631322",
                "lastModifyingUser": "16974643384157631322",
                "kind": "drive#file",
                "name": "one",
                "mimeType": "application/vnd.google-apps.folder",
                "starred": 0,
                "trashed": 0,
                "explicitlyTrashed": 0,
                "parents": '["1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j"]',
                "spaces": '["drive"]',
                "version": "2",
                "webViewLink": "https://drive.google.com/drive/folders/113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN",
                "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder",
                "hasThumbnail": 0,
                "thumbnailVersion": "0",
                "viewedByMe": 1,
                "createdTime": "2022-02-19T04:22:33.581Z",
                "modifiedTime": "2022-02-19T04:22:33.581Z",
                "modifiedByMe": 1,
                "shared": 0,
                "ownedByMe": 1,
                "viewersCanCopyContent": 1,
                "copyRequiresWriterPermission": 0,
                "writersCanShare": 1,
                "folderColorRgb": "#8f8f8f",
                "quotaBytesUsed": "0",
                "isAppAuthorized": 0,
                "linkShareMetadata": '{"securityUpdateEligible": false, "securityUpdateEnabled": true}',
            },
        ]
        # All records share a single owner/lastModifyingUser, so drive_users
        # contains exactly one de-duplicated row
        assert users_rows == [
            {
                "permissionId": "16974643384157631322",
                "kind": "drive#user",
                "displayName": "Simon Willison",
                "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64",
                "me": 1,
                "emailAddress": "...@gmail.com",
            }
        ]
should_succeed: 816 | num_expected += 1 817 | assert len(requests) == num_expected 818 | 819 | # Test log output for num_exceptions = 2 820 | if num_exceptions == 2: 821 | assert result.stderr == ( 822 | "POST https://www.googleapis.com/oauth2/v4/token\n" 823 | "GET: https://www.googleapis.com/drive/v3/about?fields=*\n" 824 | + " Got {}, retrying\n".format(exception.__name__) 825 | + "GET: https://www.googleapis.com/drive/v3/about?fields=*\n" 826 | + " Got {}, retrying\n".format(exception.__name__) 827 | + "GET: https://www.googleapis.com/drive/v3/about?fields=*\n" 828 | ) 829 | --------------------------------------------------------------------------------