├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── google_drive_to_sqlite ├── __init__.py ├── cli.py └── utils.py ├── setup.py └── tests ├── folder-and-children.json └── test_google_drive_to_sqlite.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - uses: actions/cache@v2 20 | name: Configure pip caching 21 | with: 22 | path: ~/.cache/pip 23 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 24 | restore-keys: | 25 | ${{ runner.os }}-pip- 26 | - name: Install dependencies 27 | run: | 28 | pip install -e '.[test]' 29 | - name: Run tests 30 | run: | 31 | pytest 32 | deploy: 33 | runs-on: ubuntu-latest 34 | needs: [test] 35 | steps: 36 | - uses: actions/checkout@v2 37 | - name: Set up Python 38 | uses: actions/setup-python@v2 39 | with: 40 | python-version: "3.10" 41 | - uses: actions/cache@v2 42 | name: Configure pip caching 43 | with: 44 | path: ~/.cache/pip 45 | key: ${{ runner.os }}-publish-pip-${{ hashFiles('**/setup.py') }} 46 | restore-keys: | 47 | ${{ runner.os }}-publish-pip- 48 | - name: Install dependencies 49 | run: | 50 | pip install setuptools wheel twine build 51 | - name: Publish 52 | env: 53 | TWINE_USERNAME: __token__ 54 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 55 | run: | 56 | python -m build 57 | twine upload dist/* 58 | 59 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: 
-------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - uses: actions/cache@v2 18 | name: Configure pip caching 19 | with: 20 | path: ~/.cache/pip 21 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 22 | restore-keys: | 23 | ${{ runner.os }}-pip- 24 | - name: Install dependencies 25 | run: | 26 | pip install -e '.[test]' 27 | - name: Run tests 28 | run: | 29 | pytest 30 | - name: Check if cog needs to be run 31 | run: | 32 | cog --check README.md 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | venv 6 | .eggs 7 | .pytest_cache 8 | *.egg-info 9 | .DS_Store 10 | auth.json 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # google-drive-to-sqlite 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/google-drive-to-sqlite.svg)](https://pypi.org/project/google-drive-to-sqlite/) 4 | [![Changelog](https://img.shields.io/github/v/release/simonw/google-drive-to-sqlite?include_prereleases&label=changelog)](https://github.com/simonw/google-drive-to-sqlite/releases) 5 | [![Tests](https://github.com/simonw/google-drive-to-sqlite/workflows/Test/badge.svg)](https://github.com/simonw/google-drive-to-sqlite/actions?query=workflow%3ATest) 6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/google-drive-to-sqlite/blob/master/LICENSE) 7 | 8 | > [!WARNING] 9 | > This tool no longer works due to Google deprecating the authentication mechanism it uses. See [issue #40](https://github.com/simonw/google-drive-to-sqlite/issues/40). 
10 | 11 | Create a SQLite database containing metadata from [Google Drive](https://www.google.com/drive) 12 | 13 | For background on this project, see [Google Drive to SQLite](https://simonwillison.net/2022/Feb/20/google-drive-to-sqlite/) on my blog. 14 | 15 | If you use Google Drive, and especially if you have shared drives with other people there's a good chance you have hundreds or even thousands of files that you may not be fully aware of. 16 | 17 | This tool can download metadata about those files - their names, sizes, folders, content types, permissions, creation dates and more - and store them in a SQLite database. 18 | 19 | This lets you use SQL to analyze your Google Drive contents, using [Datasette](https://datasette.io/) or the SQLite command-line tool or any other SQLite database browsing software. 20 | 21 | ## Installation 22 | 23 | Install this tool using `pip`: 24 | 25 | pip install google-drive-to-sqlite 26 | 27 | ## Quickstart 28 | 29 | Authenticate with Google Drive by running: 30 | 31 | google-drive-to-sqlite auth 32 | 33 | Now create a SQLite database with metadata about all of the files you have starred using: 34 | 35 | google-drive-to-sqlite files starred.db --starred 36 | 37 | You can explore the resulting database using [Datasette](https://datasette.io/): 38 | 39 | $ pip install datasette 40 | $ datasette starred.db 41 | INFO: Started server process [24661] 42 | INFO: Uvicorn running on http://127.0.0.1:8001 43 | 44 | ## Authentication 45 | 46 | > :warning: **This application has not yet been verified by Google** - you may find you are unable to authenticate until that verification is complete. [#10](https://github.com/simonw/google-drive-to-sqlite/issues/10) 47 | > 48 | > You can work around this issue by [creating your own OAuth client ID key](https://til.simonwillison.net/googlecloud/google-oauth-cli-application) and passing it to the `auth` command using `--google-client-id` and `--google-client-secret`. 
49 | 50 | First, authenticate with Google Drive using the `auth` command: 51 | 52 | $ google-drive-to-sqlite auth 53 | Visit the following URL to authenticate with Google Drive 54 | 55 | https://accounts.google.com/o/oauth2/v2/auth?... 56 | 57 | Then return here and paste in the resulting code: 58 | Paste code here: 59 | 60 | Follow the link, sign in with Google Drive and then copy and paste the resulting code back into the tool. 61 | 62 | This will save an authentication token to the file called `auth.json` in the current directory. 63 | 64 | To specify a different location for that file, use the `--auth` option: 65 | 66 | google-drive-to-sqlite auth --auth ~/google-drive-auth.json 67 | 68 | The `auth` command also provides options for using a different scope, Google client ID and Google client secret. You can use these to create your own custom authentication tokens that can work with other Google APIs, see [issue #5](https://github.com/simonw/google-drive-to-sqlite/issues/5) for details. 69 | 70 | Full `--help`: 71 | 72 | 83 | ``` 84 | Usage: google-drive-to-sqlite auth [OPTIONS] 85 | 86 | Authenticate user and save credentials 87 | 88 | Options: 89 | -a, --auth FILE Path to save token, defaults to auth.json 90 | --google-client-id TEXT Custom Google client ID 91 | --google-client-secret TEXT Custom Google client secret 92 | --scope TEXT Custom token scope 93 | --help Show this message and exit. 94 | 95 | ``` 96 | 97 | 98 | To revoke the token that is stored in `auth.json`, such that it cannot be used to access Google Drive in the future, run the `revoke` command: 99 | 100 | google-drive-to-sqlite revoke 101 | 102 | Or if your token is stored in another location: 103 | 104 | google-drive-to-sqlite revoke -a ~/google-drive-auth.json 105 | 106 | You will need to obtain a fresh token using the `auth` command in order to continue using this tool. 
107 | 108 | ## google-drive-to-sqlite files 109 | 110 | To retrieve metadata about the files in your Google Drive, or a folder or search within it, use the `google-drive-to-sqlite files` command. 111 | 112 | This will default to writing details about every file in your Google Drive to a SQLite database: 113 | 114 | google-drive-to-sqlite files files.db 115 | 116 | Files and folders will be written to databases tables, which will be created if they do not yet exist. The database schema is [shown below](#database-schema). 117 | 118 | If a file or folder already exists, based on a matching `id`, it will be replaced with fresh data. 119 | 120 | Instead of writing to SQLite you can use `--json` to output as JSON, or `--nl` to output as newline-delimited JSON: 121 | 122 | google-drive-to-sqlite files --nl 123 | 124 | Use `--folder ID` to retrieve everything in a specified folder and its sub-folders: 125 | 126 | google-drive-to-sqlite files files.db --folder 1E6Zg2X2bjjtPzVfX8YqdXZDCoB3AVA7i 127 | 128 | Use `--q QUERY` to use a [custom search query](https://developers.google.com/drive/api/v3/reference/query-ref): 129 | 130 | google-drive-to-sqlite files files.db -q "viewedByMeTime > '2022-01-01'" 131 | 132 | The following shortcut options help build queries: 133 | 134 | - `--full-text TEXT` to search for files where the full text matches a search term 135 | - `--starred` for files and folders you have starred 136 | - `--trashed` for files and folders in the trash 137 | - `--shared-with-me` for files and folders that have been shared with you 138 | - `--apps` for Google Apps documents, spreadsheets, presentations and drawings (equivalent to setting all of the next four options) 139 | - `--docs` for Google Apps documents 140 | - `--sheets` for Google Apps spreadsheets 141 | - `--presentations` for Google Apps presentations 142 | - `--drawings` for Google Apps drawings 143 | 144 | You can combine these - for example, this returns all files that you have starred and that were 
shared with you: 145 | 146 | google-drive-to-sqlite files highlights.db \ 147 | --starred --shared-with-me 148 | 149 | Multiple options are treated as AND, with the exception of the Google Apps options which are treated as OR - so the following would retrieve all spreadsheets and presentations that have also been starred: 150 | 151 | google-drive-to-sqlite files highlights.db \ 152 | --starred --sheets --presentations 153 | 154 | You can use `--stop-after X` to stop after retrieving X files, useful for trying out a new search pattern and seeing results straight away. 155 | 156 | The `--import-json` and `--import-nl` options are mainly useful for testing and developing this tool. They allow you to replay the JSON or newline-delimited JSON that was previously fetched using `--json` or `--nl` and use it to create a fresh SQLite database, without needing to make any outbound API calls: 157 | 158 | # Fetch all starred files from the API, write to starred.json 159 | google-drive-to-sqlite files -q 'starred = true' --json > starred.json 160 | # Now import that data into a new SQLite database file 161 | google-drive-to-sqlite files starred.db --import-json starred.json 162 | 163 | Full `--help`: 164 | 165 | 172 | ``` 173 | Usage: google-drive-to-sqlite files [OPTIONS] [DATABASE] 174 | 175 | Retrieve metadata for files in Google Drive, and write to a SQLite database or 176 | output as JSON. 
177 | 178 | google-drive-to-sqlite files files.db 179 | 180 | Use --json to output JSON, --nl for newline-delimited JSON: 181 | 182 | google-drive-to-sqlite files files.db --json 183 | 184 | Use a folder ID to recursively fetch every file in that folder and its sub- 185 | folders: 186 | 187 | google-drive-to-sqlite files files.db --folder 188 | 1E6Zg2X2bjjtPzVfX8YqdXZDCoB3AVA7i 189 | 190 | Fetch files you have starred: 191 | 192 | google-drive-to-sqlite files starred.db --starred 193 | 194 | Options: 195 | -a, --auth FILE Path to auth.json token file 196 | --folder TEXT Files in this folder ID and its sub-folders 197 | -q TEXT Files matching this query 198 | --full-text TEXT Search for files with text match 199 | --starred Files you have starred 200 | --trashed Files in the trash 201 | --shared-with-me Files that have been shared with you 202 | --apps Google Apps docs, spreadsheets, presentations and 203 | drawings 204 | --docs Google Apps docs 205 | --sheets Google Apps spreadsheets 206 | --presentations Google Apps presentations 207 | --drawings Google Apps drawings 208 | --json Output JSON rather than write to DB 209 | --nl Output newline-delimited JSON rather than write to DB 210 | --stop-after INTEGER Stop paginating after X results 211 | --import-json FILE Import from this JSON file instead of the API 212 | --import-nl FILE Import from this newline-delimited JSON file 213 | -v, --verbose Send verbose output to stderr 214 | --help Show this message and exit. 215 | 216 | ``` 217 | 218 | 219 | ## google-drive-to-sqlite download FILE_ID 220 | 221 | The `download` command can be used to download files from Google Drive. 222 | 223 | You'll need one or more file IDs, which look something like `0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB`. 
224 | 225 | To download the file, run this: 226 | 227 | google-drive-to-sqlite download 0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB 228 | 229 | This will detect the content type of the file and use that as the extension - so if this file is a JPEG the file would be downloaded as: 230 | 231 | 0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB.jpeg 232 | 233 | You can pass multiple file IDs to the command at once. 234 | 235 | To hide the progress bar and filename output, use `-s` or `--silent`. 236 | 237 | If you are downloading a single file you can use the `-o` output to specify a filename and location: 238 | 239 | google-drive-to-sqlite download 0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB \ 240 | -o my-image.jpeg 241 | 242 | Use `-o -` to write the file contents to standard output: 243 | 244 | google-drive-to-sqlite download 0B32uDVNZfiEKLUtIT1gzYWN2NDI4SzVQYTFWWWxCWUtvVGNB \ 245 | -o - > my-image.jpeg 246 | 247 | Full `--help`: 248 | 249 | 256 | ``` 257 | Usage: google-drive-to-sqlite download [OPTIONS] FILE_IDS... 258 | 259 | Download one or more files to disk, based on their file IDs. 260 | 261 | The file content will be saved to a file with the name: 262 | 263 | FILE_ID.ext 264 | 265 | Where the extension is automatically picked based on the type of file. 266 | 267 | If you are downloading a single file you can specify a filename with -o: 268 | 269 | google-drive-to-sqlite download MY_FILE_ID -o myfile.txt 270 | 271 | Options: 272 | -a, --auth FILE Path to auth.json token file 273 | -o, --output FILE File to write to, or - for standard output 274 | -s, --silent Hide progress bar and filename 275 | --help Show this message and exit. 276 | 277 | ``` 278 | 279 | 280 | ## google-drive-to-sqlite export FORMAT FILE_ID 281 | 282 | The `export` command can be used to export Google Docs documents, spreadsheets and presentations in a number of different formats. 
283 | 284 | You'll need one or more document IDs, which look something like `10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU`. You can find these by looking at the URL of your document on the Google Docs site. 285 | 286 | To export that document as PDF, run this: 287 | 288 | google-drive-to-sqlite export pdf 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU 289 | 290 | The file will be exported as: 291 | 292 | 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU-export.pdf 293 | 294 | You can pass multiple file IDs to the command at once. 295 | 296 | For the `FORMAT` option you can use any of the mime type options listed [on this page](https://developers.google.com/drive/api/v3/ref-export-formats) - for example, to export as an Open Office document you could use: 297 | 298 | google-drive-to-sqlite export \ 299 | application/vnd.oasis.opendocument.text \ 300 | 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU 301 | 302 | For convenience the following shortcuts for common file formats are provided: 303 | 304 | - Google Docs: `html`, `txt`, `rtf`, `pdf`, `doc`, `zip`, `epub` 305 | - Google Sheets: `xls`, `pdf`, `csv`, `tsv`, `zip` 306 | - Presentations: `ppt`, `pdf`, `txt` 307 | - Drawings: `jpeg`, `png`, `svg` 308 | 309 | The `zip` option returns a zip file of HTML. `txt` returns plain text. The others should be self-evident. 310 | 311 | To hide the filename output, use `-s` or `--silent`. 312 | 313 | If you are exporting a single file you can use the `-o` output to specify a filename and location: 314 | 315 | google-drive-to-sqlite export pdf 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU \ 316 | -o my-document.pdf 317 | 318 | Use `-o -` to write the file contents to standard output: 319 | 320 | google-drive-to-sqlite export pdf 10BOHGDUYa7lBjUSo26YFCHTpgEmtXabdVFaopCTh1vU \ 321 | -o - > my-document.pdf 322 | 323 | Full `--help`: 324 | 325 | 332 | ``` 333 | Usage: google-drive-to-sqlite export [OPTIONS] FORMAT FILE_IDS... 334 | 335 | Export one or more files to the specified format. 
336 | 337 | Usage: 338 | 339 | google-drive-to-sqlite export pdf FILE_ID_1 FILE_ID_2 340 | 341 | The file content will be saved to a file with the name: 342 | 343 | FILE_ID-export.ext 344 | 345 | Where the extension is based on the format you specified. 346 | 347 | Available export formats can be seen here: 348 | https://developers.google.com/drive/api/v3/ref-export-formats 349 | 350 | Or you can use one of the following shortcuts: 351 | 352 | - Google Docs: html, txt, rtf, pdf, doc, zip, epub 353 | - Google Sheets: xls, pdf, csv, tsv, zip 354 | - Presentations: ppt, pdf, txt 355 | - Drawings: jpeg, png, svg 356 | 357 | "zip" returns a zip file of HTML. 358 | 359 | If you are exporting a single file you can specify a filename with -o: 360 | 361 | google-drive-to-sqlite export zip MY_FILE_ID -o myfile.zip 362 | 363 | Options: 364 | -a, --auth FILE Path to auth.json token file 365 | -o, --output FILE File to write to, or - for standard output 366 | -s, --silent Hide progress bar and filename 367 | --help Show this message and exit. 368 | 369 | ``` 370 | 371 | 372 | ## google-drive-to-sqlite get URL 373 | 374 | The `get` command makes authenticated requests to the specified URL, using credentials derived from the `auth.json` file. 375 | 376 | For example: 377 | 378 | $ google-drive-to-sqlite get 'https://www.googleapis.com/drive/v3/about?fields=*' 379 | { 380 | "kind": "drive#about", 381 | "user": { 382 | "kind": "drive#user", 383 | "displayName": "Simon Willison", 384 | # ... 385 | 386 | If the resource you are fetching supports pagination you can use `--paginate key` to paginate through all of the rows in a specified key. 
For example, the following API has a `nextPageToken` key and a `files` list, suggesting it supports pagination: 387 | 388 | $ google-drive-to-sqlite get https://www.googleapis.com/drive/v3/files 389 | { 390 | "kind": "drive#fileList", 391 | "nextPageToken": "~!!~AI9...wogHHYlc=", 392 | "incompleteSearch": false, 393 | "files": [ 394 | { 395 | "kind": "drive#file", 396 | "id": "1YEsITp_X8PtDUJWHGM0osT-TXAU1nr0e7RSWRM2Jpyg", 397 | "name": "Title of a spreadsheet", 398 | "mimeType": "application/vnd.google-apps.spreadsheet" 399 | }, 400 | 401 | To paginate through everything in the `files` list you would use `--paginate files` like this: 402 | 403 | $ google-drive-to-sqlite get https://www.googleapis.com/drive/v3/files --paginate files 404 | [ 405 | { 406 | "kind": "drive#file", 407 | "id": "1YEsITp_X8PtDUJWHGM0osT-TXAU1nr0e7RSWRM2Jpyg", 408 | "name": "Title of a spreadsheet", 409 | "mimeType": "application/vnd.google-apps.spreadsheet" 410 | }, 411 | # ... 412 | 413 | Add `--nl` to stream paginated data as newline-delimited JSON: 414 | 415 | $ google-drive-to-sqlite get https://www.googleapis.com/drive/v3/files --paginate files --nl 416 | {"kind": "drive#file", "id": "1YEsITp_X8PtDUJWHGM0osT-TXAU1nr0e7RSWRM2Jpyg", "name": "Title of a spreadsheet", "mimeType": "application/vnd.google-apps.spreadsheet"} 417 | {"kind": "drive#file", "id": "1E6Zg2X2bjjtPzVfX8YqdXZDCoB3AVA7i", "name": "Subfolder", "mimeType": "application/vnd.google-apps.folder"} 418 | 419 | Add `--stop-after 5` to stop after 5 records - useful for testing. 
420 | 421 | Full `--help`: 422 | 423 | 430 | ``` 431 | Usage: google-drive-to-sqlite get [OPTIONS] URL 432 | 433 | Make an authenticated HTTP GET to the specified URL 434 | 435 | Options: 436 | -a, --auth FILE Path to auth.json token file 437 | --paginate TEXT Paginate through all results in this key 438 | --nl Output paginated data as newline-delimited JSON 439 | --stop-after INTEGER Stop paginating after X results 440 | -v, --verbose Send verbose output to stderr 441 | --help Show this message and exit. 442 | 443 | ``` 444 | 445 | 446 | 447 | ## Database schema 448 | 449 | The database created by this tool has the following schema: 450 | 451 | 467 | ```sql 468 | CREATE TABLE [drive_users] ( 469 | [permissionId] TEXT PRIMARY KEY, 470 | [kind] TEXT, 471 | [displayName] TEXT, 472 | [photoLink] TEXT, 473 | [me] INTEGER, 474 | [emailAddress] TEXT 475 | ); 476 | CREATE TABLE [drive_folders] ( 477 | [id] TEXT PRIMARY KEY, 478 | [_parent] TEXT, 479 | [_owner] TEXT, 480 | [lastModifyingUser] TEXT, 481 | [kind] TEXT, 482 | [name] TEXT, 483 | [mimeType] TEXT, 484 | [starred] INTEGER, 485 | [trashed] INTEGER, 486 | [explicitlyTrashed] INTEGER, 487 | [parents] TEXT, 488 | [spaces] TEXT, 489 | [version] TEXT, 490 | [webViewLink] TEXT, 491 | [iconLink] TEXT, 492 | [hasThumbnail] INTEGER, 493 | [thumbnailVersion] TEXT, 494 | [viewedByMe] INTEGER, 495 | [createdTime] TEXT, 496 | [modifiedTime] TEXT, 497 | [modifiedByMe] INTEGER, 498 | [shared] INTEGER, 499 | [ownedByMe] INTEGER, 500 | [viewersCanCopyContent] INTEGER, 501 | [copyRequiresWriterPermission] INTEGER, 502 | [writersCanShare] INTEGER, 503 | [folderColorRgb] TEXT, 504 | [quotaBytesUsed] TEXT, 505 | [isAppAuthorized] INTEGER, 506 | [linkShareMetadata] TEXT, 507 | FOREIGN KEY([_parent]) REFERENCES [drive_folders]([id]), 508 | FOREIGN KEY([_owner]) REFERENCES [drive_users]([permissionId]), 509 | FOREIGN KEY([lastModifyingUser]) REFERENCES [drive_users]([permissionId]) 510 | ); 511 | CREATE TABLE [drive_files] ( 512 | [id] 
TEXT PRIMARY KEY, 513 | [_parent] TEXT, 514 | [_owner] TEXT, 515 | [lastModifyingUser] TEXT, 516 | [kind] TEXT, 517 | [name] TEXT, 518 | [mimeType] TEXT, 519 | [starred] INTEGER, 520 | [trashed] INTEGER, 521 | [explicitlyTrashed] INTEGER, 522 | [parents] TEXT, 523 | [spaces] TEXT, 524 | [version] TEXT, 525 | [webViewLink] TEXT, 526 | [iconLink] TEXT, 527 | [hasThumbnail] INTEGER, 528 | [thumbnailVersion] TEXT, 529 | [viewedByMe] INTEGER, 530 | [createdTime] TEXT, 531 | [modifiedTime] TEXT, 532 | [modifiedByMe] INTEGER, 533 | [shared] INTEGER, 534 | [ownedByMe] INTEGER, 535 | [viewersCanCopyContent] INTEGER, 536 | [copyRequiresWriterPermission] INTEGER, 537 | [writersCanShare] INTEGER, 538 | [quotaBytesUsed] TEXT, 539 | [isAppAuthorized] INTEGER, 540 | [linkShareMetadata] TEXT, 541 | FOREIGN KEY([_parent]) REFERENCES [drive_folders]([id]), 542 | FOREIGN KEY([_owner]) REFERENCES [drive_users]([permissionId]), 543 | FOREIGN KEY([lastModifyingUser]) REFERENCES [drive_users]([permissionId]) 544 | ); 545 | ``` 546 | 547 | 548 | ## Thumbnails 549 | 550 | You can construct a thumbnail image for a known file ID using the following URL: 551 | 552 | https://drive.google.com/thumbnail?sz=w800-h800&id=FILE_ID 553 | 554 | Users who are signed into Google Drive and have permission to view a file will be redirected to a thumbnail version of that file. You can tweak the `w800` and `h800` parameters to request different thumbnail sizes. 555 | 556 | ## Privacy policy 557 | 558 | This tool requests access to your Google Drive account in order to retrieve metadata about your files there. It also offers a feature that can download the content of those files. 559 | 560 | The credentials used to access your account are stored in the `auth.json` file on your computer. The metadata and content retrieved from Google Drive is also stored only on your own personal computer. 561 | 562 | At no point do the developers of this tool gain access to any of your data. 
563 | 564 | ## Development 565 | 566 | To contribute to this tool, first checkout the code. Then create a new virtual environment: 567 | 568 | cd google-drive-to-sqlite 569 | python -m venv venv 570 | source venv/bin/activate 571 | 572 | Or if you are using `pipenv`: 573 | 574 | pipenv shell 575 | 576 | Now install the dependencies and test dependencies: 577 | 578 | pip install -e '.[test]' 579 | 580 | To run the tests: 581 | 582 | pytest 583 | -------------------------------------------------------------------------------- /google_drive_to_sqlite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simonw/google-drive-to-sqlite/60c96e5c909285afb32978e16ec8d6a4aeb851db/google_drive_to_sqlite/__init__.py -------------------------------------------------------------------------------- /google_drive_to_sqlite/cli.py: -------------------------------------------------------------------------------- 1 | from os import access 2 | import click 3 | import httpx 4 | import itertools 5 | import json 6 | import pathlib 7 | import sqlite_utils 8 | import sys 9 | import textwrap 10 | import urllib.parse 11 | from .utils import ( 12 | APIClient, 13 | get_file, 14 | files_in_folder_recursive, 15 | paginate_files, 16 | save_files_and_folders, 17 | ) 18 | 19 | # https://github.com/simonw/google-drive-to-sqlite/issues/2 20 | GOOGLE_CLIENT_ID = ( 21 | "148933860554-98i3hter1bsn24sa6fcq1tcrhcrujrnl.apps.googleusercontent.com" 22 | ) 23 | # It's OK to publish this secret in application source code 24 | GOOGLE_CLIENT_SECRET = "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" 25 | DEFAULT_SCOPE = "https://www.googleapis.com/auth/drive.readonly" 26 | 27 | FORMAT_SHORTCUTS = { 28 | "html": "text/html", 29 | "txt": "text/plain", 30 | "rtf": "application/rtf", 31 | "pdf": "application/pdf", 32 | "doc": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", 33 | "zip": "application/zip", 34 | "epub": 
"application/epub+zip", 35 | "xls": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", 36 | "csv": "text/csv", 37 | "tsv": "text/tab-separated-values", 38 | "ppt": "application/vnd.openxmlformats-officedocument.presentationml.presentation", 39 | "jpeg": "image/jpeg", 40 | "png": "image/png", 41 | "svg": "image/svg+xml", 42 | } 43 | # .ext defaults to the bit after the / - e.g. "application/pdf" becomes "pdf", 44 | # unless there is an explicit override here: 45 | FILE_EXTENSIONS = { 46 | "image/svg+xml": "svg", 47 | "application/epub+zip": "epub", 48 | "text/plain": "txt", 49 | "text/tab-separated-values": "tsv", 50 | "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "doc", 51 | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xls", 52 | "application/vnd.openxmlformats-officedocument.presentationml.presentation": "ppt", 53 | } 54 | 55 | 56 | def start_auth_url(google_client_id, scope): 57 | return "https://accounts.google.com/o/oauth2/v2/auth?" 
+ urllib.parse.urlencode( 58 | { 59 | "access_type": "offline", 60 | "client_id": google_client_id, 61 | "redirect_uri": "urn:ietf:wg:oauth:2.0:oob", 62 | "response_type": "code", 63 | "scope": scope, 64 | } 65 | ) 66 | 67 | 68 | DEFAULT_FIELDS = [ 69 | "kind", 70 | "id", 71 | "name", 72 | "mimeType", 73 | "starred", 74 | "trashed", 75 | "explicitlyTrashed", 76 | "parents", 77 | "spaces", 78 | "version", 79 | "webViewLink", 80 | "iconLink", 81 | "hasThumbnail", 82 | "thumbnailVersion", 83 | "viewedByMe", 84 | "createdTime", 85 | "modifiedTime", 86 | "modifiedByMe", 87 | "owners", 88 | "lastModifyingUser", 89 | "shared", 90 | "ownedByMe", 91 | "viewersCanCopyContent", 92 | "copyRequiresWriterPermission", 93 | "writersCanShare", 94 | "folderColorRgb", 95 | "quotaBytesUsed", 96 | "isAppAuthorized", 97 | "linkShareMetadata", 98 | ] 99 | 100 | 101 | @click.group() 102 | @click.version_option() 103 | def cli(): 104 | "Create a SQLite database of metadata from a Google Drive folder" 105 | 106 | 107 | @cli.command() 108 | @click.option( 109 | "-a", 110 | "--auth", 111 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 112 | default="auth.json", 113 | help="Path to save token, defaults to auth.json", 114 | ) 115 | @click.option("--google-client-id", help="Custom Google client ID") 116 | @click.option("--google-client-secret", help="Custom Google client secret") 117 | @click.option("--scope", help="Custom token scope") 118 | def auth(auth, google_client_id, google_client_secret, scope): 119 | "Authenticate user and save credentials" 120 | if google_client_id is None: 121 | google_client_id = GOOGLE_CLIENT_ID 122 | if google_client_secret is None: 123 | google_client_secret = GOOGLE_CLIENT_SECRET 124 | if scope is None: 125 | scope = DEFAULT_SCOPE 126 | click.echo("Visit the following URL to authenticate with Google Drive") 127 | click.echo("") 128 | click.echo(start_auth_url(google_client_id, scope)) 129 | click.echo("") 130 | click.echo("Then return here 
and paste in the resulting code:") 131 | copied_code = click.prompt("Paste code here", hide_input=True) 132 | response = httpx.post( 133 | "https://www.googleapis.com/oauth2/v4/token", 134 | data={ 135 | "code": copied_code, 136 | "client_id": google_client_id, 137 | "client_secret": google_client_secret, 138 | "redirect_uri": "urn:ietf:wg:oauth:2.0:oob", 139 | "grant_type": "authorization_code", 140 | }, 141 | ) 142 | tokens = response.json() 143 | if "error" in tokens: 144 | message = "{error}: {error_description}".format(**tokens) 145 | raise click.ClickException(message) 146 | if "refresh_token" not in tokens: 147 | raise click.ClickException("No refresh_token in response") 148 | # Read existing file and add refresh_token to it 149 | try: 150 | auth_data = json.load(open(auth)) 151 | except (ValueError, FileNotFoundError): 152 | auth_data = {} 153 | info = {"refresh_token": tokens["refresh_token"]} 154 | if google_client_id != GOOGLE_CLIENT_ID: 155 | info["google_client_id"] = google_client_id 156 | if google_client_secret != GOOGLE_CLIENT_SECRET: 157 | info["google_client_secret"] = google_client_secret 158 | if scope != DEFAULT_SCOPE: 159 | info["scope"] = scope 160 | auth_data["google-drive-to-sqlite"] = info 161 | with open(auth, "w") as fp: 162 | fp.write(json.dumps(auth_data, indent=4)) 163 | # chmod 600 to avoid other users on the shared machine reading it 164 | pathlib.Path(auth).chmod(0o600) 165 | 166 | 167 | @cli.command() 168 | @click.option( 169 | "-a", 170 | "--auth", 171 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 172 | default="auth.json", 173 | help="Path to load token, defaults to auth.json", 174 | ) 175 | def revoke(auth): 176 | "Revoke the token stored in auth.json" 177 | tokens = load_tokens(auth) 178 | response = httpx.get( 179 | "https://accounts.google.com/o/oauth2/revoke", 180 | params={ 181 | "token": tokens["refresh_token"], 182 | }, 183 | ) 184 | if "error" in response.json(): 185 | raise 
click.ClickException(response.json()["error"]) 186 | 187 | 188 | @cli.command() 189 | @click.argument("url") 190 | @click.option( 191 | "-a", 192 | "--auth", 193 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 194 | default="auth.json", 195 | help="Path to auth.json token file", 196 | ) 197 | @click.option("--paginate", help="Paginate through all results in this key") 198 | @click.option( 199 | "--nl", is_flag=True, help="Output paginated data as newline-delimited JSON" 200 | ) 201 | @click.option("--stop-after", type=int, help="Stop paginating after X results") 202 | @click.option( 203 | "-v", 204 | "--verbose", 205 | is_flag=True, 206 | help="Send verbose output to stderr", 207 | ) 208 | def get(url, auth, paginate, nl, stop_after, verbose): 209 | "Make an authenticated HTTP GET to the specified URL" 210 | if not url.startswith("https://www.googleapis.com/"): 211 | if url.startswith("/"): 212 | url = "https://www.googleapis.com" + url 213 | else: 214 | raise click.ClickException( 215 | "url must start with / or https://www.googleapis.com/" 216 | ) 217 | 218 | kwargs = load_tokens(auth) 219 | if verbose: 220 | kwargs["logger"] = lambda s: click.echo(s, err=True) 221 | client = APIClient(**kwargs) 222 | 223 | if not paginate: 224 | response = client.get(url) 225 | if verbose: 226 | click.echo( 227 | "{}, headers: {}".format(response.status_code, repr(response.headers)) 228 | ) 229 | if response.status_code != 200: 230 | raise click.ClickException( 231 | "{}: {}\n\n{}".format(response.url, response.status_code, response.text) 232 | ) 233 | if "json" in response.headers.get("content-type", ""): 234 | click.echo(json.dumps(response.json(), indent=4)) 235 | else: 236 | click.echo(response.text) 237 | 238 | else: 239 | 240 | def paginate_all(): 241 | i = 0 242 | next_page_token = None 243 | while True: 244 | params = {} 245 | if next_page_token is not None: 246 | params["pageToken"] = next_page_token 247 | response = client.get( 248 | url, 249 | 
params=params, 250 | ) 251 | data = response.json() 252 | if response.status_code != 200: 253 | raise click.ClickException(json.dumps(data, indent=4)) 254 | # Paginate using the specified key and nextPageToken 255 | if paginate not in data: 256 | raise click.ClickException( 257 | "paginate key {} not found in {}".format( 258 | repr(paginate), repr(list(data.keys())) 259 | ) 260 | ) 261 | for item in data[paginate]: 262 | yield item 263 | i += 1 264 | if stop_after is not None and i >= stop_after: 265 | return 266 | 267 | next_page_token = data.get("nextPageToken") 268 | if not next_page_token: 269 | break 270 | 271 | if nl: 272 | for item in paginate_all(): 273 | click.echo(json.dumps(item)) 274 | else: 275 | for line in stream_indented_json(paginate_all()): 276 | click.echo(line) 277 | 278 | 279 | @cli.command() 280 | @click.argument( 281 | "database", 282 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 283 | required=False, 284 | ) 285 | @click.option( 286 | "-a", 287 | "--auth", 288 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 289 | default="auth.json", 290 | help="Path to auth.json token file", 291 | ) 292 | @click.option("--folder", help="Files in this folder ID and its sub-folders") 293 | @click.option("-q", help="Files matching this query") 294 | @click.option("--full-text", help="Search for files with text match") 295 | @click.option("--starred", is_flag=True, help="Files you have starred") 296 | @click.option("--trashed", is_flag=True, help="Files in the trash") 297 | @click.option( 298 | "--shared-with-me", is_flag=True, help="Files that have been shared with you" 299 | ) 300 | @click.option( 301 | "--apps", 302 | is_flag=True, 303 | help="Google Apps docs, spreadsheets, presentations and drawings", 304 | ) 305 | @click.option("--docs", is_flag=True, help="Google Apps docs") 306 | @click.option("--sheets", is_flag=True, help="Google Apps spreadsheets") 307 | @click.option("--presentations", is_flag=True, 
help="Google Apps presentations") 308 | @click.option("--drawings", is_flag=True, help="Google Apps drawings") 309 | @click.option( 310 | "json_", "--json", is_flag=True, help="Output JSON rather than write to DB" 311 | ) 312 | @click.option( 313 | "--nl", is_flag=True, help="Output newline-delimited JSON rather than write to DB" 314 | ) 315 | @click.option("--stop-after", type=int, help="Stop paginating after X results") 316 | @click.option( 317 | "--import-json", 318 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 319 | help="Import from this JSON file instead of the API", 320 | ) 321 | @click.option( 322 | "--import-nl", 323 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 324 | help="Import from this newline-delimited JSON file", 325 | ) 326 | @click.option( 327 | "-v", 328 | "--verbose", 329 | is_flag=True, 330 | help="Send verbose output to stderr", 331 | ) 332 | def files( 333 | database, 334 | auth, 335 | folder, 336 | q, 337 | full_text, 338 | starred, 339 | trashed, 340 | shared_with_me, 341 | apps, 342 | docs, 343 | sheets, 344 | presentations, 345 | drawings, 346 | json_, 347 | nl, 348 | stop_after, 349 | import_json, 350 | import_nl, 351 | verbose, 352 | ): 353 | """ 354 | Retrieve metadata for files in Google Drive, and write to a SQLite database 355 | or output as JSON. 
356 | 357 | google-drive-to-sqlite files files.db 358 | 359 | Use --json to output JSON, --nl for newline-delimited JSON: 360 | 361 | google-drive-to-sqlite files files.db --json 362 | 363 | Use a folder ID to recursively fetch every file in that folder and its 364 | sub-folders: 365 | 366 | google-drive-to-sqlite files files.db --folder 1E6Zg2X2bjjtPzVfX8YqdXZDCoB3AVA7i 367 | 368 | Fetch files you have starred: 369 | 370 | google-drive-to-sqlite files starred.db --starred 371 | """ 372 | if not database and not json_ and not nl: 373 | raise click.ClickException("Must either provide database or use --json or --nl") 374 | q_bits = [] 375 | if q: 376 | q_bits.append(q) 377 | if full_text: 378 | q_bits.append("fullText contains '{}'".format(full_text.replace("'", ""))) 379 | if starred: 380 | q_bits.append("starred = true") 381 | if trashed: 382 | q_bits.append("trashed = true") 383 | if shared_with_me: 384 | q_bits.append("sharedWithMe = true") 385 | 386 | mime_types = [] 387 | if apps: 388 | docs = True 389 | sheets = True 390 | presentations = True 391 | drawings = True 392 | if docs: 393 | mime_types.append("application/vnd.google-apps.document") 394 | if sheets: 395 | mime_types.append("application/vnd.google-apps.spreadsheet") 396 | if presentations: 397 | mime_types.append("application/vnd.google-apps.presentation") 398 | if drawings: 399 | mime_types.append("application/vnd.google-apps.drawing") 400 | if mime_types: 401 | q_bits.append( 402 | "({})".format( 403 | " or ".join( 404 | "mimeType = '{}'".format(mime_type) for mime_type in mime_types 405 | ) 406 | ) 407 | ) 408 | 409 | q = " and ".join(q_bits) 410 | 411 | if q and verbose: 412 | click.echo("?q= query: {}".format(q), err=True) 413 | 414 | client = None 415 | if not (import_json or import_nl): 416 | kwargs = load_tokens(auth) 417 | if verbose: 418 | kwargs["logger"] = lambda s: click.echo(s, err=True) 419 | client = APIClient(**kwargs) 420 | 421 | if import_json or import_nl: 422 | if "-" in 
def load_tokens(auth):
    """Load stored credentials from the auth JSON file.

    Returns a dict with "refresh_token", "client_id" and "client_secret"
    keys, suitable for passing to APIClient(**kwargs). Falls back to the
    default client ID/secret when no custom ones were saved by `auth`.

    Raises click.ClickException if the file is missing, is not valid JSON,
    or does not contain a "google-drive-to-sqlite" key.
    """
    try:
        # Use a context manager so the file handle is always closed
        with open(auth) as fp:
            token_info = json.load(fp)["google-drive-to-sqlite"]
    except (KeyError, ValueError, FileNotFoundError):
        # ValueError also covers json.JSONDecodeError for a corrupt file,
        # matching the (ValueError, FileNotFoundError) handling in auth()
        raise click.ClickException("Could not find google-drive-to-sqlite in auth.json")
    return {
        "refresh_token": token_info["refresh_token"],
        "client_id": token_info.get("google_client_id", GOOGLE_CLIENT_ID),
        "client_secret": token_info.get("google_client_secret", GOOGLE_CLIENT_SECRET),
    }
help="Path to auth.json token file", 499 | ) 500 | @click.option( 501 | "-o", 502 | "--output", 503 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, writable=True), 504 | help="File to write to, or - for standard output", 505 | ) 506 | @click.option( 507 | "-s", 508 | "--silent", 509 | is_flag=True, 510 | help="Hide progress bar and filename", 511 | ) 512 | def download(file_ids, auth, output, silent): 513 | """ 514 | Download one or more files to disk, based on their file IDs. 515 | 516 | The file content will be saved to a file with the name: 517 | 518 | FILE_ID.ext 519 | 520 | Where the extension is automatically picked based on the type of file. 521 | 522 | If you are downloading a single file you can specify a filename with -o: 523 | 524 | google-drive-to-sqlite download MY_FILE_ID -o myfile.txt 525 | """ 526 | if output: 527 | if len(file_ids) != 1: 528 | raise click.ClickException("--output option only works with a single file") 529 | tokens = load_tokens(auth) 530 | client = APIClient(**tokens) 531 | for file_id in file_ids: 532 | with client.stream( 533 | "GET", 534 | "https://www.googleapis.com/drive/v3/files/{}?alt=media".format(file_id), 535 | ) as response: 536 | streaming_download(response, file_id, output, silent) 537 | 538 | 539 | @cli.command() 540 | @click.argument("format") 541 | @click.argument("file_ids", nargs=-1, required=True) 542 | @click.option( 543 | "-a", 544 | "--auth", 545 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 546 | default="auth.json", 547 | help="Path to auth.json token file", 548 | ) 549 | @click.option( 550 | "-o", 551 | "--output", 552 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, writable=True), 553 | help="File to write to, or - for standard output", 554 | ) 555 | @click.option( 556 | "-s", 557 | "--silent", 558 | is_flag=True, 559 | help="Hide progress bar and filename", 560 | ) 561 | def export(format, file_ids, auth, output, silent): 562 | """ 563 | Export 
def streaming_download(response, filestem, output, silent):
    """Stream an httpx response body to disk (or stdout).

    response: an open streaming httpx response
    filestem: base name used when no explicit output path was given
    output: target path, "-" for standard output, or None to derive FILE_ID.ext
    silent: suppress the progress bar and filename message

    Raises click.ClickException on a non-200 response.
    Fix over the original: the output file handle is now always closed
    (the original leaked it); sys.stdout.buffer is deliberately left open.
    """
    if response.status_code != 200:
        raise click.ClickException(response.read().decode("utf-8"))
    close_fp = False
    if output:
        filename = pathlib.Path(output).name
        if output == "-":
            # Binary stdout: never close it, and emit no progress noise
            fp = sys.stdout.buffer
            silent = True
        else:
            fp = open(output, "wb")
            close_fp = True
    else:
        # Derive the extension from the response content-type, with
        # explicit overrides in FILE_EXTENSIONS (e.g. image/svg+xml -> svg)
        ext = response.headers.get("content-type", "/bin")
        if ext in FILE_EXTENSIONS:
            ext = FILE_EXTENSIONS[ext]
        else:
            ext = ext.split("/")[-1]
        filename = "{}.{}".format(filestem, ext)
        fp = open(filename, "wb")
        close_fp = True
    try:
        length = int(response.headers.get("content-length", "0"))
        if not silent:
            click.echo(
                "Writing {}to {}".format(
                    "{:,} bytes ".format(length) if length else "", filename
                ),
                err=True,
            )
        if length and not silent:
            # Known length: show a progress bar while streaming
            with click.progressbar(length=length, label="Downloading") as bar:
                for data in response.iter_bytes():
                    fp.write(data)
                    bar.update(len(data))
        else:
            for data in response.iter_bytes():
                fp.write(data)
    finally:
        if close_fp:
            fp.close()


def stream_indented_json(iterator, indent=2):
    """Yield chunks of a pretty-printed JSON array without materializing the iterator.

    Iterates two-at-a-time (via itertools.tee) so each yielded chunk knows
    whether it is the last item and can omit the trailing comma / append the
    closing bracket. Yields "[]" for an empty iterator.
    """
    current_iter, next_iter = itertools.tee(iterator, 2)
    next(next_iter, None)
    first = True
    for item, next_item in itertools.zip_longest(current_iter, next_iter):
        is_last = next_item is None
        yield "{first}{serialized}{separator}{last}".format(
            first="[\n" if first else "",
            serialized=textwrap.indent(
                json.dumps(item, indent=indent, default=repr), " " * indent
            ),
            separator="," if not is_last else "",
            last="\n]" if is_last else "",
        )
        first = False
    if first:
        # We didn't output anything, so yield the empty list
        yield "[]"
"https://www.googleapis.com/drive/v3/files/{}".format(file_id) 14 | params = {} 15 | if fields is not None: 16 | params["fields"] = ",".join(fields) 17 | return client.get( 18 | file_url, 19 | params=params, 20 | ).json() 21 | 22 | 23 | def paginate_files(client, *, corpora=None, q=None, fields=None): 24 | pageToken = None 25 | files_url = "https://www.googleapis.com/drive/v3/files" 26 | params = {} 27 | if corpora is not None: 28 | params["corpora"] = corpora 29 | if fields is not None: 30 | params["fields"] = "nextPageToken, files({})".format(",".join(fields)) 31 | if q: 32 | params["q"] = q 33 | while True: 34 | if pageToken is not None: 35 | params["pageToken"] = pageToken 36 | else: 37 | params.pop("pageToken", None) 38 | data = client.get( 39 | files_url, 40 | params=params, 41 | ).json() 42 | if "error" in data: 43 | raise FilesError(data) 44 | yield from data["files"] 45 | pageToken = data.get("nextPageToken", None) 46 | if pageToken is None: 47 | break 48 | 49 | 50 | def files_in_folder_recursive(client, folder_id, fields): 51 | for file in paginate_files( 52 | client, q='"{}" in parents'.format(folder_id), fields=fields 53 | ): 54 | yield file 55 | if file["mimeType"] == "application/vnd.google-apps.folder": 56 | yield from files_in_folder_recursive(client, file["id"], fields) 57 | 58 | 59 | class APIClient: 60 | class Error(click.ClickException): 61 | pass 62 | 63 | timeout = 30.0 64 | 65 | def __init__(self, refresh_token, client_id, client_secret, logger=None): 66 | self.refresh_token = refresh_token 67 | self.access_token = None 68 | self.client_id = client_id 69 | self.client_secret = client_secret 70 | self.log = logger or (lambda s: None) 71 | 72 | def get_access_token(self, force_refresh=False): 73 | if self.access_token and not force_refresh: 74 | return self.access_token 75 | url = "https://www.googleapis.com/oauth2/v4/token" 76 | self.log("POST {}".format(url)) 77 | data = httpx.post( 78 | url, 79 | data={ 80 | "grant_type": "refresh_token", 81 
| "refresh_token": self.refresh_token, 82 | "client_id": self.client_id, 83 | "client_secret": self.client_secret, 84 | }, 85 | timeout=self.timeout, 86 | ).json() 87 | if "error" in data: 88 | raise self.Error(str(data)) 89 | self.access_token = data["access_token"] 90 | return self.access_token 91 | 92 | def get( 93 | self, 94 | url, 95 | params=None, 96 | headers=None, 97 | allow_token_refresh=True, 98 | transport_retries=2, 99 | ): 100 | headers = headers or {} 101 | headers["Authorization"] = "Bearer {}".format(self.get_access_token()) 102 | self.log("GET: {} {}".format(url, params or "").strip()) 103 | try: 104 | response = httpx.get( 105 | url, params=params, headers=headers, timeout=self.timeout 106 | ) 107 | except httpx.TransportError as ex: 108 | if transport_retries: 109 | sleep(2) 110 | self.log(" Got {}, retrying".format(ex.__class__.__name__)) 111 | return self.get( 112 | url, 113 | params, 114 | headers, 115 | allow_token_refresh=allow_token_refresh, 116 | transport_retries=transport_retries - 1, 117 | ) 118 | else: 119 | raise 120 | 121 | if response.status_code == 401 and allow_token_refresh: 122 | # Try again after refreshing the token 123 | self.get_access_token(force_refresh=True) 124 | return self.get(url, params, headers, allow_token_refresh=False) 125 | return response 126 | 127 | def post(self, url, data=None, headers=None, allow_token_refresh=True): 128 | headers = headers or {} 129 | headers["Authorization"] = "Bearer {}".format(self.get_access_token()) 130 | self.log("POST: {}".format(url)) 131 | response = httpx.post(url, data=data, headers=headers, timeout=self.timeout) 132 | if response.status_code == 403 and allow_token_refresh: 133 | self.get_access_token(force_refresh=True) 134 | return self.post(url, data, headers, allow_token_refresh=False) 135 | return response 136 | 137 | @contextmanager 138 | def stream(self, method, url, params=None): 139 | with httpx.stream( 140 | method, 141 | url, 142 | params=params, 143 | 
headers={"Authorization": "Bearer {}".format(self.get_access_token())}, 144 | ) as stream: 145 | yield stream 146 | 147 | 148 | def save_files_and_folders(db, all): 149 | # Ensure tables with foreign keys exist 150 | with db.conn: 151 | if not db["drive_users"].exists(): 152 | db["drive_users"].create({"permissionId": str}, pk="permissionId") 153 | for table in ("drive_folders", "drive_files"): 154 | if not db[table].exists(): 155 | db[table].create( 156 | { 157 | "id": str, 158 | "_parent": str, 159 | "_owner": str, 160 | "lastModifyingUser": str, 161 | }, 162 | pk="id", 163 | ) 164 | # Gotta add foreign key after table is created, to avoid 165 | # AlterError: No such column: drive_folders.id 166 | db.add_foreign_keys( 167 | ( 168 | (table, "_parent", "drive_folders", "id"), 169 | (table, "_owner", "drive_users", "permissionId"), 170 | (table, "lastModifyingUser", "drive_users", "permissionId"), 171 | ) 172 | ) 173 | 174 | # Commit every 100 records 175 | users_seen = set() 176 | for chunk in chunks(all, 100): 177 | # Add `_parent` columns 178 | files = [] 179 | folders = [] 180 | for file in chunk: 181 | file["_parent"] = file["parents"][0] if file.get("parents") else None 182 | if file.get("mimeType") == "application/vnd.google-apps.folder": 183 | folders.append(file) 184 | else: 185 | files.append(file) 186 | # Convert "lastModifyingUser" JSON into a foreign key reference to drive_users 187 | drive_folders_owners_to_insert = [] 188 | drive_files_owners_to_insert = [] 189 | for to_insert_list, sequence in ( 190 | (drive_folders_owners_to_insert, folders), 191 | (drive_files_owners_to_insert, files), 192 | ): 193 | for file in sequence: 194 | last_modifying_user = file.get("lastModifyingUser") 195 | # This can be {'displayName': '', 'kind': 'drive#user', 'me': False} 196 | if last_modifying_user and last_modifying_user.get("permissionId"): 197 | user_id = last_modifying_user["permissionId"] 198 | if user_id not in users_seen: 199 | db["drive_users"].insert( 200 | 
def chunks(sequence, size):
    """Lazily split an iterable into consecutive chunks of up to `size` items.

    Each yielded chunk is itself a lazy iterator over the shared underlying
    iterator, so a chunk must be fully consumed before advancing to the next
    one (the caller in save_files_and_folders does exactly that).
    """
    it = iter(sequence)
    while True:
        try:
            head = next(it)
        except StopIteration:
            return
        yield itertools.chain([head], itertools.islice(it, size - 1))
long_description_content_type="text/markdown", 20 | author="Simon Willison", 21 | url="https://github.com/simonw/google-drive-to-sqlite", 22 | project_urls={ 23 | "Issues": "https://github.com/simonw/google-drive-to-sqlite/issues", 24 | "CI": "https://github.com/simonw/google-drive-to-sqlite/actions", 25 | "Changelog": "https://github.com/simonw/google-drive-to-sqlite/releases", 26 | }, 27 | license="Apache License, Version 2.0", 28 | version=VERSION, 29 | packages=["google_drive_to_sqlite"], 30 | entry_points=""" 31 | [console_scripts] 32 | google-drive-to-sqlite=google_drive_to_sqlite.cli:cli 33 | """, 34 | install_requires=["click", "httpx", "sqlite-utils"], 35 | extras_require={"test": ["pytest", "pytest-httpx", "pytest-mock", "cogapp"]}, 36 | python_requires=">=3.6", 37 | ) 38 | -------------------------------------------------------------------------------- /tests/folder-and-children.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "kind": "drive#file", 4 | "id": "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j", 5 | "name": "test-folder", 6 | "mimeType": "application/vnd.google-apps.folder", 7 | "starred": false, 8 | "trashed": false, 9 | "explicitlyTrashed": false, 10 | "parents": [ 11 | "0AK1CICIR8ECDUk9PVA" 12 | ], 13 | "spaces": [ 14 | "drive" 15 | ], 16 | "version": "4", 17 | "webViewLink": "https://drive.google.com/drive/folders/1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j", 18 | "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder", 19 | "hasThumbnail": false, 20 | "thumbnailVersion": "0", 21 | "viewedByMe": true, 22 | "createdTime": "2022-02-19T04:22:24.589Z", 23 | "modifiedTime": "2022-02-19T04:22:24.589Z", 24 | "modifiedByMe": true, 25 | "owners": [ 26 | { 27 | "kind": "drive#user", 28 | "displayName": "Simon Willison", 29 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 30 | "me": true, 31 | "permissionId": 
"16974643384157631322", 32 | "emailAddress": "...@gmail.com" 33 | } 34 | ], 35 | "lastModifyingUser": { 36 | "kind": "drive#user", 37 | "displayName": "Simon Willison", 38 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 39 | "me": true, 40 | "permissionId": "16974643384157631322", 41 | "emailAddress": "...@gmail.com" 42 | }, 43 | "shared": false, 44 | "ownedByMe": true, 45 | "viewersCanCopyContent": true, 46 | "copyRequiresWriterPermission": false, 47 | "writersCanShare": true, 48 | "folderColorRgb": "#8f8f8f", 49 | "quotaBytesUsed": "0", 50 | "isAppAuthorized": false, 51 | "linkShareMetadata": { 52 | "securityUpdateEligible": false, 53 | "securityUpdateEnabled": true 54 | } 55 | }, 56 | { 57 | "kind": "drive#file", 58 | "id": "1FYLDMMXi1-gGjxg8dLmvbiixDuR8-FZ3", 59 | "name": "two", 60 | "mimeType": "application/vnd.google-apps.folder", 61 | "starred": false, 62 | "trashed": false, 63 | "explicitlyTrashed": false, 64 | "parents": [ 65 | "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j" 66 | ], 67 | "spaces": [ 68 | "drive" 69 | ], 70 | "version": "1", 71 | "webViewLink": "https://drive.google.com/drive/folders/1FYLDMMXi1-gGjxg8dLmvbiixDuR8-FZ3", 72 | "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder", 73 | "hasThumbnail": false, 74 | "thumbnailVersion": "0", 75 | "viewedByMe": true, 76 | "createdTime": "2022-02-19T04:22:38.714Z", 77 | "modifiedTime": "2022-02-19T04:22:38.714Z", 78 | "modifiedByMe": true, 79 | "owners": [ 80 | { 81 | "kind": "drive#user", 82 | "displayName": "Simon Willison", 83 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 84 | "me": true, 85 | "permissionId": "16974643384157631322", 86 | "emailAddress": "...@gmail.com" 87 | } 88 | ], 89 | "lastModifyingUser": { 90 | "kind": "drive#user", 91 | "displayName": "Simon Willison", 92 | "photoLink": 
"https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 93 | "me": true, 94 | "permissionId": "16974643384157631322", 95 | "emailAddress": "...@gmail.com" 96 | }, 97 | "shared": false, 98 | "ownedByMe": true, 99 | "viewersCanCopyContent": true, 100 | "copyRequiresWriterPermission": false, 101 | "writersCanShare": true, 102 | "folderColorRgb": "#8f8f8f", 103 | "quotaBytesUsed": "0", 104 | "isAppAuthorized": false, 105 | "linkShareMetadata": { 106 | "securityUpdateEligible": false, 107 | "securityUpdateEnabled": true 108 | } 109 | }, 110 | { 111 | "kind": "drive#file", 112 | "id": "113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN", 113 | "name": "one", 114 | "mimeType": "application/vnd.google-apps.folder", 115 | "starred": false, 116 | "trashed": false, 117 | "explicitlyTrashed": false, 118 | "parents": [ 119 | "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j" 120 | ], 121 | "spaces": [ 122 | "drive" 123 | ], 124 | "version": "2", 125 | "webViewLink": "https://drive.google.com/drive/folders/113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN", 126 | "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder", 127 | "hasThumbnail": false, 128 | "thumbnailVersion": "0", 129 | "viewedByMe": true, 130 | "createdTime": "2022-02-19T04:22:33.581Z", 131 | "modifiedTime": "2022-02-19T04:22:33.581Z", 132 | "modifiedByMe": true, 133 | "owners": [ 134 | { 135 | "kind": "drive#user", 136 | "displayName": "Simon Willison", 137 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 138 | "me": true, 139 | "permissionId": "16974643384157631322", 140 | "emailAddress": "...@gmail.com" 141 | } 142 | ], 143 | "lastModifyingUser": { 144 | "kind": "drive#user", 145 | "displayName": "Simon Willison", 146 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 147 | "me": true, 148 | "permissionId": "16974643384157631322", 149 | "emailAddress": 
"...@gmail.com" 150 | }, 151 | "shared": false, 152 | "ownedByMe": true, 153 | "viewersCanCopyContent": true, 154 | "copyRequiresWriterPermission": false, 155 | "writersCanShare": true, 156 | "folderColorRgb": "#8f8f8f", 157 | "quotaBytesUsed": "0", 158 | "isAppAuthorized": false, 159 | "linkShareMetadata": { 160 | "securityUpdateEligible": false, 161 | "securityUpdateEnabled": true 162 | } 163 | }, 164 | { 165 | "kind": "drive#file", 166 | "id": "1Xdqfeoi8B8YJJR0y-_oQlHYpjHHzD5a-", 167 | "name": "sample.csv", 168 | "mimeType": "text/csv", 169 | "starred": false, 170 | "trashed": false, 171 | "explicitlyTrashed": false, 172 | "parents": [ 173 | "113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN" 174 | ], 175 | "spaces": [ 176 | "drive" 177 | ], 178 | "version": "2", 179 | "webViewLink": "https://drive.google.com/file/d/1Xdqfeoi8B8YJJR0y-_oQlHYpjHHzD5a-/view?usp=drivesdk", 180 | "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/text/csv", 181 | "hasThumbnail": false, 182 | "thumbnailVersion": "0", 183 | "viewedByMe": true, 184 | "createdTime": "2022-02-19T04:25:16.517Z", 185 | "modifiedTime": "2020-11-11T18:10:31.000Z", 186 | "modifiedByMe": true, 187 | "owners": [ 188 | { 189 | "kind": "drive#user", 190 | "displayName": "Simon Willison", 191 | "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64", 192 | "me": true, 193 | "permissionId": "16974643384157631322", 194 | "emailAddress": "...@gmail.com" 195 | } 196 | ], 197 | "lastModifyingUser": {"displayName": "", "kind": "drive#user", "me": false}, 198 | "shared": false, 199 | "ownedByMe": true, 200 | "viewersCanCopyContent": true, 201 | "copyRequiresWriterPermission": false, 202 | "writersCanShare": true, 203 | "quotaBytesUsed": "1070506", 204 | "isAppAuthorized": false, 205 | "linkShareMetadata": { 206 | "securityUpdateEligible": false, 207 | "securityUpdateEnabled": true 208 | } 209 | } 210 | ] 211 | 
# Expected body of the OAuth token-exchange POST: the CLI's baked-in
# client_id/client_secret plus the refresh token stored in auth.json.
TOKEN_REQUEST_CONTENT = (
    b"grant_type=refresh_token&"
    b"refresh_token=rtoken&"
    b"client_id=148933860554-98i3hter1bsn24sa6fcq1tcrhcrujrnl.apps.googleusercontent.com&"
    b"client_secret=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"
)

# Minimal auth.json content written by most tests before invoking the CLI
AUTH_JSON = {"google-drive-to-sqlite": {"refresh_token": "rtoken"}}
FOLDER_AND_CHILDREN_JSON_PATH = (
    pathlib.Path(__file__).parent / "folder-and-children.json"
)


@pytest.mark.parametrize(
    "response,expected_error",
    (
        ({"refresh_token": "rtoken"}, None),
        (
            {"error": "bad_error", "error_description": "description"},
            "Error: bad_error: description",
        ),
        (
            {"unexpected": "error"},
            "Error: No refresh_token in response",
        ),
    ),
)
def test_auth(httpx_mock, response, expected_error):
    """The auth command writes auth.json on success and surfaces API errors."""
    httpx_mock.add_response(json=response)
    runner = CliRunner()
    with runner.isolated_filesystem():
        result = runner.invoke(cli, ["auth"], input="my-token")
        if expected_error:
            assert result.exit_code == 1
            assert result.output.strip().endswith(expected_error)
        else:
            assert result.exit_code == 0
            auth = json.load(open("auth.json"))
            assert auth == {"google-drive-to-sqlite": {"refresh_token": "rtoken"}}
            # Should be chmod 600 - the file holds a credential
            st_mode = pathlib.Path("auth.json").stat().st_mode
            assert stat.filemode(st_mode) == "-rw-------"


@pytest.mark.parametrize(
    "auth_file_exists,revoke_response,expected_error",
    (
        (False, None, "Error: Could not find google-drive-to-sqlite in auth.json"),
        (True, {}, None),
        (True, {"error": "invalid_token"}, "Error: invalid_token"),
    ),
)
def test_revoke(httpx_mock, auth_file_exists, revoke_response, expected_error):
    """The revoke command hits Google's revoke endpoint with the stored token."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        if auth_file_exists:
            open("auth.json", "w").write(json.dumps(AUTH_JSON))
        httpx_mock.add_response(json=revoke_response)
        result = runner.invoke(cli, ["revoke"])
        if auth_file_exists:
            # Only one request is expected - the revoke call itself
            request = httpx_mock.get_request()
            assert (
                request.url
                == "https://accounts.google.com/o/oauth2/revoke?token=rtoken"
            )
        if expected_error:
            assert result.exit_code == 1
            assert result.output.strip().endswith(expected_error)
        else:
            assert result.exit_code == 0


@pytest.mark.parametrize(
    "opts,expected_content",
    (
        ([], {"refresh_token": "rtoken"}),
        (
            ["--google-client-id", "x", "--google-client-secret", "y"],
            {
                "refresh_token": "rtoken",
                "google_client_id": "x",
                "google_client_secret": "y",
            },
        ),
        (
            ["--scope", "SCOPE"],
            {
                "refresh_token": "rtoken",
                "scope": "SCOPE",
            },
        ),
    ),
)
def test_auth_custom_client(httpx_mock, opts, expected_content):
    """Custom client/scope options are persisted alongside the refresh token."""
    httpx_mock.add_response(json={"refresh_token": "rtoken"})
    runner = CliRunner()
    with runner.isolated_filesystem():
        result = runner.invoke(cli, ["auth"] + opts, input="my-token")
        assert result.exit_code == 0
        auth = json.load(open("auth.json"))
        assert auth == {"google-drive-to-sqlite": expected_content}


def test_get_single(httpx_mock):
    """A non-paginated `get` exchanges the refresh token, then fetches the URL
    with the resulting Bearer token and pretty-prints the JSON response."""
    about_data = {
        "kind": "drive#about",
        "user": {"kind": "drive#user", "displayName": "User"},
    }
    httpx_mock.add_response(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        url="https://www.googleapis.com/drive/v3/about?fields=*",
        method="GET",
        json=about_data,
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(
            cli, ["get", "https://www.googleapis.com/drive/v3/about?fields=*"]
        )
        token_request, about_request = httpx_mock.get_requests()
        assert token_request.content == TOKEN_REQUEST_CONTENT
        assert about_request.url == "https://www.googleapis.com/drive/v3/about?fields=*"
        assert about_request.headers["authorization"] == "Bearer atoken"
        assert result.exit_code == 0
        assert result.output.strip() == json.dumps(about_data, indent=4)


def test_get_plain_text(httpx_mock):
    """Non-JSON responses from `get` are echoed verbatim rather than formatted."""
    url = "https://www.googleapis.com/drive/v3/files/123/export?mimeType=text/plain"
    httpx_mock.add_response(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        url=url,
        method="GET",
        content="This is plain text",
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["get", url])
        token_request, export_request = httpx_mock.get_requests()
        assert token_request.content == TOKEN_REQUEST_CONTENT
        assert export_request.url == url
        assert export_request.headers["authorization"] == "Bearer atoken"
        assert result.exit_code == 0
        assert result.output.strip() == "This is plain text"


@pytest.mark.parametrize(
    "opts,expected_output",
    (
        (
            [],
            '[\n    {\n        "id": 1\n    },\n    {\n        "id": 2\n    },\n    '
            '{\n        "id": 3\n    },\n    {\n        "id": 4\n    }\n]\n',
        ),
        (
            ["--nl"],
            '{"id": 1}\n{"id": 2}\n{"id": 3}\n{"id": 4}\n',
        ),
    ),
)
def test_get_paginated(httpx_mock, opts, expected_output):
    """`get --paginate files` follows nextPageToken and merges the pages;
    --nl switches output to newline-delimited JSON."""
    httpx_mock.add_response(
        url="https://www.googleapis.com/oauth2/v4/token",
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        url="https://www.googleapis.com/page",
        json={"nextPageToken": "next", "files": [{"id": 1}, {"id": 2}]},
    )
    httpx_mock.add_response(
        url="https://www.googleapis.com/page?pageToken=next",
        json={"nextPageToken": None, "files": [{"id": 3}, {"id": 4}]},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(
            cli,
            ["get", "https://www.googleapis.com/page", "--paginate", "files"] + opts,
        )
        # First request is the token exchange; the next two are the pages
        _, page1_request, page2_request = httpx_mock.get_requests()
        for request in (page1_request, page2_request):
            assert request.headers["authorization"] == "Bearer atoken"
        assert page2_request.url == "https://www.googleapis.com/page?pageToken=next"
        assert result.exit_code == 0
        assert result.output == expected_output
"&q=%28mimeType+%3D+%27application%2Fvnd.google-apps.document%27%29", 230 | ), 231 | ( 232 | ["--sheets"], 233 | "&q=%28mimeType+%3D+%27application%2Fvnd.google-apps.spreadsheet%27%29", 234 | ), 235 | ( 236 | ["--presentations"], 237 | "&q=%28mimeType+%3D+%27application%2Fvnd.google-apps.presentation%27%29", 238 | ), 239 | ( 240 | ["--drawings"], 241 | "&q=%28mimeType+%3D+%27application%2Fvnd.google-apps.drawing%27%29", 242 | ), 243 | ), 244 | ) 245 | @pytest.mark.parametrize("use_db", (True, False)) 246 | def test_files_basic(httpx_mock, opts, extra_qs, use_db): 247 | httpx_mock.add_response( 248 | method="POST", 249 | json={"access_token": "atoken"}, 250 | ) 251 | httpx_mock.add_response( 252 | json={"nextPageToken": "next", "files": [{"id": 1}, {"id": 2}]}, 253 | ) 254 | httpx_mock.add_response( 255 | json={"nextPageToken": None, "files": [{"id": 3}, {"id": 4}]}, 256 | ) 257 | runner = CliRunner() 258 | with runner.isolated_filesystem(): 259 | open("auth.json", "w").write(json.dumps(AUTH_JSON)) 260 | args = ["files"] 261 | if use_db: 262 | args.append("test.db") 263 | else: 264 | args.append("--json") 265 | result = runner.invoke(cli, args + opts, catch_exceptions=False) 266 | assert result.exit_code == 0 267 | token_request, page1_request, page2_request = httpx_mock.get_requests() 268 | assert token_request.content == TOKEN_REQUEST_CONTENT 269 | assert page1_request.url == ( 270 | "https://www.googleapis.com/drive/v3/files?fields=" 271 | + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS)) 272 | + extra_qs 273 | ) 274 | assert page2_request.url == ( 275 | "https://www.googleapis.com/drive/v3/files?fields=" 276 | + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS)) 277 | + extra_qs 278 | + "&pageToken=next" 279 | ) 280 | if use_db: 281 | rows = list(sqlite_utils.Database("test.db")["drive_files"].rows) 282 | assert rows == [ 283 | {"id": "1", "_parent": None, "_owner": None, "lastModifyingUser": None}, 284 | {"id": "2", 
"_parent": None, "_owner": None, "lastModifyingUser": None}, 285 | {"id": "3", "_parent": None, "_owner": None, "lastModifyingUser": None}, 286 | {"id": "4", "_parent": None, "_owner": None, "lastModifyingUser": None}, 287 | ] 288 | else: 289 | results = json.loads(result.output) 290 | assert results == [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}] 291 | 292 | 293 | @pytest.mark.parametrize("verbosity_arg", ("-v", "--verbose")) 294 | def test_files_basic_stop_after_also_test_verbose(httpx_mock, verbosity_arg): 295 | httpx_mock.add_response( 296 | method="POST", 297 | json={"access_token": "atoken"}, 298 | ) 299 | httpx_mock.add_response( 300 | json={"nextPageToken": None, "files": [{"id": 3}, {"id": 4}]}, 301 | ) 302 | runner = CliRunner(mix_stderr=False) 303 | with runner.isolated_filesystem(): 304 | open("auth.json", "w").write(json.dumps(AUTH_JSON)) 305 | args = ["files", "--json", "--stop-after", "1", verbosity_arg] 306 | result = runner.invoke(cli, args) 307 | assert ( 308 | result.stderr == "POST https://www.googleapis.com/oauth2/v4/token\n" 309 | "GET: https://www.googleapis.com/drive/v3/files " 310 | "{'fields': 'nextPageToken, files(kind,id,name,mimeType,starred,trashed," 311 | "explicitlyTrashed,parents,spaces,version,webViewLink,iconLink,hasThumbnail," 312 | "thumbnailVersion,viewedByMe,createdTime,modifiedTime,modifiedByMe,owners," 313 | "lastModifyingUser,shared,ownedByMe,viewersCanCopyContent," 314 | "copyRequiresWriterPermission,writersCanShare,folderColorRgb,quotaBytesUsed," 315 | "isAppAuthorized,linkShareMetadata)'}\n" 316 | ) 317 | token_request, page1_request = httpx_mock.get_requests() 318 | assert token_request.content == TOKEN_REQUEST_CONTENT 319 | assert page1_request.url == ( 320 | "https://www.googleapis.com/drive/v3/files?fields=" 321 | + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS)) 322 | ) 323 | results = json.loads(result.output) 324 | assert results == [{"id": 3}] 325 | 326 | 327 | def 
def test_files_folder(httpx_mock):
    """files --folder fetches the folder's own record, then recursively lists
    children, issuing one listing request per nested folder it discovers."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    # Details of the starting folder itself
    httpx_mock.add_response(
        json={"id": "folder1", "mimeType": "application/vnd.google-apps.folder"},
    )
    # Children of folder1: one document and one nested folder
    httpx_mock.add_response(
        json={
            "nextPageToken": None,
            "files": [
                {"id": "doc1", "mimeType": "doc"},
                {"id": "folder2", "mimeType": "application/vnd.google-apps.folder"},
            ],
        }
    )
    # Children of the nested folder2 - matched by URL so ordering of the
    # recursive request does not matter
    httpx_mock.add_response(
        url=re.compile(".*folder2.*"),
        json={
            "nextPageToken": None,
            "files": [
                {"id": "doc2", "mimeType": "doc"},
            ],
        },
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        args = ["files", "--folder", "folder1", "--json"]
        result = runner.invoke(cli, args)
        (
            token_request,
            folder_details_request,
            folder1_request,
            folder2_request,
        ) = httpx_mock.get_requests()
        assert token_request.content == TOKEN_REQUEST_CONTENT
        assert folder_details_request.url == (
            "https://www.googleapis.com/drive/v3/files/folder1?fields="
            + "%2C".join(DEFAULT_FIELDS)
        )
        assert folder1_request.url == (
            "https://www.googleapis.com/drive/v3/files?fields="
            + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS))
            + "&q=%22folder1%22+in+parents"
        )
        assert folder2_request.url == (
            "https://www.googleapis.com/drive/v3/files?fields="
            + "nextPageToken%2C+files%28{}%29".format("%2C".join(DEFAULT_FIELDS))
            + "&q=%22folder2%22+in+parents"
        )
        # Output includes the folder itself followed by everything beneath it
        results = json.loads(result.output)
        assert results == [
            {"id": "folder1", "mimeType": "application/vnd.google-apps.folder"},
            {"id": "doc1", "mimeType": "doc"},
            {"id": "folder2", "mimeType": "application/vnd.google-apps.folder"},
            {"id": "doc2", "mimeType": "doc"},
        ]
def test_download_two_files(httpx_mock):
    """download writes each file to disk, deriving the extension from the
    response's content-type header."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        content="this is text",
        headers={"content-type": "text/plain"},
    )
    httpx_mock.add_response(
        content="this is gif",
        headers={"content-type": "image/gif"},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["download", "file1", "file2"])
        assert result.exit_code == 0
        # Content types map to extensions: text/plain -> file1.txt,
        # image/gif -> file2.gif
        assert open("file1.txt").read() == "this is text"
        assert open("file2.gif").read() == "this is gif"
        _, file1_request, file2_request = httpx_mock.get_requests()
        assert (
            file1_request.url == "https://www.googleapis.com/drive/v3/files/file1?alt=media"
        )
        assert (
            file2_request.url == "https://www.googleapis.com/drive/v3/files/file2?alt=media"
        )


def test_download_output_two_files_error():
    """-o cannot be combined with multiple file IDs."""
    runner = CliRunner()
    result = runner.invoke(cli, ["download", "file1", "file2", "-o", "out.txt"])
    assert result.exit_code == 1
    assert result.output == "Error: --output option only works with a single file\n"


def test_download_output_stdout(httpx_mock):
    """-o - streams the downloaded content to standard output."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        content="this is text",
        headers={"content-type": "text/plain"},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["download", "file1", "-o", "-"])
        assert result.exit_code == 0
        assert result.output == "this is text"


def test_download_output_path(httpx_mock):
    """-o PATH writes the downloaded content to that path verbatim."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        content="this is text",
        headers={"content-type": "text/plain"},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["download", "file1", "-o", "out.txt"])
        assert result.exit_code == 0
        assert open("out.txt").read() == "this is text"


def test_export_two_files(httpx_mock):
    """export FORMAT hits the /export endpoint and writes ID-export.EXT files."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    httpx_mock.add_response(
        content="this is pdf",
        headers={"content-type": "application/pdf"},
    )
    httpx_mock.add_response(
        content="this is also pdf",
        headers={"content-type": "application/pdf"},
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(cli, ["export", "pdf", "file1", "file2"])
        assert result.exit_code == 0
        assert open("file1-export.pdf").read() == "this is pdf"
        assert open("file2-export.pdf").read() == "this is also pdf"
        _, file1_request, file2_request = httpx_mock.get_requests()
        assert (
            file1_request.url
            == "https://www.googleapis.com/drive/v3/files/file1/export?mimeType=application%2Fpdf"
        )
        assert (
            file2_request.url
            == "https://www.googleapis.com/drive/v3/files/file2/export?mimeType=application%2Fpdf"
        )


def test_refresh_access_token_once_if_it_expires(httpx_mock):
    """A 401 authError response triggers exactly one token refresh followed
    by a retry of the original request with the new Bearer token."""
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken"},
    )
    # First attempt is rejected with Google's standard 401 authError payload
    httpx_mock.add_response(
        url="https://www.googleapis.com/drive/v3/about?fields=*",
        json={
            "error": {
                "errors": [
                    {
                        "domain": "global",
                        "reason": "authError",
                        "message": "Invalid Credentials",
                        "locationType": "header",
                        "location": "Authorization",
                    }
                ],
                "code": 401,
                "message": "Invalid Credentials",
            }
        },
        status_code=401,
    )
    httpx_mock.add_response(
        method="POST",
        json={"access_token": "atoken2"},
    )
    about_data = {
        "kind": "drive#about",
        "user": {"kind": "drive#user", "displayName": "User"},
    }
    httpx_mock.add_response(
        url="https://www.googleapis.com/drive/v3/about?fields=*",
        method="GET",
        json=about_data,
    )
    runner = CliRunner()
    with runner.isolated_filesystem():
        open("auth.json", "w").write(json.dumps(AUTH_JSON))
        result = runner.invoke(
            cli, ["get", "https://www.googleapis.com/drive/v3/about?fields=*"]
        )
        assert result.exit_code == 0

        assert json.loads(result.output) == about_data

    # Expected sequence: token, denied GET, second token, successful GET
    token1, about_denied, token2, about_success = httpx_mock.get_requests()
    for request in (token1, token2):
        assert request.method == "POST"
        assert request.url == "https://www.googleapis.com/oauth2/v4/token"
    for request2 in (about_denied, about_success):
        assert request2.method == "GET"
        assert request2.url == "https://www.googleapis.com/drive/v3/about?fields=*"
    assert about_denied.headers["Authorization"] == "Bearer atoken"
    assert about_success.headers["Authorization"] == "Bearer atoken2"
def test_files_input_real_example(httpx_mock):
    """End-to-end import of the folder-and-children.json fixture: verifies
    the resulting schema (including foreign keys) and the exact rows of the
    drive_files, drive_folders and drive_users tables. No HTTP requests
    should be made when importing from a file.

    NOTE(review): the exact whitespace inside the expected schema string
    below follows sqlite-utils' generated CREATE TABLE formatting - confirm
    against the pinned sqlite-utils version if this assertion drifts.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        result = runner.invoke(
            cli, ["files", "test.db", "--import-json", FOLDER_AND_CHILDREN_JSON_PATH]
        )
        assert len(httpx_mock.get_requests()) == 0
        assert result.exit_code == 0
        db = sqlite_utils.Database("test.db")
        assert set(db.table_names()) == {
            "drive_folders",
            "drive_files",
            "drive_users",
        }
        schema = db.schema
        assert (
            schema
            == "CREATE TABLE [drive_users] (\n   [permissionId] TEXT PRIMARY KEY\n,"
            " [kind] TEXT, [displayName] TEXT, [photoLink] TEXT, [me] INTEGER,"
            " [emailAddress] TEXT);\nCREATE TABLE [drive_folders] (\n   [id] TEXT"
            " PRIMARY KEY,\n   [_parent] TEXT,\n   [_owner] TEXT,\n  "
            " [lastModifyingUser] TEXT, [kind] TEXT, [name] TEXT, [mimeType] TEXT,"
            " [starred] INTEGER, [trashed] INTEGER, [explicitlyTrashed] INTEGER,"
            " [parents] TEXT, [spaces] TEXT, [version] TEXT, [webViewLink] TEXT,"
            " [iconLink] TEXT, [hasThumbnail] INTEGER, [thumbnailVersion] TEXT,"
            " [viewedByMe] INTEGER, [createdTime] TEXT, [modifiedTime] TEXT,"
            " [modifiedByMe] INTEGER, [shared] INTEGER, [ownedByMe] INTEGER,"
            " [viewersCanCopyContent] INTEGER, [copyRequiresWriterPermission]"
            " INTEGER, [writersCanShare] INTEGER, [folderColorRgb] TEXT,"
            " [quotaBytesUsed] TEXT, [isAppAuthorized] INTEGER, [linkShareMetadata]"
            " TEXT,\n   FOREIGN KEY([_parent]) REFERENCES [drive_folders]([id]),\n  "
            " FOREIGN KEY([_owner]) REFERENCES [drive_users]([permissionId]),\n  "
            " FOREIGN KEY([lastModifyingUser]) REFERENCES"
            " [drive_users]([permissionId])\n);\nCREATE TABLE [drive_files] (\n  "
            " [id] TEXT PRIMARY KEY,\n   [_parent] TEXT,\n   [_owner] TEXT,\n  "
            " [lastModifyingUser] TEXT, [kind] TEXT, [name] TEXT, [mimeType] TEXT,"
            " [starred] INTEGER, [trashed] INTEGER, [explicitlyTrashed] INTEGER,"
            " [parents] TEXT, [spaces] TEXT, [version] TEXT, [webViewLink] TEXT,"
            " [iconLink] TEXT, [hasThumbnail] INTEGER, [thumbnailVersion] TEXT,"
            " [viewedByMe] INTEGER, [createdTime] TEXT, [modifiedTime] TEXT,"
            " [modifiedByMe] INTEGER, [shared] INTEGER, [ownedByMe] INTEGER,"
            " [viewersCanCopyContent] INTEGER, [copyRequiresWriterPermission]"
            " INTEGER, [writersCanShare] INTEGER, [quotaBytesUsed] TEXT,"
            " [isAppAuthorized] INTEGER, [linkShareMetadata] TEXT,\n   FOREIGN"
            " KEY([_parent]) REFERENCES [drive_folders]([id]),\n   FOREIGN"
            " KEY([_owner]) REFERENCES [drive_users]([permissionId]),\n   FOREIGN"
            " KEY([lastModifyingUser]) REFERENCES [drive_users]([permissionId])\n);"
        )
        files_rows = list(db["drive_files"].rows)
        folders_rows = list(db["drive_folders"].rows)
        users_rows = list(db["drive_users"].rows)
        # sample.csv's lastModifyingUser in the fixture lacks a permissionId,
        # so it is stored as NULL rather than a drive_users reference
        assert files_rows == [
            {
                "id": "1Xdqfeoi8B8YJJR0y-_oQlHYpjHHzD5a-",
                "_parent": "113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN",
                "_owner": "16974643384157631322",
                "lastModifyingUser": None,
                "kind": "drive#file",
                "name": "sample.csv",
                "mimeType": "text/csv",
                "starred": 0,
                "trashed": 0,
                "explicitlyTrashed": 0,
                "parents": '["113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN"]',
                "spaces": '["drive"]',
                "version": "2",
                "webViewLink": "https://drive.google.com/file/d/1Xdqfeoi8B8YJJR0y-_oQlHYpjHHzD5a-/view?usp=drivesdk",
                "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/text/csv",
                "hasThumbnail": 0,
                "thumbnailVersion": "0",
                "viewedByMe": 1,
                "createdTime": "2022-02-19T04:25:16.517Z",
                "modifiedTime": "2020-11-11T18:10:31.000Z",
                "modifiedByMe": 1,
                "shared": 0,
                "ownedByMe": 1,
                "viewersCanCopyContent": 1,
                "copyRequiresWriterPermission": 0,
                "writersCanShare": 1,
                "quotaBytesUsed": "1070506",
                "isAppAuthorized": 0,
                "linkShareMetadata": '{"securityUpdateEligible": false, "securityUpdateEnabled": true}',
            }
        ]
        assert folders_rows == [
            {
                "id": "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j",
                "_parent": "0AK1CICIR8ECDUk9PVA",
                "_owner": "16974643384157631322",
                "lastModifyingUser": "16974643384157631322",
                "kind": "drive#file",
                "name": "test-folder",
                "mimeType": "application/vnd.google-apps.folder",
                "starred": 0,
                "trashed": 0,
                "explicitlyTrashed": 0,
                "parents": '["0AK1CICIR8ECDUk9PVA"]',
                "spaces": '["drive"]',
                "version": "4",
                "webViewLink": "https://drive.google.com/drive/folders/1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j",
                "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder",
                "hasThumbnail": 0,
                "thumbnailVersion": "0",
                "viewedByMe": 1,
                "createdTime": "2022-02-19T04:22:24.589Z",
                "modifiedTime": "2022-02-19T04:22:24.589Z",
                "modifiedByMe": 1,
                "shared": 0,
                "ownedByMe": 1,
                "viewersCanCopyContent": 1,
                "copyRequiresWriterPermission": 0,
                "writersCanShare": 1,
                "folderColorRgb": "#8f8f8f",
                "quotaBytesUsed": "0",
                "isAppAuthorized": 0,
                "linkShareMetadata": '{"securityUpdateEligible": false, "securityUpdateEnabled": true}',
            },
            {
                "id": "1FYLDMMXi1-gGjxg8dLmvbiixDuR8-FZ3",
                "_parent": "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j",
                "_owner": "16974643384157631322",
                "lastModifyingUser": "16974643384157631322",
                "kind": "drive#file",
                "name": "two",
                "mimeType": "application/vnd.google-apps.folder",
                "starred": 0,
                "trashed": 0,
                "explicitlyTrashed": 0,
                "parents": '["1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j"]',
                "spaces": '["drive"]',
                "version": "1",
                "webViewLink": "https://drive.google.com/drive/folders/1FYLDMMXi1-gGjxg8dLmvbiixDuR8-FZ3",
                "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder",
                "hasThumbnail": 0,
                "thumbnailVersion": "0",
                "viewedByMe": 1,
                "createdTime": "2022-02-19T04:22:38.714Z",
                "modifiedTime": "2022-02-19T04:22:38.714Z",
                "modifiedByMe": 1,
                "shared": 0,
                "ownedByMe": 1,
                "viewersCanCopyContent": 1,
                "copyRequiresWriterPermission": 0,
                "writersCanShare": 1,
                "folderColorRgb": "#8f8f8f",
                "quotaBytesUsed": "0",
                "isAppAuthorized": 0,
                "linkShareMetadata": '{"securityUpdateEligible": false, "securityUpdateEnabled": true}',
            },
            {
                "id": "113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN",
                "_parent": "1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j",
                "_owner": "16974643384157631322",
                "lastModifyingUser": "16974643384157631322",
                "kind": "drive#file",
                "name": "one",
                "mimeType": "application/vnd.google-apps.folder",
                "starred": 0,
                "trashed": 0,
                "explicitlyTrashed": 0,
                "parents": '["1dbccBzomcvEUGdnoj8-9QG1yHxS0R-_j"]',
                "spaces": '["drive"]',
                "version": "2",
                "webViewLink": "https://drive.google.com/drive/folders/113Wb_KLL1dtgx3vpeRfSTOYIUDf3QnnN",
                "iconLink": "https://drive-thirdparty.googleusercontent.com/16/type/application/vnd.google-apps.folder",
                "hasThumbnail": 0,
                "thumbnailVersion": "0",
                "viewedByMe": 1,
                "createdTime": "2022-02-19T04:22:33.581Z",
                "modifiedTime": "2022-02-19T04:22:33.581Z",
                "modifiedByMe": 1,
                "shared": 0,
                "ownedByMe": 1,
                "viewersCanCopyContent": 1,
                "copyRequiresWriterPermission": 0,
                "writersCanShare": 1,
                "folderColorRgb": "#8f8f8f",
                "quotaBytesUsed": "0",
                "isAppAuthorized": 0,
                "linkShareMetadata": '{"securityUpdateEligible": false, "securityUpdateEnabled": true}',
            },
        ]
        # All records share a single owner/lastModifyingUser, so drive_users
        # contains exactly one de-duplicated row
        assert users_rows == [
            {
                "permissionId": "16974643384157631322",
                "kind": "drive#user",
                "displayName": "Simon Willison",
                "photoLink": "https://lh3.googleusercontent.com/a-/AOh14Gg9Loyxove5ocfBp0mg0u2afcTpM1no8QJnwbWnxw=s64",
                "me": 1,
                "emailAddress": "...@gmail.com",
            }
        ]
should_succeed: 816 | num_expected += 1 817 | assert len(requests) == num_expected 818 | 819 | # Test log output for num_exceptions = 2 820 | if num_exceptions == 2: 821 | assert result.stderr == ( 822 | "POST https://www.googleapis.com/oauth2/v4/token\n" 823 | "GET: https://www.googleapis.com/drive/v3/about?fields=*\n" 824 | + " Got {}, retrying\n".format(exception.__name__) 825 | + "GET: https://www.googleapis.com/drive/v3/about?fields=*\n" 826 | + " Got {}, retrying\n".format(exception.__name__) 827 | + "GET: https://www.googleapis.com/drive/v3/about?fields=*\n" 828 | ) 829 | --------------------------------------------------------------------------------