├── .circleci └── config.yml ├── .github └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── dogsheep_photos ├── __init__.py ├── cli.py └── utils.py ├── setup.py └── tests └── test_s3_auth.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | workflows: 3 | build_and_deploy: 4 | jobs: 5 | - build: 6 | filters: 7 | tags: 8 | only: /.*/ 9 | - test-python-install: 10 | version: "3.8" 11 | requires: 12 | - build 13 | - deploy: 14 | requires: 15 | - build 16 | filters: 17 | tags: 18 | only: /[0-9]+(\.[0-9]+)*[ab]?/ 19 | branches: 20 | ignore: /.*/ 21 | jobs: 22 | build: 23 | docker: 24 | - image: circleci/python:3.8 25 | steps: 26 | - checkout 27 | - restore_cache: 28 | key: v1-dependency-cache-{{ checksum "setup.py" }} 29 | - run: 30 | name: install python dependencies 31 | command: | 32 | python3 -m venv venv 33 | . venv/bin/activate 34 | pip install -e . 35 | - save_cache: 36 | key: v1-dependency-cache-{{ checksum "setup.py" }} 37 | paths: 38 | - "venv" 39 | - run: 40 | name: run tests 41 | command: | 42 | . venv/bin/activate 43 | pip install -e . 44 | pip install pytest 45 | dogsheep-photos --help 46 | pytest 47 | test-python-install: 48 | parameters: 49 | version: 50 | type: string 51 | default: latest 52 | docker: 53 | - image: circleci/python:<< parameters.version >> 54 | steps: 55 | - checkout 56 | - restore_cache: 57 | key: v1-dependency-cache-{{ checksum "setup.py" }} 58 | - run: 59 | name: install python dependencies 60 | command: | 61 | python3 -m venv venv 62 | . venv/bin/activate 63 | pip install -e . 64 | - save_cache: 65 | key: v1-dependency-cache-{{ checksum "setup.py" }} 66 | paths: 67 | - "venv" 68 | - run: 69 | name: run tests 70 | command: | 71 | . venv/bin/activate 72 | pip install -e . 73 | pip install pytest 74 | dogsheep-photos --help 75 | pytest 76 | deploy: 77 | docker: 78 | - image: circleci/python:3.8 79 | steps: 80 | - checkout 81 | - restore_cache: 82 | key: v1-dependency-cache-{{ checksum "setup.py" }} 83 | - run: 84 | name: install python dependencies 85 | command: | 86 | python3 -m venv venv 87 | . venv/bin/activate 88 | pip install -e . 89 | - save_cache: 90 | key: v1-dependency-cache-{{ checksum "setup.py" }} 91 | paths: 92 | - "venv" 93 | - run: 94 | name: init .pypirc 95 | command: | 96 | echo -e "[pypi]" >> ~/.pypirc 97 | echo -e "username = simonw" >> ~/.pypirc 98 | echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc 99 | - run: 100 | name: create packages 101 | command: | 102 | python setup.py bdist_wheel 103 | - run: 104 | name: upload to pypi 105 | command: | 106 | . 
venv/bin/activate 107 | pip install twine 108 | twine upload dist/* 109 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [master] 9 | pull_request: 10 | branches: [master] 11 | 12 | jobs: 13 | build: 14 | runs-on: macos-latest 15 | strategy: 16 | matrix: 17 | python-version: [3.7, 3.8] 18 | steps: 19 | - uses: actions/checkout@v2 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v1 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - uses: actions/cache@v1 25 | name: Configure pip caching 26 | with: 27 | path: ~/.cache/pip 28 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 29 | restore-keys: | 30 | ${{ runner.os }}-pip- 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip 34 | pip install -e .[test] 35 | - name: Test with pytest 36 | run: | 37 | pytest 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | auth.json 2 | .DS_Store 3 | .venv 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | venv 8 | .eggs 9 | .pytest_cache 10 | *.egg-info 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dogsheep-photos 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/dogsheep-photos.svg)](https://pypi.org/project/dogsheep-photos/) 4 | [![Changelog](https://img.shields.io/github/v/release/dogsheep/dogsheep-photos?include_prereleases&label=changelog)](https://github.com/dogsheep/dogsheep-photos/releases) 5 | [![CircleCI](https://circleci.com/gh/dogsheep/dogsheep-photos.svg?style=svg)](https://circleci.com/gh/dogsheep/dogsheep-photos) 6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/dogsheep/dogsheep-photos/blob/master/LICENSE) 7 | 8 | Save details of your photos to a SQLite database and upload them to S3. 9 | 10 | See [Using SQL to find my best photo of a pelican according to Apple Photos](https://simonwillison.net/2020/May/21/apple-photos-sqlite/) for background information on this project. 11 | 12 | ## What these tools do 13 | 14 | These tools are a work-in-progress mechanism for taking full ownership of your photos. The core idea is to help implement the following: 15 | 16 | * Every photo you have taken lives in a single, private Amazon S3 bucket 17 | * You have a single SQLite database file which stores metadata about those photos - potentially pulled from multiple different places. This may include EXIF data, Apple Photos, the results of running machine learning APIs against photos and much more besides. 18 | * You can then use [Datasette](https://github.com/simonw/datasette) to explore your own photos. 19 | 20 | I'm a heavy user of Apple Photos so the initial releases of this tool will have a bias towards that, but ideally I would like a subset of these tools to be useful to people no matter which core photo solution they are using. 21 | 22 | ## Installation 23 | 24 | $ pip install dogsheep-photos 25 | 26 | ## Authentication (if using S3) 27 | 28 | If you want to use S3 to store your photos, you will need to first create S3 credentials for a new, dedicated bucket. 29 | 30 | You may find the [s3-credentials tool](https://github.com/simonw/s3-credentials) useful for this. 31 | 32 | Run this command and paste in your credentials. You will need three values: the name of your S3 bucket, your Access key ID and your Secret access key. 33 | 34 | $ dogsheep-photos s3-auth 35 | 36 | This will create a file called `auth.json` in your current directory containing the required values. To save the file at a different path or filename, use the `--auth=myauth.json` option. 37 | 38 | ## Uploading photos 39 | 40 | Run this command to upload every photo in a specific directory to your S3 bucket: 41 | 42 | $ dogsheep-photos upload photos.db \ 43 | ~/Pictures/Photos\ Library.photoslibrary/originals 44 | 45 | The command will only upload photos that have not yet been uploaded, based on their sha256 hash. 46 | 47 | `photos.db` will be created with an `uploads` table containing details of which files were uploaded. 48 | 49 | To see what the command would do without uploading any files, use the `--dry-run` option. 50 | 51 | The sha256 hash of the photo contents will be used as the name of the file in the bucket, with an extension matching the type of file. This is an implementation of the [Content addressable storage](https://en.wikipedia.org/wiki/Content-addressable_storage) pattern.
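As an illustration, here is a minimal sketch of how that content-addressable key is derived - it mirrors the `calculate_hash()` and key-naming logic in `dogsheep_photos/utils.py` (the `s3_key_for` helper is named here just for this example):

```python
import hashlib
import pathlib

# Hash in 1 MB blocks, as dogsheep_photos/utils.py does, so large photos
# never need to be held in memory all at once
HASH_BLOCK_SIZE = 1024 * 1024


def s3_key_for(path: pathlib.Path) -> str:
    "Return the content-addressable S3 key for a photo: <sha256>.<ext>"
    m = hashlib.sha256()
    with path.open("rb") as fp:
        while True:
            data = fp.read(HASH_BLOCK_SIZE)
            if not data:
                break
            m.update(data)
    return "{}.{}".format(m.hexdigest(), path.suffix.lstrip("."))
```

Because the key depends only on the file contents, re-running the upload command can safely skip any photo whose key already exists in the bucket, and moving or renaming a photo on disk never produces a duplicate upload.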
52 | 53 | ## Importing Apple Photos metadata 54 | 55 | The `apple-photos` command imports metadata from your Apple Photos library. 56 | 57 | $ dogsheep-photos apple-photos photos.db 58 | 59 | Imported metadata includes places, people, albums, quality scores and machine learning labels for the photo contents. 60 | 61 | ## Creating a subset database 62 | 63 | You can create a new, subset database of photos using the `create-subset` command. 64 | 65 | This is useful for creating a shareable SQLite database that only contains metadata for a selected set of photos. 66 | 67 | Since photo metadata contains latitude and longitude you may not want to share a database that includes photos taken at your home address. 68 | 69 | `create-subset` takes three arguments: an existing database file created using the `apple-photos` command, the name of the new, shareable database file you would like to create and a SQL query that returns the `sha256` hash values of the photos you would like to include in that database. 70 | 71 | For example, here's how to create a shareable database of just the photos that have been added to albums containing the word "Public": 72 | 73 | $ dogsheep-photos create-subset \ 74 | photos.db \ 75 | public.db \ 76 | "select sha256 from apple_photos where albums like '%Public%'" 77 | 78 | ## Serving photos locally with datasette-media 79 | 80 | If you don't want to upload your photos to S3 but you still want to browse them using Datasette you can do so using the [datasette-media](https://github.com/simonw/datasette-media) plugin. This plugin adds the ability to serve images and other static files directly from disk, configured using a SQL query. 81 | 82 | To use it, first install Datasette and the plugin: 83 | 84 | $ pip install datasette datasette-media 85 | 86 | If any of your photos are `.HEIC` images taken by an iPhone you should also install the optional `pyheif` dependency: 87 | 88 | $ pip install pyheif 89 | 90 | Now create a `metadata.yaml` file configuring the plugin: 91 | 92 | ```yaml 93 | plugins: 94 | datasette-media: 95 | thumbnail: 96 | sql: |- 97 | select path as filepath, 200 as resize_height from apple_photos where uuid = :key 98 | large: 99 | sql: |- 100 | select path as filepath, 1024 as resize_height from apple_photos where uuid = :key 101 | ``` 102 | This will configure two URL endpoints - one for 200 pixel high thumbnails and one for 1024 pixel high larger images. 103 | 104 | Create your `photos.db` database using the `apple-photos` command, then run Datasette like this: 105 | 106 | $ datasette photos.db -m metadata.yaml 107 | 108 | Your photos will be served on URLs that look like this: 109 | 110 | http://127.0.0.1:8001/-/media/thumbnail/F4469918-13F3-43D8-9EC1-734C0E6B60AD 111 | http://127.0.0.1:8001/-/media/large/F4469918-13F3-43D8-9EC1-734C0E6B60AD 112 | 113 | You can find the UUIDs for use in these URLs by running `select uuid from photos_with_apple_metadata`. 114 | 115 | ### Displaying images using datasette-json-html 116 | 117 | If you are using `datasette-media` to serve photos you can include images directly in Datasette query results using the [datasette-json-html](https://github.com/simonw/datasette-json-html) plugin.
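The plugin works by rendering any string cell that contains a JSON object with an `img_src` key as an HTML `<img>` element. As a minimal sketch of the value shape involved - assuming `photos.db` was built by the `apple-photos` command and that your Python's SQLite build includes the JSON1 functions - you can inspect one of these cells directly:

```python
import sqlite3

conn = sqlite3.connect("photos.db")
# Build the same JSON object the queries below return for each photo
row = conn.execute(
    """
    select json_object('img_src', '/-/media/thumbnail/' || uuid) as photo
    from apple_photos order by date desc limit 1
    """
).fetchone()
print(row[0])
# e.g. {"img_src":"/-/media/thumbnail/F4469918-13F3-43D8-9EC1-734C0E6B60AD"}
```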
118 | 119 | Run `pip install datasette-json-html` to install the plugin, then use queries like this to view your images: 120 | 121 | ```sql 122 | select 123 | json_object( 124 | 'img_src', 125 | '/-/media/thumbnail/' || uuid 126 | ) as photo, 127 | uuid, 128 | date 129 | from 130 | apple_photos 131 | order by 132 | date desc 133 | limit 10; 134 | ``` 135 | The `photo` column returned by this query should render as image tags that display the correct images. 136 | 137 | ### Displaying images using custom template pages 138 | 139 | Datasette's [custom pages](https://datasette.readthedocs.io/en/stable/custom_templates.html#custom-pages) feature lets you create custom pages for a Datasette instance by dropping HTML templates into a `templates/pages` directory and then running Datasette using `datasette --template-dir=templates/`. 140 | 141 | You can combine that ability with the [datasette-template-sql](https://github.com/simonw/datasette-template-sql) plugin to create custom template pages that directly display photos served by `datasette-media`. 142 | 143 | Install the plugin using `pip install datasette-template-sql`. 144 | 145 | Create a `templates/pages` folder and add the following files: 146 | 147 | `recent-photos.html` 148 | ```html+jinja 149 |
<h1>Recent photos</h1>
150 | 
151 | <div>
152 | {% for photo in sql("select * from apple_photos order by date desc limit 20") %}
153 |     <img src="/-/media/thumbnail/{{ photo['uuid'] }}">
154 | {% endfor %}
155 | </div>
156 | ``` 157 | `random-photos.html` 158 | ```html+jinja 159 |
<h1>Random photos</h1>
160 | 
161 | <div>
162 | {% for photo in sql("with foo as (select * from apple_photos order by date desc limit 5000) select * from foo order by random() limit 20") %}
163 |     <img src="/-/media/thumbnail/{{ photo['uuid'] }}">
164 | {% endfor %}
165 | </div>
166 | ``` 167 | Now run Datasette like this: 168 | 169 | $ datasette photos.db -m metadata.yaml --template-dir=templates/ 170 | 171 | Visiting `http://localhost:8001/recent-photos` will display 20 recent photos. Visiting `http://localhost:8001/random-photos` will display 20 photos randomly selected from your 5,000 most recent. 172 | 173 | -------------------------------------------------------------------------------- /dogsheep_photos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dogsheep/dogsheep-photos/325aa38cb23d0757bb1335ee2ea94a082475a66e/dogsheep_photos/__init__.py -------------------------------------------------------------------------------- /dogsheep_photos/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | import concurrent.futures 3 | import sqlite_utils 4 | from sqlite_utils.db import OperationalError 5 | 6 | try: 7 | import osxphotos 8 | except ImportError: 9 | osxphotos = None 10 | import sqlite3 11 | import boto3 12 | import json 13 | import pathlib 14 | from .utils import ( 15 | calculate_hash, 16 | image_paths, 17 | CONTENT_TYPES, 18 | get_all_keys, 19 | osxphoto_to_row, 20 | to_uuid, 21 | s3_upload, 22 | hash_and_size_path, 23 | ) 24 | 25 | 26 | @click.group() 27 | @click.version_option() 28 | def cli(): 29 | "Save details of your photos to a SQLite database and upload them to S3" 30 | 31 | 32 | @cli.command(name="s3-auth") 33 | @click.option( 34 | "-a", 35 | "--auth", 36 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 37 | default="auth.json", 38 | help="Path to save tokens to, defaults to auth.json", 39 | ) 40 | def s3_auth(auth): 41 | "Save S3 credentials to a JSON file" 42 | click.echo("Create S3 credentials and paste them here:") 43 | click.echo() 44 | bucket = click.prompt("S3 bucket") 45 | access_key_id = click.prompt("Access key ID") 46 | secret_access_key = click.prompt("Secret access key") 47 | if pathlib.Path(auth).exists(): 48 | auth_data = json.load(open(auth)) 49 | else: 50 | auth_data = {} 51 | auth_data.update( 52 | { 53 | "photos_s3_bucket": bucket, 54 | "photos_s3_access_key_id": access_key_id, 55 | "photos_s3_secret_access_key": secret_access_key, 56 | } 57 | ) 58 | open(auth, "w").write(json.dumps(auth_data, indent=4) + "\n") 59 | 60 | 61 | @cli.command() 62 | @click.argument( 63 | "db_path", 64 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 65 | required=True, 66 | ) 67 | @click.argument( 68 | "directories", 69 | nargs=-1, 70 | type=click.Path(file_okay=False, dir_okay=True, allow_dash=False), 71 | ) 72 | @click.option( 73 | "-a", 74 | "--auth", 75 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), 76 | default="auth.json", 77 | help="Path to auth.json token file", 78 | ) 79 | @click.option( 80 | "--no-progress", is_flag=True, help="Don't show progress bar", 81 | ) 82 | @click.option( 83 | "--dry-run", is_flag=True, help="Don't upload, just show what would happen", 84 | ) 85 | def upload(db_path, directories, auth, no_progress, dry_run): 86 | "Upload photos from directories to S3" 87 | creds = json.load(open(auth)) 88 | db = sqlite_utils.Database(db_path) 89 | client = boto3.client( 90 | "s3", 91 | aws_access_key_id=creds["photos_s3_access_key_id"], 92 | aws_secret_access_key=creds["photos_s3_secret_access_key"], 93 | ) 94 | click.echo("Fetching existing keys from S3...") 95 | existing_keys = { 96 | key.split(".")[0] for key in get_all_keys(client, 
creds["photos_s3_bucket"]) 97 | } 98 | click.echo("Got {:,} existing keys".format(len(existing_keys))) 99 | # Now calculate sizes and hashes for files 100 | paths = list(image_paths(directories)) 101 | hash_and_size = {} 102 | hash_bar = None 103 | if not no_progress: 104 | hash_bar = click.progressbar(paths, label="Calculating hashes") 105 | # hashlib docs say: 'For better multithreading performance,the Python GIL is 106 | # released for data larger than 2047 bytes at object creation or on update' 107 | with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: 108 | future_to_path = { 109 | executor.submit(hash_and_size_path, path.resolve()): path for path in paths 110 | } 111 | for future in concurrent.futures.as_completed(future_to_path): 112 | path, sha256, size = future.result() 113 | if hash_bar: 114 | hash_bar.update(1) 115 | hash_and_size[path] = (sha256, size) 116 | 117 | hashes = {v[0] for v in hash_and_size.values()} 118 | new_paths = [p for p in hash_and_size if hash_and_size[p][0] not in existing_keys] 119 | click.echo( 120 | "\n{:,} hashed files, {:,} are not yet in S3".format( 121 | len(hashes), len(new_paths) 122 | ) 123 | ) 124 | 125 | uploads = db.table("uploads", pk="sha256") 126 | total_size = None 127 | bar = None 128 | if dry_run or not no_progress: 129 | # Calculate total size first 130 | total_size = sum(hash_and_size[p][1] for p in new_paths) 131 | click.echo( 132 | "{verb} {num} files, {total_size:.2f} GB".format( 133 | verb="Would upload" if dry_run else "Uploading", 134 | num=len(new_paths), 135 | total_size=total_size / (1024 * 1024 * 1024), 136 | ) 137 | ) 138 | bar = click.progressbar( 139 | length=len(new_paths), 140 | label="Uploading {size:,} files".format(size=len(new_paths)), 141 | show_eta=True, 142 | show_pos=True, 143 | ) 144 | 145 | if dry_run: 146 | return 147 | 148 | # Upload photos in a thread pool 149 | with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: 150 | 151 | future_to_path = {} 152 | for path in new_paths: 153 | ext = path.suffix.lstrip(".") 154 | sha256, size = hash_and_size[path] 155 | future = executor.submit(s3_upload, path, sha256, ext, creds) 156 | future_to_path[future] = path 157 | 158 | for future in concurrent.futures.as_completed(future_to_path): 159 | path = future.result() 160 | sha256, size = hash_and_size[path] 161 | ext = path.suffix.lstrip(".") 162 | uploads.upsert( 163 | {"sha256": sha256, "filepath": str(path), "ext": ext, "size": size} 164 | ) 165 | if bar: 166 | bar.update(1) 167 | 168 | 169 | @cli.command(name="apple-photos") 170 | @click.argument( 171 | "db_path", 172 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 173 | required=True, 174 | ) 175 | @click.option( 176 | "--library", 177 | type=click.Path(file_okay=False, dir_okay=True, allow_dash=False), 178 | help="Location of Photos library to import", 179 | ) 180 | @click.option( 181 | "--image-url-prefix", 182 | help="URL prefix of hosted images - suffix will be sha256.ext", 183 | ) 184 | @click.option( 185 | "--image-url-suffix", help="URL suffix of hosted images, e.g. 
?w=600", default="" 186 | ) 187 | def apple_photos(db_path, library, image_url_prefix, image_url_suffix): 188 | "Import photo metadata from Apple Photos" 189 | if osxphotos is None: 190 | raise click.ClickException("Missing dependency osxphotos") 191 | db = sqlite_utils.Database(db_path) 192 | # Ensure index 193 | try: 194 | db["uploads"].create_index(["filepath"]) 195 | except OperationalError: 196 | pass 197 | 198 | if library: 199 | photosdb = osxphotos.PhotosDB(library) 200 | else: 201 | photosdb = osxphotos.PhotosDB() 202 | 203 | db.conn.execute("ATTACH DATABASE '{}' AS attached".format(photosdb._tmp_db)) 204 | if "apple_photos_scores" in db.table_names(): 205 | db["apple_photos_scores"].drop() 206 | db.conn.execute( 207 | """ 208 | create table apple_photos_scores as select 209 | ZGENERICASSET.ZUUID, 210 | ZGENERICASSET.ZOVERALLAESTHETICSCORE, 211 | ZGENERICASSET.ZCURATIONSCORE, 212 | ZGENERICASSET.ZPROMOTIONSCORE, 213 | ZGENERICASSET.ZHIGHLIGHTVISIBILITYSCORE, 214 | ZCOMPUTEDASSETATTRIBUTES.ZBEHAVIORALSCORE, 215 | ZCOMPUTEDASSETATTRIBUTES.ZFAILURESCORE, 216 | ZCOMPUTEDASSETATTRIBUTES.ZHARMONIOUSCOLORSCORE, 217 | ZCOMPUTEDASSETATTRIBUTES.ZIMMERSIVENESSSCORE, 218 | ZCOMPUTEDASSETATTRIBUTES.ZINTERACTIONSCORE, 219 | ZCOMPUTEDASSETATTRIBUTES.ZINTERESTINGSUBJECTSCORE, 220 | ZCOMPUTEDASSETATTRIBUTES.ZINTRUSIVEOBJECTPRESENCESCORE, 221 | ZCOMPUTEDASSETATTRIBUTES.ZLIVELYCOLORSCORE, 222 | ZCOMPUTEDASSETATTRIBUTES.ZLOWLIGHT, 223 | ZCOMPUTEDASSETATTRIBUTES.ZNOISESCORE, 224 | ZCOMPUTEDASSETATTRIBUTES.ZPLEASANTCAMERATILTSCORE, 225 | ZCOMPUTEDASSETATTRIBUTES.ZPLEASANTCOMPOSITIONSCORE, 226 | ZCOMPUTEDASSETATTRIBUTES.ZPLEASANTLIGHTINGSCORE, 227 | ZCOMPUTEDASSETATTRIBUTES.ZPLEASANTPATTERNSCORE, 228 | ZCOMPUTEDASSETATTRIBUTES.ZPLEASANTPERSPECTIVESCORE, 229 | ZCOMPUTEDASSETATTRIBUTES.ZPLEASANTPOSTPROCESSINGSCORE, 230 | ZCOMPUTEDASSETATTRIBUTES.ZPLEASANTREFLECTIONSSCORE, 231 | ZCOMPUTEDASSETATTRIBUTES.ZPLEASANTSYMMETRYSCORE, 232 | ZCOMPUTEDASSETATTRIBUTES.ZSHARPLYFOCUSEDSUBJECTSCORE, 233 | ZCOMPUTEDASSETATTRIBUTES.ZTASTEFULLYBLURREDSCORE, 234 | ZCOMPUTEDASSETATTRIBUTES.ZWELLCHOSENSUBJECTSCORE, 235 | ZCOMPUTEDASSETATTRIBUTES.ZWELLFRAMEDSUBJECTSCORE, 236 | ZCOMPUTEDASSETATTRIBUTES.ZWELLTIMEDSHOTSCORE 237 | from 238 | attached.ZGENERICASSET 239 | join attached.ZCOMPUTEDASSETATTRIBUTES on 240 | attached.ZGENERICASSET.Z_PK = attached.ZCOMPUTEDASSETATTRIBUTES.ZASSET; 241 | """ 242 | ) 243 | db["apple_photos_scores"].create_index(["ZUUID"]) 244 | 245 | skipped = [] 246 | 247 | with click.progressbar(photosdb.photos()) as photos: 248 | for photo in photos: 249 | rows = list(db["uploads"].rows_where("filepath=?", [photo.path])) 250 | if rows: 251 | sha256 = rows[0]["sha256"] 252 | else: 253 | if photo.ismissing: 254 | print("Missing: {}".format(photo)) 255 | continue 256 | sha256 = calculate_hash(pathlib.Path(photo.path)) 257 | photo_row = osxphoto_to_row(sha256, photo) 258 | db["apple_photos"].insert( 259 | photo_row, pk="uuid", replace=True, alter=True, 260 | ) 261 | # Ensure indexes 262 | for column in ("date", "sha256"): 263 | try: 264 | db["apple_photos"].create_index([column]) 265 | except OperationalError: 266 | pass 267 | db.create_view( 268 | "photos_with_apple_metadata", 269 | """ 270 | select 271 | apple_photos.rowid,{} 272 | apple_photos.uuid, 273 | apple_photos.date, 274 | apple_photos.albums, 275 | apple_photos.persons, 276 | uploads.ext, 277 | uploads.sha256, 278 | uploads.size, 279 | latitude, 280 | longitude, 281 | favorite, 282 | portrait, 283 | screenshot, 284 | slow_mo, 285 | time_lapse, 286 | 
hdr, 287 | selfie, 288 | panorama, 289 | place_city, 290 | place_state_province, 291 | place_country, 292 | apple_photos_scores.* 293 | from 294 | apple_photos 295 | join 296 | uploads on apple_photos.sha256 = uploads.sha256 297 | left join 298 | apple_photos_scores on apple_photos.uuid = apple_photos_scores.ZUUID 299 | order by 300 | apple_photos.date desc 301 | """.format( 302 | """ 303 | json_object( 304 | 'img_src', 305 | '{}' || uploads.sha256 || '.' || uploads.ext || '{}' 306 | ) as photo,""".format( 307 | image_url_prefix, image_url_suffix 308 | ) 309 | if image_url_prefix 310 | else "" 311 | ), 312 | replace=True, 313 | ) 314 | 315 | # Last step: import the labels 316 | labels_db_path = photosdb._dbfile_actual.parent / "search" / "psi.sqlite" 317 | if labels_db_path.exists(): 318 | labels_db = sqlite3.connect(str(labels_db_path)) 319 | if db["labels"].exists(): 320 | db["labels"].drop() 321 | 322 | def all_labels(): 323 | result = labels_db.execute( 324 | """ 325 | select 326 | ga.rowid, 327 | assets.uuid_0, 328 | assets.uuid_1, 329 | groups.rowid as groupid, 330 | groups.category, 331 | groups.owning_groupid, 332 | groups.content_string, 333 | groups.normalized_string, 334 | groups.lookup_identifier 335 | from 336 | ga 337 | join groups on groups.rowid = ga.groupid 338 | join assets on ga.assetid = assets.rowid 339 | order by 340 | ga.rowid 341 | """ 342 | ) 343 | cols = [c[0] for c in result.description] 344 | for row in result.fetchall(): 345 | record = dict(zip(cols, row)) 346 | id = record.pop("rowid") 347 | uuid = to_uuid(record.pop("uuid_0"), record.pop("uuid_1")) 348 | # Strip out the `\u0000` characters: 349 | for key in record: 350 | if isinstance(record[key], str): 351 | record[key] = record[key].replace("\x00", "") 352 | yield {"id": id, "uuid": uuid, **record} 353 | 354 | db["labels"].insert_all(all_labels(), pk="id", replace=True) 355 | db["labels"].create_index(["uuid"], if_not_exists=True) 356 | db["labels"].create_index(["normalized_string"], if_not_exists=True) 357 | 358 | 359 | @cli.command(name="create-subset") 360 | @click.argument( 361 | "db_path", 362 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False, exists=True), 363 | ) 364 | @click.argument( 365 | "new_db_path", 366 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False, exists=False), 367 | ) 368 | @click.argument("sql",) 369 | def create_subset(db_path, new_db_path, sql): 370 | "Create a new subset database of photos with sha256 matching those returned by this SQL query" 371 | db = sqlite_utils.Database(db_path) 372 | new_db = sqlite_utils.Database(new_db_path) 373 | # Use the schema from the old database to create tables in the new database 374 | for result in db.conn.execute( 375 | "select sql from sqlite_master where sql is not null" 376 | ): 377 | new_db.conn.execute(result[0]) 378 | # Figure out the photos to copy across 379 | sha256s = [r[0] for r in db.conn.execute(sql).fetchall()] 380 | # Copy across apple_photos, apple_photos_scores, uploads 381 | db.conn.execute("ATTACH DATABASE '{}' AS [{}]".format(str(new_db_path), "newdb")) 382 | # First apple_photos 383 | with db.conn: 384 | sql = """ 385 | INSERT INTO 386 | newdb.apple_photos 387 | SELECT * FROM apple_photos WHERE sha256 in ({}) 388 | """.format( 389 | ", ".join("'{}'".format(sha256) for sha256 in sha256s) 390 | ) 391 | db.conn.execute(sql) 392 | # Now the other tables 393 | for sql in ( 394 | """ 395 | INSERT INTO 396 | newdb.apple_photos_scores 397 | SELECT * FROM apple_photos_scores WHERE ZUUID in (select 
uuid from newdb.apple_photos) 398 | """, 399 | """INSERT INTO 400 | newdb.labels 401 | SELECT * FROM labels WHERE uuid in (select uuid from newdb.apple_photos)""", 402 | """ 403 | INSERT INTO 404 | newdb.uploads 405 | SELECT * FROM uploads WHERE sha256 in (select sha256 from newdb.apple_photos) 406 | """, 407 | ): 408 | with db.conn: 409 | db.conn.execute(sql) 410 | -------------------------------------------------------------------------------- /dogsheep_photos/utils.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import hashlib 3 | import pathlib 4 | import threading 5 | import uuid 6 | from datetime import timezone 7 | 8 | CONTENT_TYPES = { 9 | "jpg": "image/jpeg", 10 | "jpeg": "image/jpeg", 11 | "png": "image/png", 12 | "gif": "image/gif", 13 | "heic": "image/heic", 14 | } 15 | 16 | HASH_BLOCK_SIZE = 1024 * 1024 17 | 18 | boto3_local = threading.local() 19 | 20 | 21 | def calculate_hash(path): 22 | m = hashlib.sha256() 23 | with path.open("rb") as fp: 24 | while True: 25 | data = fp.read(HASH_BLOCK_SIZE) 26 | if not data: 27 | break 28 | m.update(data) 29 | return m.hexdigest() 30 | 31 | 32 | def image_paths(directories): 33 | for directory in directories: 34 | path = pathlib.Path(directory) 35 | yield from ( 36 | p 37 | for p in path.glob("**/*") 38 | if p.suffix in [".jpg", ".jpeg", ".png", ".gif", ".heic"] 39 | ) 40 | 41 | 42 | def get_all_keys(client, bucket): 43 | paginator = client.get_paginator("list_objects_v2") 44 | keys = [] 45 | for page in paginator.paginate(Bucket=bucket): 46 | for row in page["Contents"]: 47 | keys.append(row["Key"]) 48 | return keys 49 | 50 | 51 | def osxphoto_to_row(sha256, photo): 52 | row = { 53 | "sha256": sha256, 54 | "uuid": photo.uuid, 55 | "burst_uuid": photo._info["burstUUID"], 56 | "filename": photo.filename, 57 | "original_filename": photo.original_filename, 58 | "description": photo.description, 59 | "date": to_utc_isoformat(photo.date), 60 | "date_modified": to_utc_isoformat(photo.date_modified), 61 | "title": photo.title, 62 | "keywords": photo.keywords, 63 | "albums": photo.albums, 64 | "persons": photo.persons, 65 | "path": photo.path, 66 | "ismissing": photo.ismissing, 67 | "hasadjustments": photo.hasadjustments, 68 | "external_edit": photo.external_edit, 69 | "favorite": photo.favorite, 70 | "hidden": photo.hidden, 71 | "latitude": photo._latitude, 72 | "longitude": photo._longitude, 73 | "path_edited": photo.path_edited, 74 | "shared": photo.shared, 75 | "isphoto": photo.isphoto, 76 | "ismovie": photo.ismovie, 77 | "uti": photo.uti, 78 | "burst": photo.burst, 79 | "live_photo": photo.live_photo, 80 | "path_live_photo": photo.path_live_photo, 81 | "iscloudasset": photo.iscloudasset, 82 | "incloud": photo.incloud, 83 | "portrait": photo.portrait, 84 | "screenshot": photo.screenshot, 85 | "slow_mo": photo.slow_mo, 86 | "time_lapse": photo.time_lapse, 87 | "hdr": photo.hdr, 88 | "selfie": photo.selfie, 89 | "panorama": photo.panorama, 90 | "has_raw": photo.has_raw, 91 | "uti_raw": photo.uti_raw, 92 | "path_raw": photo.path_raw, 93 | } 94 | # Now add place keys 95 | place = photo.place 96 | if place is not None: 97 | for key, value in photo.place.address._asdict().items(): 98 | row["place_{}".format(key)] = value 99 | return row 100 | 101 | 102 | def to_utc_isoformat(dt): 103 | if not dt: 104 | return None 105 | fixed = dt.astimezone(timezone.utc).isoformat().split(".")[0] 106 | if not fixed.endswith("+00:00"): 107 | fixed += "+00:00" 108 | return fixed 109 | 110 | 111 | def 
to_uuid(uuid_0, uuid_1): 112 | b = uuid_0.to_bytes(8, "little", signed=True) + uuid_1.to_bytes( 113 | 8, "little", signed=True 114 | ) 115 | return str(uuid.UUID(bytes=b)).upper() 116 | 117 | 118 | def s3_upload(path, sha256, ext, creds): 119 | client = getattr(boto3_local, "client", None) 120 | if client is None: 121 | client = boto3.client( 122 | "s3", 123 | aws_access_key_id=creds["photos_s3_access_key_id"], 124 | aws_secret_access_key=creds["photos_s3_secret_access_key"], 125 | ) 126 | boto3_local.client = client 127 | keyname = "{}.{}".format(sha256, ext) 128 | client.upload_file( 129 | str(path), 130 | creds["photos_s3_bucket"], 131 | keyname, 132 | ExtraArgs={"ContentType": CONTENT_TYPES[ext]}, 133 | ) 134 | return path 135 | 136 | 137 | def hash_and_size_path(path): 138 | size = path.stat().st_size 139 | sha256 = calculate_hash(path) 140 | return path, sha256, size 141 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | VERSION = "0.4.1" 5 | 6 | 7 | def get_long_description(): 8 | with open( 9 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "README.md"), 10 | encoding="utf8", 11 | ) as fp: 12 | return fp.read() 13 | 14 | 15 | setup( 16 | name="dogsheep-photos", 17 | description="Save details of your photos to a SQLite database and upload them to S3", 18 | long_description=get_long_description(), 19 | long_description_content_type="text/markdown", 20 | author="Simon Willison", 21 | url="https://github.com/dogsheep/dogsheep-photos", 22 | license="Apache License, Version 2.0", 23 | version=VERSION, 24 | packages=["dogsheep_photos"], 25 | entry_points=""" 26 | [console_scripts] 27 | dogsheep-photos=dogsheep_photos.cli:cli 28 | """, 29 | install_requires=[ 30 | "sqlite-utils>=2.7", 31 | "boto3>=1.12.41", 32 | "osxphotos>=0.28.13 ; sys_platform=='darwin'", 33 | ], 34 | extras_require={"test": ["pytest"]}, 35 | tests_require=["dogsheep-photos[test]"], 36 | ) 37 | -------------------------------------------------------------------------------- /tests/test_s3_auth.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from dogsheep_photos.cli import cli 3 | import json 4 | 5 | 6 | def test_s3_auth(): 7 | runner = CliRunner() 8 | with runner.isolated_filesystem(): 9 | result = runner.invoke(cli, ["s3-auth"], input="bucket\nxxx\nyyy\n") 10 | assert 0 == result.exit_code 11 | data = json.load(open("auth.json")) 12 | assert { 13 | "photos_s3_bucket": "bucket", 14 | "photos_s3_access_key_id": "xxx", 15 | "photos_s3_secret_access_key": "yyy", 16 | } == data 17 | --------------------------------------------------------------------------------