├── .github ├── dependabot.yml └── workflows │ ├── deploy.yml │ ├── lint.yml │ └── tests.yml ├── .pre-commit-config.yaml ├── .pytest.ini ├── Dockerfile ├── __main__.py ├── changelog.md ├── entrypoint.sh ├── license ├── phockup.py ├── phockup.sh ├── readme.md ├── requirements-dev.txt ├── requirements.txt ├── src ├── __init__.py ├── date.py ├── dependency.py ├── exif.py └── phockup.py ├── tests ├── __init__.py ├── input │ ├── !#$%'+-.^_`~.jpg │ ├── UNKNOWN.jpg │ ├── date_20170101_010101.jpg │ ├── exif.jpg │ ├── exif.mp4 │ ├── link_to_date_20170101_010101.jpg │ ├── other.txt │ ├── phockup's exif test.jpg │ ├── sub_folder │ │ └── date_20180101_010101.jpg │ ├── xmp.jpg │ ├── xmp.jpg.xmp │ ├── xmp_ext.jpg │ ├── xmp_ext.jpg.xmp │ ├── xmp_ext.xmp │ ├── xmp_noext.jpg │ └── xmp_noext.xmp ├── test_date.py ├── test_exif.py └── test_phockup.py └── tox.ini /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | - package-ecosystem: "github-actions" 8 | directory: "/" 9 | schedule: 10 | interval: "weekly" 11 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*.*" 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11' ] 14 | steps: 15 | - name: Checkout the repository 16 | uses: actions/checkout@v4.1.1 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | 25 | - name: Install dependencies 26 | run: | 27 | sudo apt-get install -y libimage-exiftool-perl 28 | python -m pip install --upgrade pip 29 | pip install pytest 30 | pip install -r requirements-dev.txt 31 | 32 | - name: Run tests 33 | run: pytest 34 | 35 | deploy-snap: 36 | needs: test 37 | runs-on: ubuntu-18.04 38 | strategy: 39 | matrix: 40 | architecture: [ linux/amd64, linux/386, linux/arm/v7, linux/ppc64le ] 41 | steps: 42 | - name: Checkout the repository 43 | uses: actions/checkout@v4.1.1 44 | with: 45 | fetch-depth: 0 46 | 47 | - id: snapcraft 48 | name: Build Snap 49 | run: | 50 | LATEST_VERSION=`git tag | sort -t. 
-k 1.2,1n -k 2,2n -k 3,3n -k 4,4n | tail -1` 51 | 52 | CURRENT_VERSION=`echo -e "import urllib.request, json\n\nrequest = urllib.request.Request('http://api.snapcraft.io/v2/snaps/info/phockup')\nrequest.add_header('Snap-Device-Series', '16')\narchitectures = {'amd64': 'linux/amd64', 'i386': 'linux/386', 'arm64': 'linux/arm64', 'armhf': 'linux/arm/v7', 'ppc64el': 'linux/ppc64le', 's390x': 'linux/s390x'}\nwith urllib.request.urlopen(request) as url:\n data = json.loads(url.read().decode())\n for c in data['channel-map']:\n channel = c['channel']\n if architectures.get(channel['architecture']) == '${{ matrix.architecture }}' and channel['name'] == 'stable':\n print(c['version'])" | python3` 53 | 54 | if [ "$LATEST_VERSION" = "$CURRENT_VERSION" ]; then 55 | echo ::set-output name=deploy::0 56 | exit 57 | fi 58 | 59 | echo '{"experimental": true}' | sudo tee /etc/docker/daemon.json > /dev/null 60 | sudo systemctl restart docker 61 | 62 | docker run --rm --tty \ 63 | --security-opt apparmor:unconfined \ 64 | --cap-add SYS_ADMIN \ 65 | multiarch/qemu-user-static --reset -p yes 66 | 67 | docker run --rm --tty \ 68 | --security-opt apparmor:unconfined \ 69 | --cap-add SYS_ADMIN \ 70 | --device /dev/fuse \ 71 | --volume /sys \ 72 | --volume /sys/fs/cgroup:/sys/fs/cgroup:ro \ 73 | --volume $GITHUB_WORKSPACE:$GITHUB_WORKSPACE \ 74 | --workdir $GITHUB_WORKSPACE \ 75 | --platform "${{ matrix.architecture }}" \ 76 | --env PLAYTEST="${{ matrix.playtest }}" \ 77 | diddledan/snapcraft:core18 78 | 79 | SNAP=`find $GITHUB_WORKSPACE -maxdepth 1 -type f -name '*.snap' | head -n1` 80 | echo ::set-output name=snap::"$SNAP" 81 | echo ::set-output name=deploy::1 82 | 83 | - name: Deploy to Snap Store 84 | uses: snapcore/action-publish@v1 85 | if: ${{ steps.snapcraft.outputs.deploy == 1 }} 86 | with: 87 | store_login: ${{ secrets.SNAPCRAFT_LOGIN }} 88 | snap: ${{ steps.snapcraft.outputs.snap }} 89 | release: stable 90 | 91 | deploy-brew: 92 | needs: test 93 | runs-on: ubuntu-latest 94 | steps: 95 | - name: Checkout the main repository 96 | uses: actions/checkout@v4.1.1 97 | with: 98 | fetch-depth: 0 99 | path: phockup 100 | 101 | - name: Checkout the Homebrew repository 102 | uses: actions/checkout@v4.1.1 103 | with: 104 | repository: ivandokov/homebrew-contrib 105 | path: homebrew-contrib 106 | ssh-key: ${{ secrets.HOMEBREW_PUSH_KEY }} 107 | 108 | - name: Deploy to Homebrew 109 | run: | 110 | cd phockup 111 | 112 | LATEST_VERSION=`git tag | sort -t. -k 1.2,1n -k 2,2n -k 3,3n -k 4,4n | tail -1` 113 | 114 | CURRENT_VERSION=`cat ../homebrew-contrib/Formula/phockup.rb | tr '\n' '\r' | sed 's/.*archive\/\([0-9.]*\)\.tar.*/\1/g'` 115 | 116 | if [ "$LATEST_VERSION" = "$CURRENT_VERSION" ]; then 117 | exit 118 | fi 119 | 120 | curl -sLo $LATEST_VERSION.tar.gz https://github.com/ivandokov/phockup/archive/$LATEST_VERSION.tar.gz 121 | SHASUM=`shasum -a 256 $LATEST_VERSION.tar.gz | awk '{print $1}'` 122 | rm $LATEST_VERSION.tar.gz 123 | 124 | cd ../homebrew-contrib 125 | 126 | sed -i "s/archive\/[0-9.]*\.tar/archive\/$LATEST_VERSION\.tar/" Formula/phockup.rb 127 | sed -i "0,/sha256/{s/sha256 .*/sha256 \"$SHASUM\"/}" Formula/phockup.rb 128 | 129 | git config user.name github-actions 130 | git config user.email github-actions@github.com 131 | git add . 
132 | git commit -m $LATEST_VERSION 133 | git push 134 | 135 | deploy-dockerhub: 136 | needs: test 137 | runs-on: ubuntu-latest 138 | steps: 139 | - name: Checkout the repository 140 | uses: actions/checkout@v4.1.1 141 | with: 142 | fetch-depth: 0 143 | 144 | - name: Set up QEMU 145 | uses: docker/setup-qemu-action@v3 146 | 147 | - name: Set up Docker Buildx 148 | uses: docker/setup-buildx-action@v3 149 | 150 | - name: Publish to Dockerhub 151 | uses: elgohr/Publish-Docker-Github-Action@v5 152 | with: 153 | name: ivandokov/phockup 154 | username: ${{ secrets.DOCKER_USERNAME }} 155 | password: ${{ secrets.DOCKER_PASSWORD }} 156 | platforms: linux/amd64,linux/arm64 157 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - '*' 7 | tags-ignore: 8 | - '*' 9 | pull_request: 10 | branches: 11 | - master 12 | 13 | jobs: 14 | pre-commit: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4.1.1 18 | - name: Install dependencies 19 | run: pip install -r requirements-dev.txt 20 | - name: Run pre-commit 21 | run: pre-commit run -a 22 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - '*' 7 | tags-ignore: 8 | - '*' 9 | pull_request: 10 | branches: 11 | - master 12 | 13 | jobs: 14 | unit-tests: 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11' ] 19 | steps: 20 | - name: Checkout the repository 21 | uses: actions/checkout@v4.1.1 22 | 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | 28 | - name: Install dependencies 29 | run: | 30 | sudo apt-get install -y libimage-exiftool-perl 31 | python -m pip install --upgrade pip 32 | pip install -r requirements-dev.txt 33 | 34 | - name: Run tests 35 | run: pytest 36 | 37 | docker-build-test: 38 | runs-on: ubuntu-latest 39 | steps: 40 | - name: Checkout the repository 41 | uses: actions/checkout@v4.1.1 42 | - name: Build 43 | uses: docker/build-push-action@v5 44 | with: 45 | context: . 
46 | push: false 47 | tags: ivandokov/phockup:test 48 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | - id: check-added-large-files 9 | - repo: https://github.com/PyCQA/isort 10 | rev: 5.12.0 11 | hooks: 12 | - id: isort 13 | - repo: https://github.com/PyCQA/flake8 14 | rev: 6.0.0 15 | hooks: 16 | - id: flake8 17 | - repo: https://github.com/pre-commit/mirrors-mypy 18 | rev: v1.3.0 19 | hooks: 20 | - id: mypy 21 | -------------------------------------------------------------------------------- /.pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --disable-socket 3 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-alpine 2 | 3 | VOLUME /mnt/input 4 | VOLUME /mnt/output 5 | 6 | ENV CRON "" 7 | ENV OPTIONS "" 8 | 9 | COPY . /opt/phockup 10 | RUN chmod +x /opt/phockup/entrypoint.sh 11 | 12 | RUN apk --no-cache add exiftool \ 13 | && pip install --no-cache-dir -r /opt/phockup/requirements.txt \ 14 | && ln -s /opt/phockup/phockup.py /usr/local/bin/phockup \ 15 | && apk add bash \ 16 | && apk add flock 17 | 18 | ENTRYPOINT ["/opt/phockup/entrypoint.sh"] 19 | -------------------------------------------------------------------------------- /__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | 4 | from phockup import main 5 | from src.printer import Printer 6 | 7 | if __name__ == '__main__': 8 | try: 9 | main(sys.argv[1:]) 10 | except KeyboardInterrupt: 11 | Printer().empty().line('Exiting...') 12 | sys.exit(0) 13 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | ##### `1.13.0` 3 | * Implement `--rmdirs` [#225](https://github.com/ivandokov/phockup/pull/225) 4 | ##### `1.12.0` 5 | * Implement `--movedel` [#223](https://github.com/ivandokov/phockup/pull/223) 6 | ##### `1.11.3` 7 | * Fixed wrong tag in Git 8 | ##### `1.11.2` 9 | * Fixed [deployment to Homebrew](https://github.com/ivandokov/phockup/commit/33aec10e7506fe5f70a8e244e963304ff6f54337) 10 | ##### `1.11.1` 11 | * Fixed code styling 12 | ##### `1.11.0` 13 | * Added `--from-date` and `--to-datezz options to limit the processed files [#202](https://github.com/ivandokov/phockup/pull/202) 14 | * Merged dependabot PRs 15 | * Improved documentation 16 | ##### `1.10.1` 17 | * Fixed python versions for tests and deployments 18 | ##### `1.10.0` 19 | * Fixed Dependabot 20 | * Documented AUR install [#175](https://github.com/ivandokov/phockup/pull/175) 21 | * Timezone support [#182](https://github.com/ivandokov/phockup/pull/182) 22 | * Implementation of Issue [#146](https://github.com/ivandokov/phockup/issues/146) to support prefix and suffix [#189](https://github.com/ivandokov/phockup/pull/189) 23 | * Documented `--max-concurrency` [#188](https://github.com/ivandokov/phockup/pull/188) 24 | * Make sure arguments are not split [#190](https://github.com/ivandokov/phockup/pull/190) 25 | * 
Dependabot PRs 26 | ##### `1.9.2` 27 | * Fix brew SHA256 mismatch [#169](https://github.com/ivandokov/phockup/issues/169) 28 | * Fix deploy for buildx [#168](https://github.com/ivandokov/phockup/pull/168) 29 | * Use Python 3.10 for Docker [#167](https://github.com/ivandokov/phockup/pull/167) 30 | * Add dependabot to project [#166](https://github.com/ivandokov/phockup/pull/166) 31 | * Update tests.yml [#164](https://github.com/ivandokov/phockup/pull/164) 32 | * Bump pre-commit packages [#165](https://github.com/ivandokov/phockup/pull/165) 33 | * Fix "WARNING: --use-feature=2020-resolver no longer has any effect..." 34 | ##### `1.9.1` 35 | * Specify platforms for Dockerhub action 36 | ##### `1.9.0` 37 | * Rename `unknown` folder [#141](https://github.com/ivandokov/phockup/pull/141) 38 | * Week date format [#142](https://github.com/ivandokov/phockup/pull/142) 39 | * [Update snap to core20](https://github.com/ivandokov/phockup/commit/69783c84fe07b94e9b2c62117cf3c0ae5ca2a29e) 40 | * [Fixed missing dep for snap](https://github.com/ivandokov/phockup/commit/b865b56f31c6fde1eadf71540bcf66ceb7744dd3) 41 | ##### `1.8.0` 42 | * Added support for threads (`--max-concurrency`) to speed up the process [#123](https://github.com/ivandokov/phockup/pull/123) 43 | ##### `1.7.1` 44 | * Fix dependencies due to tqdm [#133](https://github.com/ivandokov/phockup/pull/133) 45 | * Improve check_directories output on error [#132](https://github.com/ivandokov/phockup/pull/132) 46 | * Other improvements [#135](https://github.com/ivandokov/phockup/pull/135), [#128](https://github.com/ivandokov/phockup/pull/128) 47 | ##### `1.7.0` 48 | * Add `--progress` functionality [#118](https://github.com/ivandokov/phockup/pull/118) 49 | * Add pre-commit integration [#121](https://github.com/ivandokov/phockup/pull/121) 50 | ##### `1.6.5` 51 | * Add missing checkout step to the dockerhub deployment action 52 | ##### `1.6.4` 53 | * Add argument "--file-type" to be able to choose between image or video [#114](https://github.com/ivandokov/phockup/issues/114) 54 | * Improved Docker image [#117](https://github.com/ivandokov/phockup/issues/117) 55 | * Automatically deploy new Docker image to Docker Hub [#120](https://github.com/ivandokov/phockup/issues/120) 56 | ##### `1.6.3` 57 | * Fixed double `sed` 58 | ##### `1.6.2` 59 | * Fixed version extraction for snaps 60 | ##### `1.6.1` 61 | * Fixed `--log` argument ([discussion](https://github.com/ivandokov/phockup/pull/106#discussion_r642048830)) 62 | * Fixed multiple custom date fields (`-f|--date-field`) [#113](https://github.com/ivandokov/phockup/issues/113) 63 | ##### `1.6.0` 64 | * Added `--maxdepth` mode [#104](https://github.com/ivandokov/phockup/issues/104) 65 | * Added `--quiet` mode to hide generic output [#103](https://github.com/ivandokov/phockup/issues/103) 66 | * Fixed tests comatibility for Windows [#102](https://github.com/ivandokov/phockup/issues/102) 67 | * Readme updates 68 | ##### `1.5.26` 69 | * Fixed [#98](https://github.com/ivandokov/phockup/issues/98) 70 | * [Disabled automated snap build and deploy for linux/arm64](https://github.com/ivandokov/phockup/issues/99). 
71 | ##### `1.5.25` 72 | * Fixed [#97](https://github.com/ivandokov/phockup/issues/97) 73 | ##### `1.5.24` 74 | * Fixed broken `--date` after the merge of [#87](https://github.com/ivandokov/phockup/issues/87) 75 | ##### `1.5.23` 76 | * Removed s930x architecture 77 | * Update snapcraft.yml to more simple setup 78 | ##### `1.5.22` 79 | * Fix quotes 80 | ##### `1.5.21` 81 | * Fix hard coded variable 82 | ##### `1.5.20` 83 | * When taking the current version for snaps get the version for the current architecture and for stable channel 84 | ##### `1.5.19` 85 | * Fixed Homebrew sha256 replacing in the deployment workflow job 86 | ##### `1.5.18` 87 | * Move Homebrew deployment in a separate job because it was executed multiple times because of the python matrix 88 | ##### `1.5.17` 89 | * Fixed workflow wrong step id 90 | ##### `1.5.16` 91 | * Fixed snap deploy condition 92 | ##### `1.5.15` 93 | * Fixed snap build volume directory 94 | ##### `1.5.14` 95 | * Automatic snap deployment thanks to [Daniel Llewellyn's blog post](https://snapcraft.ninja/2020/08/03/snapcraft-continuous-integration-github-actions/) 96 | ##### `1.5.13` 97 | * Fix snapcraft version extraction by [#87](https://github.com/ivandokov/phockup/issues/87) 98 | ##### `1.5.12` 99 | * Merged [#87](https://github.com/ivandokov/phockup/issues/87) 100 | * Merged [#88](https://github.com/ivandokov/phockup/issues/88) 101 | ##### `1.5.11` 102 | * Added Docker support [#75](https://github.com/ivandokov/phockup/issues/75) 103 | ##### `1.5.10` 104 | * Merged [#78](https://github.com/ivandokov/phockup/issues/78) 105 | * Merged [#81](https://github.com/ivandokov/phockup/issues/81) 106 | ##### `1.5.9` 107 | * Fixed [#70](https://github.com/ivandokov/phockup/issues/70) related to Windows issues 108 | ##### `1.5.8` 109 | * Add `--date-field` option to set date extraction fields [#54](https://github.com/ivandokov/phockup/issues/54) 110 | * Handle regex with optional hour information [#62](https://github.com/ivandokov/phockup/issues/62) 111 | * Fix regex support for incomplete time on filename [#55](https://github.com/ivandokov/phockup/issues/55) 112 | * Fix to handle files with illegal characters [#53](https://github.com/ivandokov/phockup/issues/53) 113 | ##### `1.5.7` 114 | * Resolved [#44](https://github.com/ivandokov/phockup/issues/44) 115 | ##### `1.5.6` 116 | * Add `-o | --original-names` option to allow keeping the original filenames 117 | ##### `1.5.5` 118 | * Add `-t` option to allow using file modification time as a last resort 119 | * Workaround EXIF DateTaken time of all-zeros 120 | ##### `1.5.4` 121 | * Handle gracefully files without MIMEType 122 | ##### `1.5.3` 123 | * Handle broken symlinks 124 | ##### `1.5.2` 125 | * Add `SubSecCreateDate` and `SubSecDateTimeOriginal` EXIF dates to the list of allowed ones because exiftool changed the default behavior to not include the subseconds for `CreateDate` and `DateTimeOriginal` 126 | ##### `1.5.1` 127 | * Handle filenames with spaces 128 | ##### `1.5.0` 129 | * Major refactoring. 130 | * Updated all tests. 131 | * Added TravisCI. 132 | ##### `1.4.1` 133 | * Add `-l | --link` flag to link files instead of copy. 134 | ##### `1.4.0` 135 | * Add `-m | --move` flag to move files instead of copy. 136 | ##### `1.3.2` 137 | * More snapcraft.yaml fixes (removed architecture which were producing wrong snaps for amd64). 
138 | * Catch some possible write permission for directories and expand absolute path and home directory on *nix 139 | ##### `1.3.1` 140 | * Fixed issue with the snap application and simplified the snapcraft.yaml 141 | ##### `1.3.0` 142 | * Allow different output directories date format with `-d | --date` option. 143 | ##### `1.2.2` 144 | * Allow access to removable media (external HDD, USB, etc) for snap the application 145 | * Continue execution even if date attribute is not present [[#6](https://github.com/ivandokov/phockup/pull/6)] 146 | ##### `1.2.1` 147 | * Windows compatibility fixes 148 | ##### `1.2.0` 149 | * Changed synopsis of the script. `-i|--inputdir` and `-o|--outputdir` are not required anymore. Use first argument for input directory and second for output directory. 150 | * Do not process duplicated files located in different directories. 151 | * Suffix duplicated file names of different files. Sha256 checksum is used for comparison of the source and target files to see if they are identical. 152 | * Ignore `.DS_Store` and `Thumbs.db` files 153 | * Handle case when `exiftool` returns exit code > 0. 154 | * Use `os.walk` instead of `iglob` to support Python < 3.5 155 | * Handle some different date formats from exif data. 156 | ##### `1.1.0` 157 | * Collect all files instead only specified file types. This also enables video sorting. 158 | ##### `1.0.0` 159 | Initial version. 160 | -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # If the CRON variable is empty, phockup gets executed once as command line tool 4 | if [ -z "$CRON" ]; then 5 | phockup "$@" 6 | 7 | # When CRON is not empty, phockup will run in a cron job until the container is stopped. 8 | else 9 | if [ -f /tmp/phockup.lockfile ]; then 10 | rm /tmp/phockup.lockfile 11 | fi 12 | 13 | CRON_COMMAND="$CRON flock -n /tmp/phockup.lockfile phockup /mnt/input /mnt/output $OPTIONS" 14 | 15 | echo "$CRON_COMMAND" >> /etc/crontabs/root 16 | echo "cron job has been set up with command: $CRON_COMMAND" 17 | 18 | crond -f -d 8 19 | fi 20 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Ivan Dokov - www.dokov.bg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /phockup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import logging 4 | import logging.handlers 5 | import os 6 | import re 7 | import sys 8 | 9 | from src.date import Date 10 | from src.dependency import check_dependencies 11 | from src.phockup import Phockup 12 | 13 | __version__ = '1.13.0' 14 | 15 | PROGRAM_DESCRIPTION = """\ 16 | Media sorting tool to organize photos and videos from your camera in folders by year, \ 17 | month and day. 18 | The software will collect all files from the input directory and copy them to the output 19 | directory without changing the files content. It will only rename the files and place 20 | them in the proper directory for year, month and day. 21 | """ 22 | 23 | DEFAULT_DIR_FORMAT = ['%Y', '%m', '%d'] 24 | 25 | logger = logging.getLogger('phockup') 26 | 27 | 28 | def parse_args(args=sys.argv[1:]): 29 | parser = argparse.ArgumentParser( 30 | description=PROGRAM_DESCRIPTION, 31 | formatter_class=argparse.RawTextHelpFormatter) 32 | 33 | parser.version = f"v{__version__}" 34 | 35 | parser.add_argument( 36 | '-v', 37 | '--version', 38 | action='version', 39 | ) 40 | 41 | parser.add_argument( 42 | '-d', 43 | '--date', 44 | action='store', 45 | type=Date.parse, 46 | help="""\ 47 | Specify date format for OUTPUTDIR directories. 48 | 49 | You can choose different year format (e.g. 17 instead of 2017) or decide to skip the 50 | day directories and have all photos sorted in year/month. 51 | 52 | Supported formats: 53 | YYYY - 2016, 2017 ... 54 | YY - 16, 17 ... 55 | MM - 07, 08, 09 ... 56 | M - July, August, September ... 57 | m - Jul, Aug, Sept ... 58 | DD - 27, 28, 29 ... (day of month) 59 | DDD - 123, 158, 365 ... (day of year) 60 | U - 00, 01, 53 ... (week of the year, Sunday first day of week) 61 | W - 00, 01, 53 ... (week of the year, Monday first day of week) 62 | 63 | Example: 64 | YYYY/MM/DD -> 2011/07/17 65 | YYYY/M/DD -> 2011/July/17 66 | YYYY/m/DD -> 2011/Jul/17 67 | YY/m-DD -> 11/Jul-17 68 | YYYY/U -> 2011/30 69 | YYYY/W -> 2011/28 70 | """, 71 | ) 72 | 73 | exclusive_group_link_move = parser.add_mutually_exclusive_group() 74 | 75 | exclusive_group_link_move.add_argument( 76 | '-m', 77 | '--move', 78 | action='store_true', 79 | help="""\ 80 | Instead of copying the process will move all files from the INPUTDIR to the OUTPUTDIR. 81 | This is useful when working with a big collection of files and the remaining free space 82 | is not enough to make a copy of the INPUTDIR. 83 | """, 84 | ) 85 | 86 | exclusive_group_link_move.add_argument( 87 | '-l', 88 | '--link', 89 | action='store_true', 90 | help="""\ 91 | Instead of copying the process will make hard links to all files in INPUTDIR and place 92 | them in the OUTPUTDIR. 93 | This is useful when working with working structure and want to create YYYY/MM/DD 94 | structure to point to same files. 95 | """, 96 | ) 97 | 98 | parser.add_argument( 99 | '-o', 100 | '--original-names', 101 | action='store_true', 102 | help="""\ 103 | Organize the files in selected format or using the default year/month/day format but 104 | keep original filenames. 105 | """, 106 | ) 107 | 108 | parser.add_argument( 109 | '-t', 110 | '--timestamp', 111 | action='store_true', 112 | help="""\ 113 | Use the timestamp of the file (last modified date) if there is no EXIF date information. 
114 | If the user supplies a regex, it will be used if it finds a match in the filename. 115 | This option is intended as "last resort" since the file modified date may not be 116 | accurate, nevertheless it can be useful if no other date information can be obtained. 117 | """, 118 | ) 119 | 120 | parser.add_argument( 121 | '-y', 122 | '--dry-run', 123 | action='store_true', 124 | help="""\ 125 | Does a trial run with no permanent changes to the filesystem. 126 | So it will not move any files, just shows which changes would be done. 127 | """, 128 | ) 129 | 130 | parser.add_argument( 131 | '-c', 132 | '--max-concurrency', 133 | type=int, 134 | default=1, 135 | choices=range(1, 255), 136 | metavar='1-255', 137 | help="""\ 138 | Sets the level of concurrency for processing files in a directory. 139 | Defaults to 1. Higher values can improve throughput of file operations 140 | """, 141 | ) 142 | 143 | parser.add_argument( 144 | '--maxdepth', 145 | type=int, 146 | default=-1, 147 | choices=range(0, 255), 148 | metavar='1-255', 149 | help="""\ 150 | Descend at most 'maxdepth' levels (a non-negative integer) of directories 151 | """, 152 | ) 153 | 154 | parser.add_argument( 155 | '-r', 156 | '--regex', 157 | action='store', 158 | type=re.compile, 159 | help="""\ 160 | Specify date format for date extraction from filenames if there is no EXIF date 161 | information. 162 | 163 | Example: 164 | {regex} 165 | can be used to extract the date from file names like the following 166 | IMG_27.01.2015-19.20.00.jpg. 167 | """, 168 | ) 169 | 170 | parser.add_argument( 171 | '-f', 172 | '--date-field', 173 | action='store', 174 | help="""\ 175 | Use a custom date extracted from the exif field specified. 176 | To set multiple fields to try in order until finding a valid date, use spaces to 177 | separate fields inside a string. 178 | 179 | Example: 180 | DateTimeOriginal 181 | "DateTimeOriginal CreateDate FileModifyDate" 182 | 183 | These fields are checked by default when this argument is not set: 184 | "SubSecCreateDate SubSecDateTimeOriginal CreateDate DateTimeOriginal" 185 | 186 | To get all date fields available for a file, do: 187 | exiftool -time:all -mimetype -j 188 | """, 189 | ) 190 | 191 | exclusive_group_debug_silent = parser.add_mutually_exclusive_group() 192 | 193 | exclusive_group_debug_silent.add_argument( 194 | '--debug', 195 | action='store_true', 196 | default=False, 197 | help="""\ 198 | Enable debugging. Alternately, set the LOGLEVEL environment variable to DEBUG 199 | """, 200 | ) 201 | 202 | exclusive_group_debug_silent.add_argument( 203 | '--quiet', 204 | action='store_true', 205 | default=False, 206 | help="""\ 207 | Run without output. 208 | """, 209 | ) 210 | 211 | exclusive_group_debug_silent.add_argument( 212 | '--progress', 213 | action='store_true', 214 | default=False, 215 | help="""\ 216 | Run with progressbar output. 217 | """, 218 | ) 219 | 220 | parser.add_argument( 221 | '--log', 222 | action='store', 223 | help="""\ 224 | Specify the output directory where your log file should be exported. 225 | This flag can be used in conjunction with the flag `--quiet` or `--progress`. 226 | """, 227 | ) 228 | 229 | parser.add_argument( 230 | 'input_dir', 231 | metavar='INPUTDIR', 232 | help="""\ 233 | Specify the source directory where your photos are located. 234 | """, 235 | ) 236 | 237 | parser.add_argument( 238 | 'output_dir', 239 | metavar='OUTPUTDIR', 240 | help="""\ 241 | Specify the output directory where your photos should be exported. 
242 | """, 243 | ) 244 | 245 | parser.add_argument( 246 | '--file-type', 247 | type=str, 248 | choices=['image', 'video'], 249 | metavar='image|video', 250 | help="""\ 251 | By default, Phockup addresses both image and video files. 252 | If you want to restrict your command to either images or 253 | videos only, use `--file-type=[image|video]`. 254 | """, 255 | ) 256 | 257 | parser.add_argument( 258 | '--no-date-dir', 259 | type=str, 260 | default=Phockup.DEFAULT_NO_DATE_DIRECTORY, 261 | help="""\ 262 | Files without EXIF date information are placed in a directory 263 | named 'unknown' by default. This option overrides that 264 | folder name. e.g. --no-date-dir=misc, --no-date-dir="no date" 265 | """, 266 | ) 267 | 268 | parser.add_argument( 269 | '--skip-unknown', 270 | action='store_true', 271 | default=False, 272 | help="""\ 273 | Ignore files that don't contain valid EXIF data for the criteria specified. 274 | This is useful if you intend to make multiple passes over an input directory 275 | with varying and specific EXIF fields that are note checked by default. 276 | """, 277 | ) 278 | 279 | parser.add_argument( 280 | '--movedel', 281 | action='store_true', 282 | default=False, 283 | help="""\ 284 | DELETE source files which are determined to be duplicates of files 285 | already transferred. Only valid in conjunction with both `--move` 286 | and `--skip-unknown`. 287 | """, 288 | ) 289 | 290 | parser.add_argument( 291 | '--rmdirs', 292 | action='store_true', 293 | default=False, 294 | help="""\ 295 | DELETE empty directories after processing. Only valid in 296 | conjunction with `--move`. 297 | """, 298 | ) 299 | 300 | parser.add_argument( 301 | '--output_prefix', 302 | type=str, 303 | default='', 304 | help="""\ 305 | String to prepend to the output directory to aid in sorting 306 | files by an additional level prior to sorting by date. This 307 | string will immediately follow the output path and is intended 308 | to allow runtime setting of the output path (e.g. via $USER, 309 | $HOSTNAME, %%USERNAME%%, etc.) 310 | """, 311 | ) 312 | 313 | parser.add_argument( 314 | '--output_suffix', 315 | type=str, 316 | default='', 317 | help="""\ 318 | String to append to the destination directory to aid in sorting 319 | files by an additional level after sorting by date. 320 | """, 321 | ) 322 | 323 | parser.add_argument( 324 | '--from-date', 325 | type=str, 326 | default=None, 327 | help="""\ 328 | Limit the operations to the files that are newer than --from-date (inclusive). 329 | The date must be specified in format YYYY-MM-DD. Files with unknown date won't be skipped. 330 | """, 331 | ) 332 | 333 | parser.add_argument( 334 | '--to-date', 335 | type=str, 336 | default=None, 337 | help="""\ 338 | Limit the operations to the files that are older than --to-date (inclusive). 339 | The date must be specified in format YYYY-MM-DD. Files with unknown date won't be skipped. 
340 | """, 341 | ) 342 | 343 | return parser.parse_args(args) 344 | 345 | 346 | def setup_logging(options): 347 | """Configure logging.""" 348 | root = logging.getLogger('') 349 | root.setLevel(logging.WARNING) 350 | formatter = logging.Formatter( 351 | '[%(asctime)s] - [%(levelname)s] - %(message)s', '%Y-%m-%d %H:%M:%S') 352 | ch = logging.StreamHandler() 353 | ch.setFormatter(formatter) 354 | root.addHandler(ch) 355 | if not options.quiet ^ options.progress: 356 | logger.setLevel(options.debug and logging.DEBUG or logging.INFO) 357 | else: 358 | logger.setLevel(logging.WARNING) 359 | if options.log: 360 | logfile = os.path.expanduser(options.log) 361 | fh = logging.FileHandler(logfile) 362 | fh.setFormatter(formatter) 363 | logger.addHandler(fh) 364 | logger.debug("Debug logging output enabled.") 365 | logger.debug("Running Phockup version %s", __version__) 366 | 367 | 368 | def main(options): 369 | check_dependencies() 370 | 371 | return Phockup( 372 | options.input_dir, 373 | options.output_dir, 374 | dir_format=options.date, 375 | move=options.move, 376 | link=options.link, 377 | date_regex=options.regex, 378 | original_filenames=options.original_names, 379 | timestamp=options.timestamp, 380 | date_field=options.date_field, 381 | dry_run=options.dry_run, 382 | quiet=options.quiet, 383 | progress=options.progress, 384 | max_depth=options.maxdepth, 385 | file_type=options.file_type, 386 | max_concurrency=options.max_concurrency, 387 | no_date_dir=options.no_date_dir, 388 | skip_unknown=options.skip_unknown, 389 | movedel=options.movedel, 390 | rmdirs=options.rmdirs, 391 | output_prefix=options.output_prefix, 392 | output_suffix=options.output_suffix, 393 | from_date=options.from_date, 394 | to_date=options.to_date 395 | ) 396 | 397 | 398 | if __name__ == '__main__': 399 | try: 400 | options = parse_args() 401 | setup_logging(options) 402 | main(options) 403 | except Exception as e: 404 | logger.warning(e) 405 | sys.exit(1) 406 | except KeyboardInterrupt: 407 | logger.error("Exiting phockup...") 408 | sys.exit(1) 409 | sys.exit(0) 410 | -------------------------------------------------------------------------------- /phockup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export LANGUAGE=C 4 | export LC_ALL=C 5 | export LANG=C 6 | 7 | # figure out the snap architecture lib name 8 | case $SNAP_ARCH in 9 | amd64) 10 | ARCH_LIB_NAME="x86_64-linux-gnu" 11 | ;; 12 | arm64) 13 | ARCH_LIB_NAME="aarch64-linux-gnu" 14 | ;; 15 | *) 16 | # unsupported or unknown architecture 17 | exit 1 18 | ;; 19 | esac 20 | 21 | PERL_VERSION=$(perl -version | grep -Po '\(v\K([^\)]*)') 22 | 23 | PERL5LIB="$PERL5LIB:$SNAP/usr/lib/$ARCH_LIB_NAME/perl/$PERL_VERSION" 24 | PERL5LIB="$PERL5LIB:$SNAP/usr/share/perl/$PERL_VERSION" 25 | PERL5LIB="$PERL5LIB:$SNAP/usr/share/perl5" 26 | 27 | export PERL5LIB 28 | 29 | exec "$SNAP/usr/bin/python3" "$SNAP/phockup.py" "$@" 30 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Phockup 2 | 3 | [![Tests](https://github.com/ivandokov/phockup/actions/workflows/tests.yml/badge.svg)](https://github.com/ivandokov/phockup/actions/workflows/tests.yml) 4 | [![Deploy](https://github.com/ivandokov/phockup/actions/workflows/deploy.yml/badge.svg)](https://github.com/ivandokov/phockup/actions/workflows/deploy.yml) 5 | 
[![Lint](https://github.com/ivandokov/phockup/actions/workflows/lint.yml/badge.svg)](https://github.com/ivandokov/phockup/actions/workflows/lint.yml) 6 | [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](license) 7 | 8 | Media sorting tool to organize photos and videos from your camera in folders by year, month and day. 9 | 10 | ## How it works 11 | The software will collect all files from the input directory and copy them to the output directory without changing the files' content. It will only rename the files and place them in the proper directory for year, month and day. 12 | 13 | All files which are not images or videos or those which do not have creation date information will be placed in a directory called `unknown` without file name change. By doing this you can be sure that the input directory can be safely deleted after the successful process completion because **all** files from the input directory have a copy in the output directory. 14 | 15 | If the target file already exists, its checksum is compared with the source to determine if it is a duplicate. If the checksums are different, we do not have a duplicate and the target filename will be suffixed with a number, for example "-1". If the checksums match, the copy operation will be skipped. 16 | 17 | ## Installation 18 | 19 | ### Docker 20 | 21 | The Docker container supports two operation modes. The first allows for a single execution of phockup. In this mode, the container will be stopped after the execution is complete. The second mode allows for execution in intervals. In this mode, the container will continue running until the user decides to stop it. 22 | 23 | #### Single execution mode 24 | In this mode, all phockup parameters need to be passed as direct arguments to the `docker run` command. Since you define a complete set of phockup parameters for this execution mode, this includes the paths to the input and output folders within the container. 25 | To execute phockup only once, use the following command: 26 | 27 | ``` 28 | docker run -v ~/Pictures:/mnt ivandokov/phockup:latest /mnt/input /mnt/output [PHOCKUP ARGUMENTS] 29 | ``` 30 | 31 | #### Continuous execution mode 32 | In this mode, all relevant settings are defined through environment variables and volume mappings. The folders where phockup moves files are always /mnt/input and /mnt/output within the container and cannot be changed. You can of course map any folder on your host system to those folders within the container. 33 | 34 | The `-v ~/Pictures/input:/mnt/input` part of the command mounts your `~/Pictures/input` directory to `/mnt/input` inside the container. The same is done for the output folder. You can pass any **absolute** path to be mounted to the container and later on be used as paths for the `phockup` command. The example below provides your `~/Pictures/input` as `INPUTDIR` and `~/Pictures/output` as `OUTPUTDIR`. You can pass additional arguments through the `OPTIONS` environment variable. 36 | 37 | To keep the container running and execute phockup in intervals, use the following command: 38 | 39 | ``` 40 | docker run -v ~/Pictures/input:/mnt/input -v ~/Pictures/output:/mnt/output -e "CRON=* * * * *" -e "OPTIONS=[PHOCKUP ARGUMENTS]" ivandokov/phockup:latest 41 | ``` 42 | 43 | This will execute phockup once every minute (as defined by the [value of the CRON environment variable](https://crontab.guru/#*_*_*_*_*)). However, the container will not spawn a new phockup process if another phockup process is still running. 
You can define other intervals for execution using the usual cron syntax. If you want to pass further arguments to phockup, use the OPTIONS environment variable. In this execution mode, phockup will always use the directories mounted to `/mnt/input` and `/mnt/output` and ignore arguments passed in the style of the single execution mode. 43 | 44 | ### Mac 45 | Requires [Homebrew](http://brew.sh/) 46 | ``` 47 | brew tap ivandokov/homebrew-contrib 48 | brew install phockup 49 | ``` 50 | 51 | ### Linux (snap) 52 | Requires [snapd](https://snapcraft.io/docs/core/install) 53 | ``` 54 | sudo snap install phockup 55 | ``` 56 | *Note: snap applications can access files only in your **home and `/media` directories** for security reasons. If your media files are not located in these directories you should use the installation method below. 57 | If your files are in `/media` you should run the following command to allow access:* 58 | ``` 59 | sudo snap connect phockup:removable-media 60 | ``` 61 | 62 | ### Linux (without snap) 63 | If you are using distro which doesn't support [snapd](https://snapcraft.io/docs/core/install) or you don't want to download the snap you can use the following commands to download the source and set it up 64 | ``` 65 | sudo apt-get install python3 libimage-exiftool-perl -y 66 | curl -L https://github.com/ivandokov/phockup/archive/latest.tar.gz -o phockup.tar.gz 67 | tar -zxf phockup.tar.gz 68 | sudo mv phockup-* /opt/phockup 69 | cd /opt/phockup 70 | pip3 install -r requirements.txt 71 | sudo ln -s /opt/phockup/phockup.py /usr/local/bin/phockup 72 | ``` 73 | 74 | ### Linux (AUR) 75 | 76 | If you are an arch user you can install from the [aur](https://aur.archlinux.org/packages/phockup). 77 | 78 | For example using [yay](https://github.com/Jguer/yay): 79 | 80 | ```bash 81 | yay -S phockup 82 | ``` 83 | 84 | ### Windows 85 | * Download and install latest stable [Python 3](https://www.python.org/downloads/windows/) 86 | * Download Phockup's [latest release](https://github.com/ivandokov/phockup/archive/latest.tar.gz) and extract the archive 87 | * Download exiftool from the official [website](https://exiftool.org/) and extract the archive 88 | * Rename `exiftool(-k).exe` to `exiftool.exe` 89 | * Move `exiftool.exe` to phockup folder 90 | * Open Command Prompt and `cd` to phockup folder 91 | * Use the command below (use `phockup.py` instead of `phockup`) 92 | 93 | ## Usage 94 | Organize photos from one directory into another 95 | ``` 96 | phockup INPUTDIR OUTPUTDIR 97 | ``` 98 | 99 | `INPUTDIR` is the directory where your photos are located. 100 | `OUTPUTDIR` is the directory where your **sorted** photos will be stored. It could be a new not existing directory. 101 | 102 | Example: 103 | ``` 104 | phockup ~/Pictures/camera ~/Pictures/sorted 105 | ``` 106 | 107 | ### Version 108 | If you want to view the version of phockup use the flag `-v | --version`. 109 | 110 | ### Date format 111 | If you want to change the output directories date format you can do it by passing the format as `-d | --date` argument. 112 | You can choose different year format (e.g. 17 instead of 2017) or decide 113 | to skip the day directories and have all photos sorted in year/month. 114 | 115 | ``` 116 | Supported formats: 117 | YYYY - 2016, 2017 ... 118 | YY - 16, 17 ... 119 | MM - 07, 08, 09 ... 120 | M - July, August, September ... 121 | m - Jul, Aug, Sept ... 122 | DD - 27, 28, 29 ... (day of month) 123 | DDD - 123, 158, 365 ... (day of year) 124 | U - 00, 01, 53 ... 
(week of the year, Sunday first day of week) 125 | W - 00, 01, 53 ... (week of the year, Monday first day of week) 126 | 127 | Example: 128 | YYYY/MM/DD -> 2011/07/17 129 | YYYY/M/DD -> 2011/July/17 130 | YYYY/m/DD -> 2011/Jul/17 131 | YY/m-DD -> 11/Jul-17 132 | YYYY/U -> 2011/30 133 | YYYY/W -> 2011/28 134 | ``` 135 | 136 | ### Prefix/Suffix 137 | In order to support both aggregation and finer granularity of files 138 | sorted, you can specify a prefix or suffix (or both) to aid in storing 139 | files in directories beyond strictly date. 140 | 141 | *NOTE:* Prefixes and suffixes will also apply to the **'unknown'** folder to 142 | isolate files that cannot be processed into their respective folders. 143 | This creates a bit more chaos for 'unknown' files, but should allow 144 | them to be managed by whomever they "belong" to. 145 | 146 | #### Prefix 147 | The `--output_prefix` flag can be used to specify a directory to be 148 | appended to the `OUTPUTDIR`, and thus prepended to the date. 149 | 150 | For example: 151 | ``` 152 | phockup ~/Pictures/camera /mnt/sorted --output_prefix=nikon 153 | ``` 154 | would place files in folders similar to: 155 | ``` 156 | /mnt/sorted/nikon/2011/07/17 157 | /mnt/sorted/nikon/unknown 158 | ``` 159 | 160 | While it may seem to be redundant with `OUTPUTDIR`, this flag is 161 | intended to add support for more cleanly determining the output 162 | directory at run-time via environment variable expansion (i.e. use 163 | $USER, %USERNAME%, $HOSTNAME, etc. to aggregate files). 164 | 165 | For example: 166 | ``` 167 | phockup ~/Pictures/camera /mnt/sorted --output_prefix=$USER 168 | ``` 169 | 170 | would yield an output directory of 171 | ``` 172 | /mnt/sorted/ivandokov/2011/07/17 173 | /mnt/sorted/ivandokov/unknown 174 | ``` 175 | 176 | This allows the same script to be deployed to multiple users/machines 177 | and allows sorting into their respective top level directories. 178 | 179 | #### Suffix 180 | The `--output_suffix` flag can be used to specify a directory within the 181 | target date directory for a file. This allows files to be sorted in 182 | their respective date/time folders while additionally adding a 183 | directory based on the suffix value for additional metadata. 184 | 185 | For example: 186 | ``` 187 | phockup ~/Pictures/DCIM/NIKOND40 /mnt/sorted --output_suffix=nikon 188 | phockup ~/Pictures/DCIM/100APPLE /mnt/sorted --output_suffix=iphone 189 | ``` 190 | 191 | This would allow files to be stored in the following structure: 192 | 193 | ``` 194 | /mnt/sorted/2011/07/17/nikon/DCS_0001.NEF 195 | ... 196 | /mnt/sorted/2011/07/17/nikon/DCS_0099.NEF 197 | /mnt/sorted/unknown/nikon/ 198 | 199 | /mnt/sorted/2011/07/17/iphone/ABIL6163.HEIC 200 | ... 201 | /mnt/sorted/2011/07/17/iphone/YZYE9497.HEIC 202 | /mnt/sorted/unknown/iphone/ 203 | ``` 204 | 205 | The output suffix also allows for environment variable expansion (e.g. 206 | $USER, $HOSTNAME, %USERNAME%, etc.) allowing dynamic folders to 207 | represent additional metadata about the images. 208 | 209 | For example: 210 | 211 | ``` 212 | phockup ~/Pictures/ /mnt/sorted --output_suffix=$HOSTNAME 213 | 214 | or 215 | 216 | phockup ~/Pictures/ /mnt/sorted --output_suffix=$USER 217 | ``` 218 | could be used to sort images based on the source computer or user, 219 | preventing heterogeneous collections of images from disparate sources 220 | from saving to the same central repository. 
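The two flags can also be combined in a single run. A hypothetical example (the paths, user name and host name are illustrative, assuming `$USER` expands to `ivandokov` and `$HOSTNAME` to `laptop`):

```
phockup ~/Pictures/camera /mnt/sorted --output_prefix=$USER --output_suffix=$HOSTNAME
```

would place files in folders similar to:

```
/mnt/sorted/ivandokov/2011/07/17/laptop
/mnt/sorted/ivandokov/unknown/laptop
```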
221 | 222 | As shown in the combined example above, the two options can be used together to help sort and store images. 223 | 224 | #### Limit files processed by date 225 | `--from-date` flag can be used to limit the operations to the files that are newer than the provided date (inclusive). 226 | The date must be specified in format YYYY-MM-DD. Files with unknown date won't be skipped. 227 | 228 | For example: 229 | ``` 230 | phockup ~/Pictures/DCIM/NIKOND40 ~/Pictures/sorted --from-date="2017-01-02" 231 | ``` 232 | `--to-date` flag can be used to limit the operations to the files that are older than the provided date (inclusive). 233 | The date must be specified in format YYYY-MM-DD. Files with unknown date won't be skipped. 234 | 235 | For example: 236 | ``` 237 | phockup ~/Pictures/DCIM/NIKOND40 ~/Pictures/sorted --to-date="2017-01-02" 238 | ``` 239 | 240 | `--from-date` and `--to-date` can be combined for better control over the files that are processed. 241 | 242 | For example: 243 | ``` 244 | phockup ~/Pictures/DCIM/NIKOND40 ~/Pictures/sorted --from-date="2017-01-02" --to-date="2017-01-03" 245 | ``` 246 | 247 | ### Missing date information in EXIF 248 | If any of the photos does not have date information, you can use the `-r | --regex` option to specify a date format for date extraction from filenames: 249 | ``` 250 | --regex="(?P<day>\d{2})\.(?P<month>\d{2})\.(?P<year>\d{4})[_-]?(?P<hour>\d{2})\.(?P<minute>\d{2})\.(?P<second>\d{2})" 251 | ``` 252 | 253 | As a last resort, specify the `-t | --timestamp` option to use the file modification timestamp. This may not be accurate in all cases but can provide some kind of date if you'd rather it not go into the `unknown` folder. 254 | 255 | ### Move files 256 | Instead of copying, the process will move all files from the INPUTDIR to the OUTPUTDIR by using the flag `-m | --move`. This is useful when working with a big collection of files and the remaining free space is not enough to make a copy of the INPUTDIR. 257 | 258 | ### Link files 259 | Instead of copying, the process will create hard links to all files from the INPUTDIR into a new structure in the OUTPUTDIR by using the flag `-l | --link`. This is useful when the INPUTDIR already has a good structure of photos (like folders per device). 260 | 261 | ### Original filenames 262 | Organize the files in the selected format or using the default year/month/day format but keep the original filenames by using the flag `-o | --original-names`. 263 | 264 | ### File Type 265 | By default, Phockup addresses both image and video files. If you want to restrict your command to either images or videos only, use `--file-type=[image|video]`. 266 | 267 | ### Fix incorrect dates 268 | If the date extracted from photos is incorrect, you can use the `-f | --date-field` option to set the correct EXIF field to get date information from. Use this command to list which fields are available for a file: 269 | ``` 270 | exiftool -time:all -mimetype -j file.jpg 271 | ``` 272 | The output may look like this, but with more fields: 273 | ``` 274 | [{ 275 | "DateTimeOriginal": "2017:10:06 01:01:01", 276 | "CreateDate": "2017:01:01 01:01:01", 277 | }] 278 | ``` 279 | If the correct date is in `DateTimeOriginal`, you can include the option `--date-field=DateTimeOriginal` to get date information from it. 280 | To set multiple fields to be tried in order until a valid date is found, just join them with spaces in a quoted string like `"CreateDate FileModifyDate"`. 
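For example, to prefer `DateTimeOriginal` but fall back to `CreateDate` and finally the file modification date, a run could look like this (the paths are illustrative):

```
phockup ~/Pictures/camera ~/Pictures/sorted --date-field="DateTimeOriginal CreateDate FileModifyDate"
```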
281 | 282 | ### Dry run 283 | If you want phockup to run without any changes (don't copy/move any files) but just show which changes would be done, enable this feature by using the flag `-y | --dry-run`. 284 | 285 | ### Log 286 | If you want phockup to run and store the output in a log file use the flag `--log`. This flag can be used in conjunction with the flags `--quiet` or `--progress`. 287 | ``` 288 | --log=/log.txt 289 | ``` 290 | 291 | ### Quiet run 292 | If you want phockup to run without any output (displaying only error messages, and muting all progress messages) use the flag `--quiet`. 293 | 294 | ### Progress run 295 | If you want phockup to run with a progressbar (displaying only the progress and muting all progress messages (including errors)) use the flag `--progress`. 296 | 297 | 298 | ### Limit directory traversal depth 299 | If you would like to limit how deep the directories are traversed, you can use the `--maxdepth` option to specify the maximum number of levels below the input directory to process. In order to process only the input directory, you can disable sub-directory processing with: 300 | `--maxdepth=0` The current implementation is limited to a maximum depth of 255. 301 | 302 | ### Improving throughput with concurrency 303 | If you want to allocate additional CPUs/cores to the image processing 304 | operations, you can specify additional resources via the 305 | `--max-concurrency` flag. Specifying `--max-concurrency=n`, where `n` 306 | represents the maximum number of operations to attempt 307 | concurrently, will leverage the additional CPU resources to start 308 | additional file operations while waiting for file I/O. This can lead 309 | to significant increases in file processing throughput. 310 | 311 | Due to how concurrency is implemented in Phockup (specifically 312 | `ThreadPoolExecutor`), this option has the greatest impact on 313 | directories with a large numbers of files in them, 314 | versus many directories with small numbers of files in each. As a 315 | general rule, the concurrency _should not_ be set higher than the 316 | core-count of the system processing the images. 317 | 318 | `--max-concurrency=1` has the default behavior of no concurrency while 319 | processing the files in the directories. Beginning with 50% of the 320 | cores available is a good start. Larger numbers can have 321 | diminishing returns as the number of concurrent operations saturate 322 | the file I/O of the system. 323 | 324 | Concurrently processing files does have an impact on the order that 325 | messages are written to the console/log and the ability to quickly 326 | terminate the program, as the execution waits for all in-flight 327 | operations to complete before shutting down. 328 | 329 | ## Development 330 | 331 | ### Running tests 332 | To run the tests, first install the dev dependencies using 333 | 334 | ```bash 335 | pip3 install -r requirements-dev.txt 336 | ``` 337 | 338 | Then run the tests using 339 | 340 | ```bash 341 | pytest 342 | ``` 343 | 344 | To run the tests with coverage reports run 345 | ```bash 346 | pytest --cov-report term-missing:skip-covered --cov=src tests/ 347 | ``` 348 | 349 | Please add the necessary tests when committing a feature or improvement. 350 | 351 | 352 | ### Pre-commit checks 353 | We leverage the [pre-commit](https://pre-commit.com/) framework to automate some general linting/quality checks. 
354 | 355 | To install the hooks, from within the activated virtualenv run: 356 | 357 | ```bash 358 | pre-commit install 359 | ``` 360 | 361 | To manually execute the hooks, run: 362 | 363 | ```bash 364 | pre-commit run -a 365 | ``` 366 | 367 | ### Style Guide Ruleset 368 | Please make sure that the code is compliant as described below when committing a feature or improvement. 369 | 370 | #### Flake8 371 | We use [flake8](https://flake8.pycqa.org/en/latest/) to check the PEP 8 ruleset. 372 | 373 | Code style for the line length are following the description of the tool [black](https://black.readthedocs.io/en/stable/the_black_code_style.html#line-length) 374 | In a nutshell, this comes down to 88 characters per line. This number was found to produce significantly shorter files. 375 | 376 | #### isort 377 | We also use [isort](https://github.com/PyCQA/isort) to check if import are sorted alphabetically, separated into sections and by type. 378 | 379 | ##### single-quotes and double-quotes 380 | We try to adhere to the following as much as possible: 381 | Use single-quotes for string literals, e.g. 'my-identifier', but use double-quotes for strings that are likely to contain single-quote characters as part of the string itself (such as error messages, or any strings containing natural language), e.g. "You've got an error!". 382 | 383 | Single-quotes are easier to read and to type, but if a string contains single-quote characters then double-quotes are better than escaping the single-quote characters or wrapping the string in double single-quotes. 384 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | pytest 3 | pytest-mock 4 | pytest-cov 5 | pytest-socket 6 | pre-commit 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm==4.66.1 2 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/src/__init__.py -------------------------------------------------------------------------------- /src/date.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from datetime import datetime, timedelta 4 | 5 | 6 | class Date: 7 | def __init__(self, filename=None): 8 | self.filename = filename 9 | 10 | @staticmethod 11 | def parse(date: str) -> str: 12 | date = date.replace('YYYY', '%Y') # 2017 (year) 13 | date = date.replace('YY', '%y') # 17 (year) 14 | date = date.replace('m', '%b') # Dec (month) 15 | date = date.replace('MM', '%m') # 12 (month) 16 | date = date.replace('M', '%B') # December (month) 17 | date = date.replace('DDD', '%j') # 123 (day or year) 18 | date = date.replace('DD', '%d') # 25 (day) 19 | date = date.replace('U', '%U') # Week number (Sunday as the first day) 20 | date = date.replace('W', '%W') # Week number (Monday as the first day) 21 | date = date.replace('\\', os.path.sep) # path separator 22 | date = date.replace('/', os.path.sep) # path separator 23 | return date 24 | 25 | @staticmethod 26 | def strptime(date, date_format): 27 | return datetime.strptime(date, date_format) 28 | 29 | @staticmethod 30 | def 
build(date_object): 31 | return datetime( 32 | date_object['year'], date_object['month'], date_object['day'], 33 | date_object['hour'] if date_object.get('hour') else 0, 34 | date_object['minute'] if date_object.get('minute') else 0, 35 | date_object['second'] if date_object.get('second') else 0) 36 | 37 | def from_exif(self, exif, timestamp=None, user_regex=None, date_field=None): 38 | if date_field: 39 | keys = date_field.split() 40 | else: 41 | keys = ['SubSecCreateDate', 'SubSecDateTimeOriginal', 'CreateDate', 42 | 'DateTimeOriginal'] 43 | 44 | datestr = None 45 | 46 | for key in keys: 47 | # Skip 'bad' dates that return integers (-1) or have the format 0000... 48 | if key in exif and isinstance(exif[key], str) and not exif[key].startswith('0000'): 49 | datestr = exif[key] 50 | break 51 | 52 | # sometimes exif data can return all zeros 53 | # check to see if valid date first 54 | # sometimes this returns an int 55 | if datestr and isinstance(datestr, str) and not \ 56 | datestr.startswith('0000'): 57 | parsed_date = self.from_datestring(datestr) 58 | else: 59 | parsed_date = {'date': None, 'subseconds': ''} 60 | 61 | # apply TimeZone if available 62 | if exif.get('TimeZone') is not None and isinstance(exif['TimeZone'], str): 63 | timezonedata = exif['TimeZone'].split(':') 64 | if timezonedata and len(timezonedata) == 2: 65 | parsed_date['date'] = parsed_date['date'] + timedelta(hours=int(timezonedata[0]), minutes=int(timezonedata[1])) 66 | 67 | if parsed_date.get('date') is not None: 68 | return parsed_date 69 | else: 70 | if self.filename: 71 | return self.from_filename(user_regex, timestamp) 72 | else: 73 | return parsed_date 74 | 75 | @staticmethod 76 | def from_datestring(datestr) -> dict: 77 | datestr = datestr.split('.') 78 | date = datestr[0] 79 | if len(datestr) > 1: 80 | subseconds = datestr[1] 81 | else: 82 | subseconds = '' 83 | search = r'(.*)([+-]\d{2}:\d{2})' 84 | if re.search(search, date) is not None: 85 | date = re.sub(search, r'\1', date) 86 | try: 87 | parsed_date_time = Date.strptime(date, '%Y:%m:%d %H:%M:%S') 88 | except ValueError: 89 | try: 90 | parsed_date_time = Date.strptime(date, '%Y-%m-%d %H:%M:%S') 91 | except ValueError: 92 | parsed_date_time = None 93 | if re.search(search, subseconds) is not None: 94 | subseconds = re.sub(search, r'\1', subseconds) 95 | return { 96 | 'date': parsed_date_time, 97 | 'subseconds': subseconds 98 | } 99 | 100 | def from_filename(self, user_regex, timestamp=None): 101 | # If missing datetime from EXIF data check if filename is in datetime 102 | # format. For this use a user provided regex if possible. Otherwise 103 | # assume a filename such as IMG_20160915_123456.jpg as default. 
104 | default_regex = re.compile(r'.*[_-](?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})[_-]?(?P<hour>\d{2})(?P<minute>\d{2})(?P<second>\d{2})') 105 | regex = user_regex or default_regex 106 | matches = regex.search(os.path.basename(self.filename)) 107 | 108 | if matches: 109 | try: 110 | match_dir = matches.groupdict(default='0') 111 | # Convert str to int 112 | match_dir = dict([a, int(x)] for a, x in match_dir.items()) 113 | date = self.build(match_dir) 114 | except (KeyError, ValueError): 115 | date = None 116 | 117 | if date: 118 | return { 119 | 'date': date, 120 | 'subseconds': '' 121 | } 122 | 123 | if timestamp: 124 | return self.from_timestamp() 125 | 126 | def from_timestamp(self) -> dict: 127 | date = datetime.fromtimestamp(os.path.getmtime(self.filename)) 128 | return { 129 | 'date': date, 130 | 'subseconds': '' 131 | } 132 | -------------------------------------------------------------------------------- /src/dependency.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import shutil 3 | 4 | logger = logging.getLogger('phockup') 5 | 6 | 7 | def check_dependencies(): 8 | if shutil.which('exiftool') is None: 9 | raise Exception("Exiftool is not installed.\ 10 | Visit http://www.sno.phy.queensu.ca/~phil/exiftool/") 11 | -------------------------------------------------------------------------------- /src/exif.py: -------------------------------------------------------------------------------- 1 | import json 2 | import shlex 3 | import subprocess 4 | import sys 5 | import threading 6 | from subprocess import CalledProcessError, check_output 7 | 8 | 9 | class Exif(object): 10 | def __init__(self, filename): 11 | self.filename = filename 12 | 13 | def data(self): 14 | try: 15 | exif_command = self.get_exif_command(self.filename) 16 | if threading.current_thread() is threading.main_thread(): 17 | data = check_output(exif_command, shell=True).decode('UTF-8') 18 | else: 19 | # Swallow stderr in the case that multiple threads are executing 20 | data = check_output(exif_command, shell=True, stderr=subprocess.DEVNULL).decode('UTF-8') 21 | exif = json.loads(data)[0] 22 | except (CalledProcessError, UnicodeDecodeError): 23 | return None 24 | 25 | return exif 26 | 27 | @staticmethod 28 | def get_exif_command(filename): 29 | # Handle all platform variations 30 | if sys.platform == 'win32': 31 | return f'exiftool -time:all -mimetype -j "{filename}"' 32 | return f'exiftool -time:all -mimetype -j {shlex.quote(filename)}' 33 | -------------------------------------------------------------------------------- /src/phockup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import concurrent.futures 3 | import filecmp 4 | import logging 5 | import os 6 | import re 7 | import shutil 8 | import sys 9 | import time 10 | 11 | from tqdm import tqdm 12 | 13 | from src.date import Date 14 | from src.exif import Exif 15 | 16 | logger = logging.getLogger('phockup') 17 | ignored_files = ('.DS_Store', 'Thumbs.db') 18 | 19 | 20 | class Phockup: 21 | DEFAULT_DIR_FORMAT = ['%Y', '%m', '%d'] 22 | DEFAULT_NO_DATE_DIRECTORY = "unknown" 23 | 24 | def __init__(self, input_dir, output_dir, **args): 25 | start_time = time.time() 26 | self.files_processed = 0 27 | self.duplicates_found = 0 28 | self.unknown_found = 0 29 | self.files_moved = 0 30 | self.files_copied = 0 31 | 32 | input_dir = os.path.expanduser(input_dir) 33 | output_dir = os.path.expanduser(output_dir) 34 | 35 | if input_dir.endswith(os.path.sep): 36 | input_dir =
input_dir[:-1] 37 | if output_dir.endswith(os.path.sep): 38 | output_dir = output_dir[:-1] 39 | 40 | self.input_dir = input_dir 41 | self.output_dir = output_dir 42 | self.output_prefix = args.get('output_prefix') 43 | self.output_suffix = args.get('output_suffix') 44 | self.no_date_dir = args.get('no_date_dir') or Phockup.DEFAULT_NO_DATE_DIRECTORY 45 | self.dir_format = args.get('dir_format') or os.path.sep.join(Phockup.DEFAULT_DIR_FORMAT) 46 | self.move = args.get('move', False) 47 | self.link = args.get('link', False) 48 | self.original_filenames = args.get('original_filenames', False) 49 | self.date_regex = args.get('date_regex', None) 50 | self.timestamp = args.get('timestamp', False) 51 | self.date_field = args.get('date_field', False) 52 | self.skip_unknown = args.get("skip_unknown", False) 53 | self.movedel = args.get("movedel", False) 54 | self.rmdirs = args.get("rmdirs", False) 55 | self.dry_run = args.get('dry_run', False) 56 | self.progress = args.get('progress', False) 57 | self.max_depth = args.get('max_depth', -1) 58 | # default to concurrency of one to retain existing behavior 59 | self.max_concurrency = args.get("max_concurrency", 1) 60 | 61 | self.from_date = args.get("from_date", None) 62 | self.to_date = args.get("to_date", None) 63 | if self.from_date is not None: 64 | self.from_date = Date.strptime(f"{self.from_date} 00:00:00", "%Y-%m-%d %H:%M:%S") 65 | if self.to_date is not None: 66 | self.to_date = Date.strptime(f"{self.to_date} 23:59:59", "%Y-%m-%d %H:%M:%S") 67 | 68 | if self.max_concurrency > 1: 69 | logger.info(f"Using {self.max_concurrency} workers to process files.") 70 | 71 | self.stop_depth = self.input_dir.count(os.sep) + self.max_depth \ 72 | if self.max_depth > -1 else sys.maxsize 73 | self.file_type = args.get('file_type', None) 74 | 75 | if self.dry_run: 76 | logger.warning("Dry-run phockup (does a trial run with no permanent changes)...") 77 | 78 | self.check_directories() 79 | # Get the number of files 80 | if self.progress: 81 | file_count = self.get_file_count() 82 | with tqdm(desc=f"Progressing: '{self.input_dir}' ", 83 | total=file_count, 84 | unit="file", 85 | position=0, 86 | leave=True, 87 | ascii=(sys.platform == 'win32')) as self.pbar: 88 | self.walk_directory() 89 | else: 90 | self.pbar = None 91 | self.walk_directory() 92 | 93 | if self.move and self.rmdirs: 94 | self.rm_subdirs() 95 | 96 | run_time = time.time() - start_time 97 | if self.files_processed and run_time: 98 | self.print_action_report(run_time) 99 | 100 | def print_action_report(self, run_time): 101 | logger.info(f"Processed {self.files_processed} files in {run_time:.2f} seconds. Average Throughput: {self.files_processed/run_time:.2f} files/second") 102 | if self.unknown_found: 103 | logger.info(f"Found {self.unknown_found} files without EXIF date data.") 104 | if self.duplicates_found: 105 | logger.info(f"Found {self.duplicates_found} duplicate files.") 106 | if self.files_copied: 107 | if self.dry_run: 108 | logger.info(f"Would have copied {self.files_copied} files.") 109 | else: 110 | logger.info(f"Copied {self.files_copied} files.") 111 | if self.files_moved: 112 | if self.dry_run: 113 | logger.info(f"Would have moved {self.files_moved} files.") 114 | else: 115 | logger.info(f"Moved {self.files_moved} files.") 116 | 117 | def check_directories(self): 118 | """ 119 | Check if input and output directories exist. 120 | If input does not exist it exits the process. 121 | If output does not exist it tries to create it or exit with error.
122 | """ 123 | 124 | if not os.path.exists(self.input_dir): 125 | raise RuntimeError(f"Input directory '{self.input_dir}' does not exist") 126 | if not os.path.isdir(self.input_dir): 127 | raise RuntimeError(f"Input directory '{self.input_dir}' is not a directory") 128 | if not os.path.exists(self.output_dir): 129 | logger.warning(f"Output directory '{self.output_dir}' does not exist, creating now") 130 | try: 131 | if not self.dry_run: 132 | os.makedirs(self.output_dir) 133 | except OSError: 134 | raise OSError(f"Cannot create output '{self.output_dir}' directory. No write access!") 135 | 136 | def walk_directory(self): 137 | """ 138 | Walk input directory recursively and call process_file for each file 139 | except the ignored ones. 140 | """ 141 | 142 | # Walk the directory 143 | for root, dirnames, files in os.walk(self.input_dir): 144 | files.sort() 145 | file_paths_to_process = [] 146 | for filename in files: 147 | if filename in ignored_files: 148 | continue 149 | file_paths_to_process.append(os.path.join(root, filename)) 150 | if self.max_concurrency > 1: 151 | if not self.process_files(file_paths_to_process): 152 | return 153 | else: 154 | try: 155 | for file_path in file_paths_to_process: 156 | self.process_file(file_path) 157 | except KeyboardInterrupt: 158 | logger.warning("Received interrupt. Shutting down...") 159 | return 160 | if root.count(os.sep) >= self.stop_depth: 161 | del dirnames[:] 162 | 163 | def rm_subdirs(self): 164 | def _get_depth(sub_path): 165 | return sub_path.count(os.sep) - self.input_dir.count(os.sep) 166 | 167 | for root, dirs, files in os.walk(self.input_dir, topdown=False): 168 | # Traverse the tree bottom-up 169 | if _get_depth(root) > self.stop_depth: 170 | continue 171 | for name in dirs: 172 | dir_path = os.path.join(root, name) 173 | if _get_depth(dir_path) > self.stop_depth: 174 | continue 175 | try: 176 | os.rmdir(dir_path) # Try to remove the dir 177 | logger.info(f"Deleted empty directory: {dir_path}") 178 | except OSError as e: 179 | logger.info(f"{e.strerror} - {dir_path} not deleted.") 180 | 181 | def get_file_count(self): 182 | file_count = 0 183 | for root, dirnames, files in os.walk(self.input_dir): 184 | file_count += len(files) 185 | if root.count(os.sep) >= self.stop_depth: 186 | del dirnames[:] 187 | return file_count 188 | 189 | def get_file_type(self, mimetype): 190 | """ 191 | Check if given file_type is image or video 192 | Return None if other 193 | Use mimetype to determine if the file is an image or video. 194 | """ 195 | patternImage = re.compile('^(image/.+|application/vnd.adobe.photoshop)$') 196 | if patternImage.match(mimetype): 197 | return 'image' 198 | 199 | patternVideo = re.compile('^(video/.*)$') 200 | if patternVideo.match(mimetype): 201 | return 'video' 202 | return None 203 | 204 | def get_output_dir(self, date): 205 | """ 206 | Generate output directory path based on the extracted date and 207 | formatted using dir_format. 208 | If date is missing from the exifdata the file is going to "unknown" 209 | directory unless user included a regex from filename or uses timestamp. 
210 | """ 211 | try: 212 | path = [self.output_dir, 213 | self.output_prefix, 214 | date['date'].date().strftime(self.dir_format), 215 | self.output_suffix] 216 | except (TypeError, ValueError): 217 | path = [self.output_dir, 218 | self.output_prefix, 219 | self.no_date_dir, 220 | self.output_suffix] 221 | # Remove any None values that made it in the path 222 | path = [p for p in path if p is not None] 223 | fullpath = os.path.normpath(os.path.sep.join(path)) 224 | 225 | if not os.path.isdir(fullpath) and not self.dry_run: 226 | os.makedirs(fullpath, exist_ok=True) 227 | 228 | return fullpath 229 | 230 | def get_file_name(self, original_filename, date): 231 | """ 232 | Generate file name based on exif data unless it is missing or 233 | original filenames are required. Then use original file name 234 | """ 235 | if self.original_filenames: 236 | return os.path.basename(original_filename) 237 | 238 | try: 239 | filename = [ 240 | f'{date["date"].year :04d}', 241 | f'{date["date"].month :02d}', 242 | f'{date["date"].day :02d}', 243 | '-', 244 | f'{date["date"].hour :02d}', 245 | f'{date["date"].minute :02d}', 246 | f'{date["date"].second :02d}', 247 | ] 248 | 249 | if date['subseconds']: 250 | filename.append(date['subseconds']) 251 | 252 | return ''.join(filename) + os.path.splitext(original_filename)[1] 253 | # TODO: Double check if this is correct! 254 | except TypeError: 255 | return os.path.basename(original_filename) 256 | 257 | def process_files(self, file_paths_to_process): 258 | # With all the appropriate files in the directory added to the 259 | # list, process the directory concurrently using threads 260 | with concurrent.futures.ThreadPoolExecutor( 261 | max_workers=self.max_concurrency) as executor: 262 | try: 263 | for _ in executor.map(self.process_file, 264 | file_paths_to_process): 265 | pass 266 | except KeyboardInterrupt: 267 | logger.warning( 268 | f"Received interrupt. 
Shutting down {self.max_concurrency} workers...") 269 | executor.shutdown(wait=True) 270 | return False 271 | return True 272 | 273 | def process_file(self, filename): 274 | """ 275 | Process the file using the selected strategy 276 | If file is .xmp skip it so process_xmp method can handle it 277 | """ 278 | if str.endswith(filename, '.xmp'): 279 | return None 280 | 281 | progress = f'{filename}' 282 | 283 | output, target_file_name, target_file_path, target_file_type, file_date = self.get_file_name_and_path(filename) 284 | suffix = 1 285 | target_file = target_file_path 286 | 287 | while True: 288 | if self.file_type is not None \ 289 | and self.file_type != target_file_type: 290 | progress = f"{progress} => skipped, file is '{target_file_type}' \ 291 | but looking for '{self.file_type}'" 292 | logger.info(progress) 293 | break 294 | 295 | date_unknown = file_date is None or output.endswith(self.no_date_dir) 296 | if self.skip_unknown and output.endswith(self.no_date_dir): 297 | # Skip files that didn't generate a path from EXIF data 298 | progress = f"{progress} => skipped, unknown date EXIF information for '{target_file_name}'" 299 | self.unknown_found += 1 300 | if self.progress: 301 | self.pbar.write(progress) 302 | logger.info(progress) 303 | break 304 | 305 | if not date_unknown: 306 | skip = False 307 | if type(file_date) is dict: 308 | file_date = file_date["date"] 309 | if self.from_date is not None and file_date < self.from_date: 310 | progress = f"{progress} => {filename} skipped: date {file_date} is older than --from-date {self.from_date}" 311 | skip = True 312 | if self.to_date is not None and file_date > self.to_date: 313 | progress = f"{progress} => {filename} skipped: date {file_date} is newer than --to-date {self.to_date}" 314 | skip = True 315 | if skip: 316 | if self.progress: 317 | self.pbar.write(progress) 318 | logger.info(progress) 319 | break 320 | 321 | if os.path.isfile(target_file): 322 | if filename != target_file and filecmp.cmp(filename, target_file, shallow=False): 323 | if self.movedel and self.move and self.skip_unknown: 324 | if not self.dry_run: 325 | os.remove(filename) 326 | progress = f'{progress} => deleted, duplicated file {target_file}' 327 | else: 328 | progress = f'{progress} => skipped, duplicated file {target_file}' 329 | self.duplicates_found += 1 330 | if self.progress: 331 | self.pbar.write(progress) 332 | logger.info(progress) 333 | break 334 | else: 335 | if self.move: 336 | try: 337 | self.files_moved += 1 338 | if not self.dry_run: 339 | shutil.move(filename, target_file) 340 | except FileNotFoundError: 341 | progress = f'{progress} => skipped, no such file or directory' 342 | if self.progress: 343 | self.pbar.write(progress) 344 | logger.warning(progress) 345 | break 346 | elif self.link and not self.dry_run: 347 | os.link(filename, target_file) 348 | else: 349 | try: 350 | self.files_copied += 1 351 | if not self.dry_run: 352 | shutil.copy2(filename, target_file) 353 | except FileNotFoundError: 354 | progress = f'{progress} => skipped, no such file or directory' 355 | if self.progress: 356 | self.pbar.write(progress) 357 | logger.warning(progress) 358 | break 359 | 360 | progress = f'{progress} => {target_file}' 361 | if self.progress: 362 | self.pbar.write(progress) 363 | logger.info(progress) 364 | 365 | self.process_xmp(filename, target_file_name, suffix, output) 366 | break 367 | 368 | suffix += 1 369 | target_split = os.path.splitext(target_file_path) 370 | target_file = f'{target_split[0]}-{suffix}{target_split[1]}' 371 | 372 | 
self.files_processed += 1 373 | if self.progress: 374 | self.pbar.update(1) 375 | 376 | def get_file_name_and_path(self, filename): 377 | """ 378 | Returns target file name and path 379 | """ 380 | exif_data = Exif(filename).data() 381 | target_file_type = None 382 | 383 | if exif_data and 'MIMEType' in exif_data: 384 | target_file_type = self.get_file_type(exif_data['MIMEType']) 385 | 386 | date = None 387 | if target_file_type in ['image', 'video']: 388 | date = Date(filename).from_exif(exif_data, self.timestamp, self.date_regex, 389 | self.date_field) 390 | output = self.get_output_dir(date) 391 | target_file_name = self.get_file_name(filename, date) 392 | if not self.original_filenames: 393 | target_file_name = target_file_name.lower() 394 | else: 395 | output = self.get_output_dir([]) 396 | target_file_name = os.path.basename(filename) 397 | 398 | target_file_path = os.path.sep.join([output, target_file_name]) 399 | return output, target_file_name, target_file_path, target_file_type, date 400 | 401 | def process_xmp(self, original_filename, file_name, suffix, output): 402 | """ 403 | Process xmp files. These are metadata for RAW images 404 | """ 405 | xmp_original_with_ext = original_filename + '.xmp' 406 | xmp_original_without_ext = os.path.splitext(original_filename)[0] + '.xmp' 407 | 408 | suffix = f'-{suffix}' if suffix > 1 else '' 409 | 410 | xmp_files = {} 411 | 412 | if os.path.isfile(xmp_original_with_ext): 413 | xmp_target = f'{file_name}{suffix}.xmp' 414 | xmp_files[xmp_original_with_ext] = xmp_target 415 | if os.path.isfile(xmp_original_without_ext): 416 | xmp_target = f'{(os.path.splitext(file_name)[0])}{suffix}.xmp' 417 | xmp_files[xmp_original_without_ext] = xmp_target 418 | 419 | for original, target in xmp_files.items(): 420 | xmp_path = os.path.sep.join([output, target]) 421 | logger.info(f'{original} => {xmp_path}') 422 | 423 | if not self.dry_run: 424 | if self.move: 425 | shutil.move(original, xmp_path) 426 | elif self.link: 427 | os.link(original, xmp_path) 428 | else: 429 | shutil.copy2(original, xmp_path) 430 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/__init__.py -------------------------------------------------------------------------------- /tests/input/!#$%'+-.^_`~.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/!#$%'+-.^_`~.jpg -------------------------------------------------------------------------------- /tests/input/UNKNOWN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/UNKNOWN.jpg -------------------------------------------------------------------------------- /tests/input/date_20170101_010101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/date_20170101_010101.jpg -------------------------------------------------------------------------------- /tests/input/exif.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/exif.jpg -------------------------------------------------------------------------------- /tests/input/exif.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/exif.mp4 -------------------------------------------------------------------------------- /tests/input/link_to_date_20170101_010101.jpg: -------------------------------------------------------------------------------- 1 | date_20170101_010101.jpg -------------------------------------------------------------------------------- /tests/input/other.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/other.txt -------------------------------------------------------------------------------- /tests/input/phockup's exif test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/phockup's exif test.jpg -------------------------------------------------------------------------------- /tests/input/sub_folder/date_20180101_010101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/sub_folder/date_20180101_010101.jpg -------------------------------------------------------------------------------- /tests/input/xmp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/xmp.jpg -------------------------------------------------------------------------------- /tests/input/xmp.jpg.xmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/xmp.jpg.xmp -------------------------------------------------------------------------------- /tests/input/xmp_ext.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/xmp_ext.jpg -------------------------------------------------------------------------------- /tests/input/xmp_ext.jpg.xmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/xmp_ext.jpg.xmp -------------------------------------------------------------------------------- /tests/input/xmp_ext.xmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/xmp_ext.xmp -------------------------------------------------------------------------------- /tests/input/xmp_noext.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/xmp_noext.jpg -------------------------------------------------------------------------------- 
/tests/input/xmp_noext.xmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivandokov/phockup/8591be3ac6de83a1755e3aa35dd1fddba9e854e0/tests/input/xmp_noext.xmp -------------------------------------------------------------------------------- /tests/test_date.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import re 4 | from datetime import datetime 5 | 6 | from src.date import Date 7 | 8 | os.chdir(os.path.dirname(__file__)) 9 | 10 | 11 | def test_parse_date_format_valid(): 12 | """Test that parse_date_format returns a valid format for strftime""" 13 | datetime.strftime( 14 | datetime.now(), 15 | Date().parse("YYYY YY m MM M DDD DD \\ /") 16 | ) 17 | 18 | 19 | def test_get_date_from_exif(): 20 | assert Date().from_exif({ 21 | "CreateDate": "2017-01-01 01:01:01" 22 | }) == { 23 | "date": datetime(2017, 1, 1, 1, 1, 1), 24 | "subseconds": "" 25 | } 26 | 27 | 28 | def test_get_date_from_exif_with_timezone(): 29 | assert Date().from_exif({ 30 | "CreateDate": "2023-01-01 01:01:01", 31 | "TimeZone": "-07:00", 32 | }) == { 33 | "date": datetime(2022, 12, 31, 18, 1, 1), 34 | "subseconds": "" 35 | } 36 | 37 | 38 | def test_get_date_from_custom_date_field(): 39 | assert Date().from_exif({ 40 | "CustomField": "2017:01:01 01:01:01" 41 | }, date_field="CustomField") == { 42 | "date": datetime(2017, 1, 1, 1, 1, 1), 43 | "subseconds": "" 44 | } 45 | 46 | 47 | def test_get_date_from_exif_strip_timezone(): 48 | assert Date().from_exif({ 49 | "CreateDate": "2017-01-01 01:01:01-02:00" 50 | }) == { 51 | "date": datetime(2017, 1, 1, 1, 1, 1), 52 | "subseconds": "" 53 | } 54 | 55 | 56 | def test_get_date_from_exif_strip_timezone_sub_sec(): 57 | assert Date().from_exif({ 58 | "SubSecCreateDate": "2019:10:06 11:02:50.575+01:00" 59 | }) == { 60 | "date": datetime(2019, 10, 6, 11, 2, 50), 61 | "subseconds": "575" 62 | } 63 | 64 | 65 | def test_get_date_from_exif_colon(): 66 | assert Date().from_exif({ 67 | "CreateDate": "2017:01:01 01:01:01" 68 | }) == { 69 | "date": datetime(2017, 1, 1, 1, 1, 1), 70 | "subseconds": "" 71 | } 72 | 73 | 74 | def test_get_date_from_exif_subseconds(): 75 | assert Date().from_exif({ 76 | "CreateDate": "2017-01-01 01:01:01.20" 77 | }) == { 78 | "date": datetime(2017, 1, 1, 1, 1, 1), 79 | "subseconds": "20" 80 | } 81 | 82 | 83 | def test_get_date_from_exif_invalid(): 84 | assert Date().from_exif({ 85 | "CreateDate": "Invalid" 86 | }) == { 87 | "date": None, 88 | "subseconds": "" 89 | } 90 | 91 | 92 | def test_get_date_from_filename(): 93 | assert Date("IMG_20170101_010101.jpg").from_exif({}) == { 94 | "date": datetime(2017, 1, 1, 1, 1, 1), 95 | "subseconds": "" 96 | } 97 | 98 | 99 | def test_get_date_filename_invalid(): 100 | assert Date("IMG_20170101_999999.jpg").from_exif({}) is None 101 | 102 | 103 | def test_get_date_none_on_no_info(): 104 | assert Date("Foo.jpg").from_exif({}) is None 105 | 106 | 107 | def test_get_date_none_on_no_error(): 108 | assert Date("IMG_2017_01.jpg").from_exif({}) is None 109 | 110 | 111 | def test_get_date_custom_regex(): 112 | """ 113 | A valid regex with a matching filename. Returns a datetime. 
114 | """ 115 | date_regex = re.compile(r"(?P<day>\d{2})\.(?P<month>\d{2})\.(?P<year>\d{4})[_-]?(?P<hour>\d{2})\.(?P<minute>\d{2})\.(?P<second>\d{2})") # noqa: E501 116 | assert Date("IMG_27.01.2015-19.20.00.jpg").from_exif({}, False, 117 | date_regex) == { 118 | "date": datetime(2015, 1, 27, 19, 20, 00), 119 | "subseconds": "" 120 | } 121 | 122 | 123 | def test_get_date_custom_regex_invalid(): 124 | """ 125 | A valid regex with a matching filename. 126 | Returns None because there is not enough information in the filename. 127 | """ 128 | date_regex = re.compile(r"(?P<hour>\d{2})\.(?P<minute>\d{2})\.(?P<second>\d{2})") # noqa: E501 129 | assert Date("19.20.00.jpg").from_exif({}, False, date_regex) is None 130 | 131 | 132 | def test_get_date_custom_regex_no_match(): 133 | """ 134 | A valid regex with a non-matching filename. 135 | """ 136 | date_regex = re.compile(r"(?P<day>\d{2})\.(?P<month>\d{2})\.(?P<year>\d{4})[_-]?(?P<hour>\d{2})\.(?P<minute>\d{2})\.(?P<second>\d{2})") # noqa: E501 137 | assert Date("Foo.jpg").from_exif({}, False, date_regex) is None 138 | 139 | 140 | def test_get_date_custom_regex_optional_time(): 141 | """ 142 | A valid regex with a matching filename that doesn't have hour information. 143 | However, the regex in question has hour information as optional. 144 | """ 145 | date_regex = re.compile(r"(?P<day>\d{2})\.(?P<month>\d{2})\.(?P<year>\d{4})[_-]?((?P<hour>\d{2})\.(?P<minute>\d{2})\.(?P<second>\d{2}))?") # noqa: E501 146 | assert Date("IMG_27.01.2015.jpg").from_exif({}, False, date_regex) == { 147 | "date": datetime(2015, 1, 27, 0, 0, 00), 148 | "subseconds": "" 149 | } 150 | -------------------------------------------------------------------------------- /tests/test_exif.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | from subprocess import CalledProcessError 4 | 5 | from src.exif import Exif 6 | 7 | os.chdir(os.path.dirname(__file__)) 8 | 9 | 10 | def test_exif_reads_valid_file(): 11 | exif = Exif("input/exif.jpg") 12 | assert exif.data()['CreateDate'] == '2017:01:01 01:01:01' 13 | 14 | 15 | def test_exif_reads_files_with_illegal_characters(): 16 | exif = Exif("input/!#$%'+-.^_`~.jpg") 17 | assert exif.data()['CreateDate'] == '2017:01:01 01:01:01' 18 | 19 | 20 | def test_exif_reads_file_with_spaces_punctuation(): 21 | exif = Exif("input/phockup's exif test.jpg") 22 | assert exif.data()['CreateDate'] == '2017:01:01 01:01:01' 23 | 24 | 25 | def test_exif_handles_exception(mocker): 26 | mocker.patch('subprocess.check_output', 27 | side_effect=CalledProcessError(2, 'cmd')) 28 | exif = Exif("not-existing.jpg") 29 | assert exif.data() is None 30 | -------------------------------------------------------------------------------- /tests/test_phockup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | import os 4 | import shutil 5 | import sys 6 | from datetime import datetime 7 | 8 | import pytest 9 | 10 | from src.dependency import check_dependencies 11 | from src.exif import Exif 12 | from src.phockup import Phockup 13 | 14 | os.chdir(os.path.dirname(__file__)) 15 | 16 | 17 | def test_check_dependencies(mocker): 18 | mocker.patch('shutil.which', return_value='exiftool') 19 | mocker.patch('sys.exit') 20 | 21 | check_dependencies() 22 | assert not sys.exit.called 23 | 24 | 25 | def test_check_dependencies_missing(mocker): 26 | mocker.patch('shutil.which', return_value=None) 27 | mocker.patch('sys.exit') 28 | 29 | with pytest.raises(Exception, match="Exiftool is not installed.
\ 30 | Visit http://www.sno.phy.queensu.ca/~phil/exiftool/"): 31 | check_dependencies() 32 | 33 | 34 | def test_exception_if_missing_input_directory(mocker): 35 | mocker.patch('os.makedirs') 36 | mocker.patch('sys.exit') 37 | 38 | with pytest.raises(RuntimeError, match="Input directory 'in' does not exist"): 39 | Phockup('in', 'out') 40 | 41 | 42 | def test_exception_if_input_not_directory(mocker): 43 | mocker.patch('os.makedirs') 44 | mocker.patch('sys.exit') 45 | 46 | with pytest.raises(RuntimeError, match="Input directory 'input/exif.jpg' is not a directory"): 47 | Phockup('input/exif.jpg', 'out') 48 | 49 | 50 | def test_removing_trailing_slash_for_input_output(mocker): 51 | mocker.patch('os.makedirs') 52 | mocker.patch('sys.exit') 53 | mocker.patch.object(Phockup, 'check_directories') 54 | if sys.platform == 'win32': 55 | phockup = Phockup('in\\', 'out\\') 56 | else: 57 | phockup = Phockup('in/', 'out/') 58 | assert phockup.input_dir == 'in' 59 | assert phockup.output_dir == 'out' 60 | 61 | 62 | def test_exception_for_no_write_access_when_creating_output_dir(mocker): 63 | mocker.patch.object(Phockup, 'walk_directory') 64 | if sys.platform == 'win32': 65 | protected_dir = f"{os.getenv('WINDIR')}/phockup" 66 | else: 67 | protected_dir = '/root/phockup' 68 | with pytest.raises(OSError, match="Cannot create output.*"): 69 | 70 | Phockup('input', protected_dir) 71 | 72 | 73 | def test_walking_directory(): 74 | shutil.rmtree('output', ignore_errors=True) 75 | Phockup('input', 'output') 76 | validate_copy_operations() 77 | shutil.rmtree('output', ignore_errors=True) 78 | 79 | 80 | def test_walking_directory_prefix(): 81 | shutil.rmtree('output', ignore_errors=True) 82 | prefix = "Phockup Images" 83 | Phockup('input', 'output', output_prefix=prefix) 84 | validate_copy_operations(prefix=prefix) 85 | shutil.rmtree('output', ignore_errors=True) 86 | 87 | 88 | def test_walking_directory_suffix(): 89 | shutil.rmtree('output', ignore_errors=True) 90 | suffix = "iphone" 91 | Phockup('input', 'output', output_suffix=suffix) 92 | validate_copy_operations(suffix=suffix) 93 | shutil.rmtree('output', ignore_errors=True) 94 | 95 | 96 | def test_walking_directory_prefix_suffix(): 97 | shutil.rmtree('output', ignore_errors=True) 98 | prefix = "ivandokov" 99 | suffix = "camera" 100 | Phockup('input', 'output', output_prefix=prefix, output_suffix=suffix) 101 | validate_copy_operations(prefix=prefix, suffix=suffix) 102 | shutil.rmtree('output', ignore_errors=True) 103 | 104 | 105 | def test_dry_run(): 106 | shutil.rmtree('output', ignore_errors=True) 107 | Phockup('input', 'output', dry_run=True) 108 | assert not os.path.isdir('output') 109 | dir1 = 'output/2017/01/01' 110 | dir2 = 'output/2017/10/06' 111 | dir3 = 'output/unknown' 112 | dir4 = 'output/2018/01/01/' 113 | assert not os.path.isdir(dir1) 114 | assert not os.path.isdir(dir2) 115 | assert not os.path.isdir(dir3) 116 | assert not os.path.isdir(dir4) 117 | 118 | 119 | def test_progress(): 120 | shutil.rmtree('output', ignore_errors=True) 121 | Phockup('input', 'output', progress=True) 122 | dir1 = 'output/2017/01/01' 123 | dir2 = 'output/2017/10/06' 124 | dir3 = 'output/unknown' 125 | dir4 = 'output/2018/01/01/' 126 | assert os.path.isdir(dir1) 127 | assert os.path.isdir(dir2) 128 | assert os.path.isdir(dir3) 129 | assert os.path.isdir(dir4) 130 | assert len([name for name in os.listdir(dir1) if 131 | os.path.isfile(os.path.join(dir1, name))]) == 3 132 | assert len([name for name in os.listdir(dir2) if 133 | os.path.isfile(os.path.join(dir2, name))]) 
== 1 134 | assert len([name for name in os.listdir(dir3) if 135 | os.path.isfile(os.path.join(dir3, name))]) == 1 136 | assert len([name for name in os.listdir(dir4) if 137 | os.path.isfile(os.path.join(dir4, name))]) == 1 138 | shutil.rmtree('output', ignore_errors=True) 139 | 140 | 141 | def test_get_file_type(mocker): 142 | mocker.patch.object(Phockup, 'check_directories') 143 | assert Phockup('in', '.').get_file_type("image/jpeg") 144 | assert Phockup('in', '.').get_file_type("video/mp4") 145 | assert not Phockup('in', '.').get_file_type("foo/bar") 146 | 147 | 148 | def test_get_file_name(mocker): 149 | mocker.patch.object(Phockup, 'check_directories') 150 | mocker.patch.object(Phockup, 'walk_directory') 151 | date = { 152 | "date": datetime(2017, 1, 1, 1, 1, 1), 153 | "subseconds": "20" 154 | } 155 | 156 | assert Phockup('in', 'out').get_file_name("Bar/Foo.jpg", date) == \ 157 | "20170101-01010120.jpg" 158 | 159 | 160 | def test_get_file_name_is_original_on_exception(mocker): 161 | mocker.patch.object(Phockup, 'check_directories') 162 | mocker.patch.object(Phockup, 'walk_directory') 163 | assert Phockup('in', 'out').get_file_name("Bar/Foo.jpg", None) == "Foo.jpg" 164 | 165 | 166 | def test_process_file_with_filename_date(mocker): 167 | shutil.rmtree('output', ignore_errors=True) 168 | mocker.patch.object(Phockup, 'check_directories') 169 | mocker.patch.object(Phockup, 'walk_directory') 170 | mocker.patch.object(Exif, 'data') 171 | Exif.data.return_value = { 172 | "MIMEType": "image/jpeg" 173 | } 174 | Phockup('input', 'output').process_file("input/date_20170101_010101.jpg") 175 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 176 | shutil.rmtree('output', ignore_errors=True) 177 | 178 | 179 | def test_process_link_to_file_with_filename_date(mocker): 180 | shutil.rmtree('output', ignore_errors=True) 181 | mocker.patch.object(Phockup, 'check_directories') 182 | mocker.patch.object(Phockup, 'walk_directory') 183 | Phockup('input', 'output').process_file( 184 | "input/link_to_date_20170101_010101.jpg") 185 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 186 | shutil.rmtree('output', ignore_errors=True) 187 | 188 | 189 | def test_process_broken_link(mocker, caplog): 190 | shutil.rmtree('output', ignore_errors=True) 191 | mocker.patch.object(Phockup, 'check_directories') 192 | mocker.patch.object(Phockup, 'walk_directory') 193 | with caplog.at_level(logging.WARNING): 194 | Phockup('input', 'output').process_file("input/not_a_file.jpg") 195 | assert 'skipped, no such file or directory' in caplog.text 196 | shutil.rmtree('output', ignore_errors=True) 197 | 198 | 199 | def test_process_broken_link_move(mocker, caplog): 200 | shutil.rmtree('output', ignore_errors=True) 201 | mocker.patch.object(Phockup, 'check_directories') 202 | mocker.patch.object(Phockup, 'walk_directory') 203 | phockup = Phockup('input', 'output', move=True) 204 | phockup.process_file("input/not_a_file.jpg") 205 | with caplog.at_level(logging.WARNING): 206 | Phockup('input', 'output').process_file("input/not_a_file.jpg") 207 | assert 'skipped, no such file or directory' in caplog.text 208 | shutil.rmtree('output', ignore_errors=True) 209 | 210 | 211 | def test_process_image_exif_date(mocker): 212 | shutil.rmtree('output', ignore_errors=True) 213 | mocker.patch.object(Phockup, 'check_directories') 214 | mocker.patch.object(Phockup, 'walk_directory') 215 | Phockup('input', 'output').process_file("input/exif.jpg") 216 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 217 | 
shutil.rmtree('output', ignore_errors=True) 218 | 219 | 220 | def test_process_image_xmp(mocker): 221 | shutil.rmtree('output', ignore_errors=True) 222 | mocker.patch.object(Phockup, 'check_directories') 223 | mocker.patch.object(Phockup, 'walk_directory') 224 | Phockup('input', 'output').process_file("input/xmp.jpg") 225 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 226 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg.xmp") 227 | shutil.rmtree('output', ignore_errors=True) 228 | 229 | 230 | def test_process_image_xmp_noext(mocker): 231 | shutil.rmtree('output', ignore_errors=True) 232 | mocker.patch.object(Phockup, 'check_directories') 233 | mocker.patch.object(Phockup, 'walk_directory') 234 | Phockup('input', 'output').process_file("input/xmp_noext.jpg") 235 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 236 | assert os.path.isfile("output/2017/01/01/20170101-010101.xmp") 237 | shutil.rmtree('output', ignore_errors=True) 238 | 239 | 240 | def test_process_image_xmp_ext_and_noext(mocker): 241 | shutil.rmtree('output', ignore_errors=True) 242 | mocker.patch.object(Phockup, 'check_directories') 243 | mocker.patch.object(Phockup, 'walk_directory') 244 | Phockup('input', 'output').process_file("input/xmp_ext.jpg") 245 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 246 | assert os.path.isfile("output/2017/01/01/20170101-010101.xmp") 247 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg.xmp") 248 | shutil.rmtree('output', ignore_errors=True) 249 | 250 | 251 | def test_process_image_unknown(mocker): 252 | shutil.rmtree('output', ignore_errors=True) 253 | mocker.patch.object(Phockup, 'check_directories') 254 | mocker.patch.object(Phockup, 'walk_directory') 255 | mocker.patch.object(Exif, 'data') 256 | Exif.data.return_value = { 257 | "MIMEType": "image/jpeg" 258 | } 259 | Phockup('input', 'output').process_file("input/UNKNOWN.jpg") 260 | assert os.path.isfile("output/unknown/unknown.jpg") 261 | shutil.rmtree('output', ignore_errors=True) 262 | 263 | 264 | def test_process_other(mocker): 265 | shutil.rmtree('output', ignore_errors=True) 266 | mocker.patch.object(Phockup, 'check_directories') 267 | mocker.patch.object(Phockup, 'walk_directory') 268 | Phockup('input', 'output').process_file("input/other.txt") 269 | assert os.path.isfile("output/unknown/other.txt") 270 | shutil.rmtree('output', ignore_errors=True) 271 | 272 | 273 | def test_process_move(mocker): 274 | shutil.rmtree('output', ignore_errors=True) 275 | mocker.patch.object(Phockup, 'check_directories') 276 | mocker.patch.object(Phockup, 'walk_directory') 277 | mocker.patch.object(Exif, 'data') 278 | Exif.data.return_value = { 279 | "MIMEType": "image/jpeg" 280 | } 281 | phockup = Phockup('input', 'output', move=True) 282 | open("input/tmp_20170101_010101.jpg", "w").close() 283 | open("input/tmp_20170101_010101.xmp", "w").close() 284 | phockup.process_file("input/tmp_20170101_010101.jpg") 285 | phockup.process_file("input/tmp_20170101_010101.xmp") 286 | assert not os.path.isfile("input/tmp_20170101_010101.jpg") 287 | assert not os.path.isfile("input/tmp_20170101_010101.xmp") 288 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 289 | assert os.path.isfile("output/2017/01/01/20170101-010101.xmp") 290 | shutil.rmtree('output', ignore_errors=True) 291 | 292 | 293 | def test_process_movedel(mocker, caplog): 294 | shutil.rmtree('output', ignore_errors=True) 295 | mocker.patch.object(Phockup, 'check_directories') 296 | mocker.patch.object(Phockup, 
'walk_directory') 297 | mocker.patch.object(Exif, 'data') 298 | Exif.data.return_value = { 299 | "MIMEType": "image/jpeg" 300 | } 301 | phockup = Phockup('input', 'output', move=True, movedel=True, skip_unknown=True) 302 | open("input/tmp_20170101_010101.jpg", "w").close() 303 | open("input/sub_folder/tmp_20170101_010101.jpg", "w").close() 304 | phockup.process_file("input/tmp_20170101_010101.jpg") 305 | assert not os.path.isfile("input/tmp_20170101_010101.jpg") 306 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 307 | with caplog.at_level(logging.INFO): 308 | phockup.process_file("input/sub_folder/tmp_20170101_010101.jpg") 309 | assert 'deleted, duplicated file' in caplog.text 310 | assert not os.path.isfile("input/sub_folder/tmp_20170101_010101.jpg") 311 | shutil.rmtree('output', ignore_errors=True) 312 | 313 | 314 | def test_process_rmdirs(mocker, caplog): 315 | shutil.rmtree('output', ignore_errors=True) 316 | shutil.rmtree('input/sub_folder/sub0', ignore_errors=True) 317 | mocker.patch.object(Exif, 'data') 318 | Exif.data.return_value = { 319 | "MIMEType": "image/jpeg" 320 | } 321 | os.mkdir('input/sub_folder/sub0') 322 | os.mkdir('input/sub_folder/sub0/sub1') 323 | os.mkdir('input/sub_folder/sub0/sub2') 324 | os.mkdir('input/sub_folder/sub0/sub2/sub3') 325 | open("input/sub_folder/sub0/tmp_20170101_010101.jpg", "w").close() 326 | open("input/sub_folder/sub0/sub1/tmp_20170101_010102.jpg", "w").close() 327 | open("input/sub_folder/sub0/sub2/tmp_20170101_010103.jpg", "w").close() 328 | open("input/sub_folder/sub0/sub2/sub3/tmp_20170101_010104.jpg", "w").close() 329 | with caplog.at_level(logging.INFO): 330 | Phockup('input/sub_folder/sub0', 'output', move=True, rmdirs=True, max_depth=1) 331 | assert 'Deleted empty directory: input/sub_folder/sub0/sub1' in caplog.text 332 | assert 'input/sub_folder/sub0/sub2/sub3 not deleted' in caplog.text 333 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 334 | assert os.path.isfile("output/2017/01/01/20170101-010102.jpg") 335 | assert os.path.isfile("output/2017/01/01/20170101-010103.jpg") 336 | assert not os.path.isfile("output/2017/01/01/20170101-010104.jpg") 337 | assert not os.path.isdir("input/sub_folder/sub0/sub1") 338 | assert os.path.isdir("input/sub_folder/sub0/sub2") 339 | assert os.path.isdir("input/sub_folder/sub0/sub2/sub3") 340 | with caplog.at_level(logging.INFO): 341 | Phockup('input/sub_folder/sub0', 'output', move=True, rmdirs=True) 342 | assert 'Deleted empty directory: input/sub_folder/sub0/sub2' in caplog.text 343 | assert not os.path.isdir("input/sub_folder/sub0/sub2") 344 | assert os.path.isfile("output/2017/01/01/20170101-010104.jpg") 345 | shutil.rmtree('input/sub_folder/sub0', ignore_errors=True) 346 | shutil.rmtree('output', ignore_errors=True) 347 | 348 | 349 | def test_process_link(mocker): 350 | shutil.rmtree('output', ignore_errors=True) 351 | mocker.patch.object(Phockup, 'check_directories') 352 | mocker.patch.object(Phockup, 'walk_directory') 353 | mocker.patch.object(Exif, 'data') 354 | Exif.data.return_value = { 355 | "MIMEType": "image/jpeg" 356 | } 357 | phockup = Phockup('input', 'output', link=True) 358 | open("input/tmp_20170101_010101.jpg", "w").close() 359 | open("input/tmp_20170101_010101.xmp", "w").close() 360 | phockup.process_file("input/tmp_20170101_010101.jpg") 361 | phockup.process_file("input/tmp_20170101_010101.xmp") 362 | assert os.path.isfile("input/tmp_20170101_010101.jpg") 363 | assert os.path.isfile("input/tmp_20170101_010101.xmp") 364 | assert 
os.path.isfile("output/2017/01/01/20170101-010101.jpg") 365 | assert os.path.isfile("output/2017/01/01/20170101-010101.xmp") 366 | shutil.rmtree('output', ignore_errors=True) 367 | os.remove("input/tmp_20170101_010101.jpg") 368 | os.remove("input/tmp_20170101_010101.xmp") 369 | 370 | 371 | def test_process_exists_same(mocker, caplog): 372 | shutil.rmtree('output', ignore_errors=True) 373 | mocker.patch.object(Phockup, 'check_directories') 374 | mocker.patch.object(Phockup, 'walk_directory') 375 | phockup = Phockup('input', 'output') 376 | phockup.process_file("input/exif.jpg") 377 | assert os.path.isfile("output/2017/01/01/20170101-010101.jpg") 378 | with caplog.at_level(logging.INFO): 379 | phockup.process_file("input/exif.jpg") 380 | assert 'skipped, duplicated file' in caplog.text 381 | shutil.rmtree('output', ignore_errors=True) 382 | 383 | 384 | def test_process_same_date_different_files_rename(mocker): 385 | shutil.rmtree('output', ignore_errors=True) 386 | mocker.patch.object(Phockup, 'check_directories') 387 | mocker.patch.object(Phockup, 'walk_directory') 388 | phockup = Phockup('input', 'output') 389 | phockup.process_file("input/exif.jpg") 390 | mocker.patch.object(Exif, 'data') 391 | Exif.data.return_value = { 392 | "MIMEType": "image/jpeg", 393 | "CreateDate": "2017:01:01 01:01:01" 394 | } 395 | phockup.process_file("input/date_20170101_010101.jpg") 396 | assert os.path.isfile("output/2017/01/01/20170101-010101-2.jpg") 397 | shutil.rmtree('output', ignore_errors=True) 398 | 399 | 400 | def test_process_skip_xmp(mocker): 401 | # Assume no errors == skip XMP file 402 | mocker.patch.object(Phockup, 'check_directories') 403 | mocker.patch.object(Phockup, 'walk_directory') 404 | phockup = Phockup('input', 'output') 405 | phockup.process_file("skip.xmp") 406 | 407 | 408 | def test_process_skip_ignored_file(): 409 | shutil.rmtree('output', ignore_errors=True) 410 | shutil.rmtree('input_ignored', ignore_errors=True) 411 | os.mkdir('input_ignored') 412 | open("input_ignored/.DS_Store", "w").close() 413 | Phockup('input_ignored', 'output') 414 | assert not os.path.isfile("output/unknown/.DS_Store") 415 | shutil.rmtree('output', ignore_errors=True) 416 | shutil.rmtree('input_ignored', ignore_errors=True) 417 | 418 | 419 | def test_keep_original_filenames(mocker): 420 | shutil.rmtree('output', ignore_errors=True) 421 | mocker.patch.object(Phockup, 'check_directories') 422 | mocker.patch.object(Phockup, 'walk_directory') 423 | Phockup('input', 'output', original_filenames=True).process_file( 424 | "input/exif.jpg") 425 | assert os.path.isfile("output/2017/01/01/exif.jpg") 426 | assert not os.path.isfile("output/2017/01/01/20170101-010101.jpg") 427 | shutil.rmtree('output', ignore_errors=True) 428 | 429 | 430 | def test_keep_original_filenames_and_filenames_case(mocker): 431 | shutil.rmtree('output', ignore_errors=True) 432 | mocker.patch.object(Phockup, 'check_directories') 433 | mocker.patch.object(Phockup, 'walk_directory') 434 | Phockup('input', 'output', original_filenames=True).process_file( 435 | "input/UNKNOWN.jpg") 436 | assert os.path.isfile("output/2017/10/06/UNKNOWN.jpg") 437 | assert 'unknown.jpg' not in os.listdir("output/2017/10/06") 438 | shutil.rmtree('output', ignore_errors=True) 439 | 440 | 441 | def test_maxdepth_zero(): 442 | shutil.rmtree('output', ignore_errors=True) 443 | Phockup('input', 'output', maxdepth=0) 444 | dir1 = 'output/2017/01/01' 445 | dir2 = 'output/2017/10/06' 446 | dir3 = 'output/unknown' 447 | assert os.path.isdir(dir1) 448 | assert 
os.path.isdir(dir2) 449 | assert os.path.isdir(dir3) 450 | assert len([name for name in os.listdir(dir1) if 451 | os.path.isfile(os.path.join(dir1, name))]) == 3 452 | assert len([name for name in os.listdir(dir2) if 453 | os.path.isfile(os.path.join(dir2, name))]) == 1 454 | assert len([name for name in os.listdir(dir3) if 455 | os.path.isfile(os.path.join(dir3, name))]) == 1 456 | shutil.rmtree('output', ignore_errors=True) 457 | 458 | 459 | def test_maxdepth_one(): 460 | shutil.rmtree('output', ignore_errors=True) 461 | Phockup('input', 'output', maxdepth=1) 462 | validate_copy_operations() 463 | shutil.rmtree('output', ignore_errors=True) 464 | 465 | 466 | def test_maxconcurrency_none(): 467 | shutil.rmtree('output', ignore_errors=True) 468 | Phockup('input', 'output', max_concurrency=0) 469 | validate_copy_operations() 470 | shutil.rmtree('output', ignore_errors=True) 471 | 472 | 473 | def test_maxconcurrency_five(): 474 | shutil.rmtree('output', ignore_errors=True) 475 | Phockup('input', 'output', max_concurrency=5) 476 | validate_copy_operations() 477 | shutil.rmtree('output', ignore_errors=True) 478 | 479 | 480 | def validate_copy_operations(prefix=None, suffix=None): 481 | dir1 = '/2017/01/01' 482 | dir2 = '/2017/10/06' 483 | dir3 = '/unknown' 484 | dir4 = '/2018/01/01/' 485 | validate_copy_operation(output_root='output', file_path=dir1, expected_count=3, prefix=prefix, suffix=suffix) 486 | validate_copy_operation(output_root='output', file_path=dir2, expected_count=1, prefix=prefix, suffix=suffix) 487 | validate_copy_operation(output_root='output', file_path=dir3, expected_count=1, prefix=prefix, suffix=suffix) 488 | validate_copy_operation(output_root='output', file_path=dir4, expected_count=1, prefix=prefix, suffix=suffix) 489 | 490 | 491 | def validate_copy_operation(output_root, file_path, expected_count, prefix=None, suffix=None): 492 | path = [p for p in [output_root, prefix, file_path, suffix] if p is not None] 493 | fullpath = os.path.normpath(os.path.sep.join(path)) 494 | assert os.path.isdir(fullpath) 495 | assert len([name for name in os.listdir(fullpath) if 496 | os.path.isfile(os.path.join(fullpath, name))]) == expected_count 497 | 498 | 499 | def test_no_exif_directory(): 500 | shutil.rmtree('output', ignore_errors=True) 501 | Phockup('input', 'output', no_date_dir='misc') 502 | dir1 = 'output/2017/01/01' 503 | dir2 = 'output/2017/10/06' 504 | dir3 = 'output/unknown' 505 | dir4 = 'output/2018/01/01/' 506 | assert os.path.isdir(dir1) 507 | assert os.path.isdir(dir2) 508 | assert not os.path.isdir(dir3) 509 | assert os.path.isdir(dir4) 510 | shutil.rmtree('output', ignore_errors=True) 511 | 512 | 513 | def test_skip_unknown(): 514 | shutil.rmtree('output', ignore_errors=True) 515 | Phockup('input', 'output', skip_unknown=True) 516 | dir1 = 'output/2017/01/01' 517 | dir2 = 'output/2017/10/06' 518 | dir3 = 'output/misc' 519 | dir4 = 'output/2018/01/01/' 520 | assert os.path.isdir(dir1) 521 | assert os.path.isdir(dir2) 522 | # No files should exist in this directory because they were skipped 523 | assert not os.path.isdir(dir3) 524 | assert os.path.isdir(dir4) 525 | assert len([name for name in os.listdir(dir1) if 526 | os.path.isfile(os.path.join(dir1, name))]) == 3 527 | assert len([name for name in os.listdir(dir2) if 528 | os.path.isfile(os.path.join(dir2, name))]) == 1 529 | assert len([name for name in os.listdir(dir4) if 530 | os.path.isfile(os.path.join(dir4, name))]) == 1 531 | shutil.rmtree('output', ignore_errors=True) 532 | 533 | 534 | def test_from_date():
535 | shutil.rmtree('output', ignore_errors=True) 536 | Phockup('input', 'output', from_date="2017-10-06") 537 | dir1 = 'output/2017/01/01' 538 | dir2 = 'output/2017/10/06' 539 | dir3 = 'output/unknown' 540 | dir4 = 'output/2018/01/01/' 541 | assert os.path.isdir(dir1) 542 | assert os.path.isdir(dir2) 543 | assert os.path.isdir(dir3) 544 | assert os.path.isdir(dir4) 545 | assert len([name for name in os.listdir(dir1) if 546 | os.path.isfile(os.path.join(dir1, name))]) == 0 547 | assert len([name for name in os.listdir(dir2) if 548 | os.path.isfile(os.path.join(dir2, name))]) == 1 549 | assert len([name for name in os.listdir(dir3) if 550 | os.path.isfile(os.path.join(dir3, name))]) == 1 551 | assert len([name for name in os.listdir(dir4) if 552 | os.path.isfile(os.path.join(dir4, name))]) == 1 553 | shutil.rmtree('output', ignore_errors=True) 554 | 555 | 556 | def test_to_date(): 557 | shutil.rmtree('output', ignore_errors=True) 558 | Phockup('input', 'output', to_date="2017-10-06", progress=True) 559 | dir1 = 'output/2017/01/01' 560 | dir2 = 'output/2017/10/06' 561 | dir3 = 'output/unknown' 562 | dir4 = 'output/2018/01/01/' 563 | assert os.path.isdir(dir1) 564 | assert os.path.isdir(dir2) 565 | assert os.path.isdir(dir3) 566 | assert os.path.isdir(dir4) 567 | assert len([name for name in os.listdir(dir1) if 568 | os.path.isfile(os.path.join(dir1, name))]) == 3 569 | assert len([name for name in os.listdir(dir2) if 570 | os.path.isfile(os.path.join(dir2, name))]) == 1 571 | assert len([name for name in os.listdir(dir3) if 572 | os.path.isfile(os.path.join(dir3, name))]) == 1 573 | assert len([name for name in os.listdir(dir4) if 574 | os.path.isfile(os.path.join(dir4, name))]) == 0 575 | shutil.rmtree('output', ignore_errors=True) 576 | 577 | 578 | def test_from_date_to_date(): 579 | shutil.rmtree('output', ignore_errors=True) 580 | Phockup('input', 'output', to_date="2017-10-06", from_date="2017-01-02", progress=True) 581 | dir1 = 'output/2017/01/01' 582 | dir2 = 'output/2017/10/06' 583 | dir3 = 'output/unknown' 584 | dir4 = 'output/2018/01/01/' 585 | assert os.path.isdir(dir1) 586 | assert os.path.isdir(dir2) 587 | assert os.path.isdir(dir3) 588 | assert os.path.isdir(dir4) 589 | assert len([name for name in os.listdir(dir1) if 590 | os.path.isfile(os.path.join(dir1, name))]) == 0 591 | assert len([name for name in os.listdir(dir2) if 592 | os.path.isfile(os.path.join(dir2, name))]) == 1 593 | assert len([name for name in os.listdir(dir3) if 594 | os.path.isfile(os.path.join(dir3, name))]) == 1 595 | assert len([name for name in os.listdir(dir4) if 596 | os.path.isfile(os.path.join(dir4, name))]) == 0 597 | shutil.rmtree('output', ignore_errors=True) 598 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | ignore = E203 E501 W503 E126 4 | --------------------------------------------------------------------------------
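The style guide described earlier (flake8's 88-character limit, isort-ordered imports, and the single-/double-quote preference) is easiest to see in a small example. The snippet below is a minimal, hypothetical sketch written for this document only; every name in it is illustrative and is not part of the phockup codebase.

```python
# Illustrative only: not part of phockup. Shows the conventions from the
# style-guide section: isort-compatible import ordering within the stdlib
# section (plain imports, then from-imports, each alphabetical), lines kept
# under 88 characters, single quotes for plain literals and double quotes
# for strings that contain apostrophes.
import os
import re
from datetime import datetime

GREETING = 'hello'                # plain literal: single quotes
WARNING = "You've got an error!"  # contains an apostrophe: double quotes


def describe(path: str) -> str:
    """Return a short, human-readable description of a path (example only)."""
    stamp = datetime.now().strftime('%Y-%m-%d')
    name = os.path.basename(path)
    cleaned = re.sub(r'\s+', '_', name)
    return f'{stamp}: {cleaned}'


if __name__ == '__main__':
    print(GREETING, describe('some file.jpg'), WARNING)
```

Running `pre-commit run -a` against a file laid out like this sketch should pass both the flake8 and isort hooks configured for the repository, which is the quickest way to confirm a change follows the ruleset before opening a pull request.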