├── .gitignore ├── CHANGES.txt ├── INSTALL.txt ├── ISSUES.txt ├── LICENSE.txt ├── Makefile ├── README.md ├── README.stupid_dupes ├── act_dedupefiles.c ├── act_dedupefiles.h ├── act_deletefiles.c ├── act_deletefiles.h ├── act_linkfiles.c ├── act_linkfiles.h ├── act_printjson.c ├── act_printjson.h ├── act_printmatches.c ├── act_printmatches.h ├── act_summarize.c ├── act_summarize.h ├── args.c ├── args.h ├── build_msvc.bat ├── checks.c ├── checks.h ├── chroot_build.sh ├── compare_jdupes.sh ├── dumpflags.c ├── dumpflags.h ├── example_scripts ├── delete_but_exclude_nonposix.sh ├── delete_but_exclude_posix.sh ├── example.sh └── fdupes_oneline.sh ├── extfilter.c ├── extfilter.h ├── filehash.c ├── filehash.h ├── filestat.c ├── filestat.h ├── generate_packages.sh ├── hashdb.c ├── hashdb.h ├── hashdb_util.c ├── helptext.c ├── helptext.h ├── icon ├── icon_jdupes_256.ico ├── icon_jdupes_256.xcf ├── icon_jdupes_all.ico ├── icon_jdupes_all.xcf └── jdupes.png ├── interrupt.c ├── interrupt.h ├── jdupes.1 ├── jdupes.c ├── jdupes.h ├── libjodycode_check.c ├── libjodycode_check.h ├── likely_unlikely.h ├── linux-dedupe-static.h ├── loaddir.c ├── loaddir.h ├── match.c ├── match.h ├── progress.c ├── progress.h ├── remove_hashdb_dead_entries.sh ├── sort.c ├── sort.h ├── stupid_dupes.sh ├── test.sh ├── testdir ├── .hidden_dir │ └── hiddendir_two ├── .hidden_two ├── Stilltinydupe1 ├── Tinydupe3 ├── Zero_C ├── atinydupe0 ├── block_size_tests │ ├── 4095b_file1 │ ├── 4095b_file2 │ ├── 4096b_file1 │ ├── 4096b_file2 │ ├── 4097b_file1 │ └── 4097b_file2 ├── extensions │ ├── fake_doc_001.doc │ ├── fake_doc_002.doc │ ├── fake_mp3_001.mp3 │ ├── fake_mp3_002.mp3 │ ├── fake_mp4_001.mp4 │ └── fake_mp4_002.mp4 ├── isolate │ ├── 1 │ │ ├── 1.txt │ │ └── 2.txt │ ├── 2 │ │ ├── 3.txt │ │ └── 4.txt │ ├── 3 │ │ ├── 5.txt │ │ ├── 6.txt │ │ └── 7.txt │ └── 4 │ │ └── 8.txt ├── larger_file_1 ├── larger_file_2 ├── larger_file_3 ├── larger_file_4 ├── nine_upsidedown ├── notsotinydupe1 ├── notsotinydupe2 ├── 
numeric_sort_copysuffixes │ ├── file1-0 (1).jpg │ ├── file1-0#1.jpg │ ├── file1-0.jpg │ ├── file1-1 (Copy) (2) (2).jpg │ ├── file1-1 (Copy) (2).jpg │ ├── file1-1 (Copy).jpg │ ├── file1-1.jpg │ ├── file1-10.jpg │ └── file1-2.jpg ├── numeric_sort_digitsafter │ ├── file001 │ ├── file001a │ ├── file002 │ ├── file020 │ ├── file021 │ ├── file030 │ ├── file1 │ ├── file10 │ ├── file100 │ ├── file10a │ ├── file1a2 │ ├── file2 │ └── file3 ├── numeric_sort_startwithzero │ ├── 00file4 │ ├── 00file5 │ ├── 00file5a │ ├── 01file4 │ ├── 0file1 │ ├── 0file2 │ ├── 0file3 │ ├── 1file1 │ ├── 1file2 │ └── 1file3 ├── recursed_a │ ├── five │ ├── five_2 │ ├── one │ ├── one_2 │ ├── symlink_infinite_loop │ ├── two │ └── two_2 ├── recursed_b │ ├── four │ ├── one │ ├── three │ └── two_plus_one ├── recursed_c │ ├── five │ ├── level2 │ │ ├── five │ │ ├── one │ │ └── two │ ├── one │ └── two ├── symlink_dir ├── symlink_test │ ├── regular_file │ └── symlinked_file ├── symlink_twice_one ├── symlink_two ├── tinydupe2 ├── tinydupe4 ├── twice_one ├── two ├── unicode_dirnames │ ├── Ελληνιά │ │ └── Unicode testfile.txt │ ├── до свидания │ │ └── Unicode testfile.txt │ ├── दसविदानिया │ │ └── Unicode testfile.txt │ ├── 怖い │ │ └── Unicode testfile.txt │ └── 행운을 빈다 │ │ └── Unicode testfile.txt ├── unicode_filenames │ ├── cassé │ ├── Ελληνιά │ ├── до свидания │ ├── दसविदानिया │ ├── 怖い │ └── 행운을 빈다 ├── with spaces a ├── with spaces b ├── zero_a └── zero_b ├── travcheck.c ├── travcheck.h ├── tune_winres.sh ├── version.h ├── winres.manifest.xml ├── winres.rc ├── winres_xp.rc ├── xxhash.c └── xxhash.h /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # Build ignores 3 | # 4 | #.* 5 | *.o 6 | *.o.* 7 | *.obj 8 | *.exe 9 | *.a 10 | *.so 11 | *.so.* 12 | *.1.gz 13 | 14 | # 15 | # Never ignore these 16 | # 17 | !.gitignore 18 | 19 | # 20 | # Normal output and testing dirs 21 | # 22 | /build_date.h 23 | /hashdb_util 24 | /hashdb_util*.exe 25 | /jdupes 26 | 
/jdupes*.exe 27 | /jdupes-standalone 28 | /jdupes-*-*/ 29 | /jdupes-*-*.zip 30 | /jdupes_hashdb.txt 31 | /*.pkg.tar.* 32 | test_temp 33 | output.log 34 | 35 | # 36 | # Backups / patches 37 | # 38 | *~ 39 | *.orig 40 | *.rej 41 | /*.patch 42 | 43 | # 44 | # debugging and editor stuff 45 | # 46 | core 47 | .gdb_history 48 | .gdbinit 49 | .*.swp 50 | *.gcda 51 | *.gcno 52 | *.gcov 53 | cachegrind.out.* 54 | 55 | # Mac OS 56 | .DS_Store 57 | -------------------------------------------------------------------------------- /INSTALL.txt: -------------------------------------------------------------------------------- 1 | Building and Installing jdupes 2 | ----------------------------------------------------------------------------- 3 | You must have libjodycode to build and run jdupes. Install the libjodycode 4 | library and development headers from your distribution package manager. These 5 | will probably be called "libjodycode" and "libjodycode-dev". 6 | 7 | If your distribution doesn't have libjodycode, get it here: 8 | 9 | https://codeberg.org/jbruchon/libjodycode 10 | 11 | If you clone and build libjodycode into ../libjodycode/ then the build will use 12 | the header and library in that location instead of the libjodycode already 13 | installed on the system. You can then test the dynamically linked jdupes by 14 | telling it where to look: 15 | 16 | LD_LIBRARY_PATH=../libjodycode ./jdupes 17 | 18 | To install the program with the default options and flags, just issue the 19 | following commands: 20 | 21 | make 22 | sudo make install 23 | 24 | This installs all jdupes files under /usr/local by default. You may change this 25 | to a different location by editing the Makefile or specifying a PREFIX on the 26 | command line: 'make PREFIX=/usr install'. DESTDIR is also supported to place 27 | everything into a different location for i.e. building distribution packages: 28 | 'make DESTDIR=/pkg/jdupes install'. 
There is also a package generation script 29 | 'generate_packages.sh' which tries to make an archive file containing all of 30 | the necessary files for distribution; for Linux you can also tell it which 31 | x86 architecture you want explicitly (i686, x32, x86_64): 32 | 'make package ARCH=x32' 33 | 34 | To force static or dynamic linking of libjodycode, use the make targets 35 | static_jc and dynamic_jc respectively. On Windows you must tell Make to use 36 | the DLL version explicitly: 'make FORCE_JC_DLL=1' (ld on Windows doesn't seem 37 | to respect -Wl,-Bdynamic properly). 38 | 39 | On Windows, you need to use MSYS2 with MinGW-w64 installed. Use this guide to 40 | install the build environment: 41 | 42 | https://stackoverflow.com/a/30071634/1906641 43 | 44 | Running "make" as usual under the MSYS2 mingw32/mingw64 terminal will build a 45 | Windows binary for the bitness of the terminal you're using. The Makefile will 46 | detect a Windows environment and automatically make the needed build changes. 
47 | 48 | Various build options are available and can be turned on at compile time by 49 | setting CFLAGS_EXTRA or by passing it to 'make': 50 | 51 | make CFLAGS_EXTRA=-DYOUR_OPTION 52 | make CFLAGS_EXTRA='-DYOUR_OPTION_ONE -DYOUR_OPTION_TWO' 53 | 54 | This is a list of options that can be "turned on" this way: 55 | 56 | ON_WINDOWS Modify code to compile with MinGW on Windows 57 | NO_WINDOWS Disable Windows MinGW special cases (mainly for Cygwin) 58 | NO_ATIME Disable all access time features 59 | NO_CHUNKSIZE Disable auto I/O chunk sizing code and -C option 60 | NO_DELETE Disable deletion -d, -N 61 | NO_ERRORONDUPE Disable error exit on first dupe found -E 62 | NO_EXTFILTER Disable extended filter -X 63 | NO_GETOPT_LONG Disable getopt_long() (long options will not work) 64 | NO_HARDLINKS Disable hard link code -L, -H 65 | NO_HASHDB Disable hash cache database feature -y 66 | NO_HELPTEXT Disable all help text and almost all version text 67 | NO_NUMSORT Disable numerically correct case-ignored symbols-last sort 68 | NO_JSON Disable JSON output -j 69 | NO_MTIME Disable all modify time features 70 | NO_PERMS Disable permission matching -p 71 | NO_SYMLINKS Disable symbolic link code -l, -s 72 | NO_TRAVCHECK Disable double-traversal safety code (-U always on) 73 | NO_USER_ORDER Disable isolation and parameter sort order -I, -O 74 | 75 | Certain options can be turned on by setting a variable passed to make instead 76 | of using CFLAGS_EXTRA, i.e. 'make DEBUG=1': 77 | 78 | NO_UNICODE [Windows only] disable all Unicode support 79 | DEBUG Turn on algorithm statistic reporting with '-D' 80 | LOUD '-@' for low-level debugging; enables DEBUG 81 | ENABLE_DEDUPE Enable '-B' deduplication (Linux/macOS: on by default) 82 | DISABLE_DEDUPE Forcibly disable (undefine) ENABLE_DEDUPE 83 | STATIC_DEDUPE_H Build dedupe support with included minimal header file 84 | LOW_MEMORY Build for extremely low-RAM environments (CAUTION!) 
85 | BARE_BONES Build LOW_MEMORY with very aggressive code removal 86 | USE_JODY_HASH Use jody_hash instead of xxHash64 (smaller, slower) 87 | EXTERNAL_HASH_LIB Force hash code to be linked in externally (no build) 88 | FORCE_JC_DLL Windows only: force linking to nearby libjodycode.dll 89 | IGNORE_NEARBY_JC Do NOT use libjodycode at ../libjodycode if it exists 90 | GC_SECTIONS Use gcc/ld section garbage collection to reduce size 91 | 92 | EXTERNAL_HASH_LIB will build jdupes with the interface code for the chosen hash 93 | type (xxhash vs jody_hash) but will suppress building the actual code. This is 94 | intended for use by OS distributions to use a shared library. You will need to 95 | link against the shared library on your own (i.e. LDFLAGS=-lxxhash make). 96 | 97 | The LOW_MEMORY option tweaks various knobs in the program to lower total 98 | memory usage. It also disables some features to reduce the size of certain 99 | data structures. The improvements in memory usage are not very large, but if 100 | you're running in a very RAM-limited environment or have a CPU with very small 101 | caches it may be a good choice. This is primarily meant for use in embedded 102 | systems and should not be used unless you know what you are doing. 103 | 104 | The BARE_BONES option sets LOW_MEMORY and also enables code removals that are 105 | extremely aggressive, to the point that what some might consider fundamental 106 | capabilities and safety features are completely stripped out, including the 107 | NO_DELETE and NO_TRAVCHECK options. It is intended to reduce the program to 108 | the most basic functionality expected of a simple duplicate finder and may 109 | be suitable for embedded systems with extremely limited storage and memory. 
110 | This is an example of the size drop for a normal 64-bit Windows build: 111 | 112 | text data bss total filename 113 | 128384 6988 167776 303148 jdupes.exe 114 | 74248 6348 29888 110484 jdupes-barebones.exe 115 | ------------------------------- 116 | - 54136 640 137888 192664 size reduction (64% smaller) 117 | 118 | A test directory is included so that you may familiarize yourself with the way 119 | jdupes operates. You may test the program before installing it by issuing a 120 | command such as "./jdupes testdir" or "./jdupes -r testdir", just to name a 121 | couple of examples. See the README for information on valid options. 122 | 123 | A comparison shell script is also included. It will run your natively 124 | installed 'jdupes' or 'jdupes' with the directories and extra options you 125 | specify and compare the run times and output a 'diff' of the two program 126 | outputs. Unless the core algorithm or sort behavior is changed, both programs 127 | should produce identical outputs and the 'diff' output shouldn't appear at 128 | all. To use it, type: 129 | 130 | ./compare_jdupes.sh [options] 131 | 132 | There are some package generators included as make targets: 133 | 134 | chrootpackage Uses chroots under /chroot to build Linux packages 135 | package Makes auto-detected macOS/Linux/Windows packages 136 | 137 | If you have a multilib compiler for x86_64 you can specify an architecture to 138 | make packages for: 'ARCH=xxx make package' where xxx is i386, x86_64, or x32. 139 | -------------------------------------------------------------------------------- /ISSUES.txt: -------------------------------------------------------------------------------- 1 | This is a list of issues created from the defunct GitHub repo's issue tracker. 
2 | 3 | 4 | Interactive deleting mode behaviour (-d) {delete is not respecting ranges with and without spaces} 5 | #269 opened Oct 4, 2023 by amalgame21 6 | 7 | "in X specified" off by one 8 | #258 opened Aug 24, 2023 by odwulf 9 | 10 | completely silent output [feature request] 11 | #257 opened Aug 24, 2023 by 07416 12 | 13 | Feature request: Extend jdupes command-line syntax to accept individual files [feature request] [high priority] 14 | #256 opened Aug 19, 2023 by cfgnunes 15 | 16 | Enhancement: check if target file is already reflinked [feature request] 17 | #245 opened Jun 10, 2023 by Forza-tng 18 | 19 | Enhancement: Print progress during dedupe [feature request] 20 | #244 opened Jun 10, 2023 by Forza-tng 21 | 22 | Would be good to have more user documentasion (I would be prepared to write it) 23 | #242 opened Jun 4, 2023 by ben-tvpp 24 | 25 | Limit hard link counts on command line [feature request] 26 | #241 opened Jun 4, 2023 by leahneukirchen 27 | 28 | Make jdupes aware of resource forks [feature request] 29 | #233 opened Mar 21, 2023 by aaronpriven 30 | 31 | Add option to move duplicates to a different directory instead of deleting them [feature request] 32 | #230 opened Feb 26, 2023 by STrRedWolf 33 | 34 | feature request, mode where duplicates are only found between given dirs, but not withen the same given dirs [feature request] 35 | #220 opened Nov 9, 2022 by calestyo 36 | 37 | Finding / Listing only hard linked duplicates [feature request] 38 | #206 opened May 22, 2022 by Zocker1999NET 39 | 40 | Skip byte-by-byte comparison iff filenames match [feature request] 41 | #198 opened Jan 1, 2022 by hollymcr 42 | 43 | How to build on Windows? Why are dedupe and symlink turned off by default? [feature request] [question] 44 | #194 opened Dec 21, 2021 by kwencel 45 | 46 | option to prevent change of atime? 
[feature request] 47 | #193 opened Dec 13, 2021 by m95341175 48 | 49 | Folder modification dates with option -B (tested on Mac OS) [feature request] 50 | #187 opened Sep 17, 2021 by cameo69 51 | 52 | Set order by full path depth? [feature request] 53 | #184 opened Aug 6, 2021 by dzg 54 | 55 | exclude duplicates with different names [feature request] 56 | #170 opened May 4, 2021 by madsurgeon 57 | 58 | Hard linking needs to link symlink targets when used with -s (not the symlinks themselves) [bug] [high priority] 59 | #165 opened Feb 14, 2021 by Chaz6 60 | 61 | Make behavior consistent across invocations by pre-sorting incoming files/directories [bug] [high priority] 62 | #152 opened Nov 25, 2020 by jbruchon 63 | 64 | Add sorting by size [feature request] 65 | #150 opened Nov 15, 2020 by FabioPedretti 66 | 67 | Find duplicates by name only [feature request] 68 | #128 opened Apr 9, 2020 by nodecentral 69 | 70 | Compare "source" files/dirs against "destination" ones and only act on the source [feature request] 71 | #121 opened Jan 10, 2020 by nodecentral 72 | 73 | Feature: break (undo) hard links and symlinks [feature request] 74 | #99 opened Apr 25, 2019 by jbruchon 75 | 76 | Test suite? 
[feature request] 77 | #86 opened Aug 6, 2018 by Freaky 78 | 79 | Feature: delete directories that were emptied out after deletion [feature request] 80 | #78 opened May 14, 2018 by jbruchon 81 | 82 | Add ability to move or copy duplicates to a different directory [feature request] 83 | #73 opened Feb 15, 2018 by jbruchon 84 | 85 | Add ability to specify maximum recursion depth [feature request] 86 | #72 opened Jan 16, 2018 by jbruchon 87 | 88 | Include/exclude by regular expression [feature request] 89 | #71 opened Jan 14, 2018 by jbruchon 90 | 91 | Feature: detecting duplicate directories 92 | #68 opened Dec 16, 2017 by ivanperez-keera 93 | 94 | Duplicate scan between two separate hosts "rsync style" [feature request] 95 | #67 opened Dec 7, 2017 by jbruchon 96 | 97 | Add option to not follow symlinks passed as parameters [feature request] 98 | #65 opened Dec 3, 2017 by jbruchon 99 | 100 | Accept a list of files/directories from stdin for processing [feature request] 101 | #64 opened Nov 21, 2017 by jbruchon 102 | 103 | Interactive mode for links and dedupes [feature request] 104 | #51 opened Jun 17, 2017 by anfractuosity 105 | 106 | Make -m hard link aware and clarify what "occupying X bytes" means [bug] 107 | #43 opened Mar 21, 2017 by mmitch 108 | 109 | Output sort by hard link reference count [feature request] 110 | #42 opened Mar 21, 2017 by mmitch 111 | 112 | Add control over the "triangle problem" [feature request] 113 | #26 opened Nov 3, 2016 by jbruchon 114 | 115 | Keep only the longest or shortest filename [feature request] 116 | #10 opened Mar 6, 2016 by jbruchon 117 | 118 | Exclusion of selected directories from automatic deletion [feature request] 119 | #8 opened Feb 19, 2016 by meoso 120 | 121 | Auto-delete only if file basenames identical (otherwise prompt) [feature request] 122 | #6 opened Jan 26, 2016 by igittigitthub 123 | -------------------------------------------------------------------------------- /LICENSE.txt: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2015-2023 Jody Lee Bruchon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # jdupes Makefile 2 | 3 | # Default flags to pass to the C compiler (can be overridden) 4 | CFLAGS ?= -O2 -g 5 | 6 | # PREFIX determines where files will be installed. Common examples 7 | # include "/usr" or "/usr/local". 8 | PREFIX = /usr/local 9 | 10 | # PROGRAM_NAME determines the installation name and manual page name 11 | PROGRAM_NAME = jdupes 12 | 13 | # BIN_DIR indicates directory where program is to be installed. 
14 | # Suggested value is "$(PREFIX)/bin" 15 | BIN_DIR = $(PREFIX)/bin 16 | 17 | # MAN_DIR indicates directory where the jdupes man page is to be 18 | # installed. Suggested value is "$(PREFIX)/man/man1" 19 | MAN_BASE_DIR = $(PREFIX)/share/man 20 | MAN_DIR = $(MAN_BASE_DIR)/man1 21 | MAN_EXT = 1 22 | 23 | # Required external tools 24 | CC ?= gcc 25 | INSTALL = install 26 | RM = rm -f 27 | RMDIR = rmdir -p 28 | MKDIR = mkdir -p 29 | INSTALL_PROGRAM = $(INSTALL) -m 0755 30 | INSTALL_DATA = $(INSTALL) -m 0644 31 | 32 | # Main object files 33 | OBJS += hashdb.o 34 | OBJS += args.o checks.o dumpflags.o extfilter.o filehash.o filestat.o jdupes.o helptext.o 35 | OBJS += interrupt.o libjodycode_check.o loaddir.o match.o progress.o sort.o travcheck.o 36 | OBJS += act_deletefiles.o act_linkfiles.o act_printmatches.o act_summarize.o act_printjson.o 37 | 38 | # Configuration section 39 | COMPILER_OPTIONS = -Wall -Wwrite-strings -Wcast-align -Wstrict-aliasing -Wstrict-prototypes -Wpointer-arith -Wundef 40 | COMPILER_OPTIONS += -Wshadow -Wfloat-equal -Waggregate-return -Wcast-qual -Wswitch-default -Wswitch-enum -Wunreachable-code -Wformat=2 41 | COMPILER_OPTIONS += -std=gnu11 -D_FILE_OFFSET_BITS=64 -fstrict-aliasing -pipe 42 | COMPILER_OPTIONS += -DNO_ATIME 43 | 44 | # Remove unused code if requested 45 | ifdef GC_SECTIONS 46 | COMPILER_OPTIONS += -fdata-sections -ffunction-sections 47 | LINK_OPTIONS += -Wl,--gc-sections 48 | endif 49 | 50 | 51 | # Bare-bones mode (for the adventurous lunatic) - includes all LOW_MEMORY options 52 | ifdef BARE_BONES 53 | LOW_MEMORY = 1 54 | COMPILER_OPTIONS += -DNO_DELETE -DNO_TRAVCHECK -DBARE_BONES -DNO_ERRORONDUPE 55 | COMPILER_OPTIONS += -DNO_HASHDB -DNO_HELPTEXT -DCHUNK_SIZE=4096 -DPATHBUF_SIZE=1024 56 | endif 57 | 58 | # Low memory mode 59 | ifdef LOW_MEMORY 60 | USE_JODY_HASH = 1 61 | DISABLE_DEDUPE = 1 62 | override undefine ENABLE_DEDUPE 63 | COMPILER_OPTIONS += -DLOW_MEMORY 64 | COMPILER_OPTIONS += -DNO_HARDLINKS -DNO_SYMLINKS 
-DNO_USER_ORDER -DNO_PERMS 65 | COMPILER_OPTIONS += -DNO_ATIME -DNO_JSON -DNO_EXTFILTER -DNO_CHUNKSIZE 66 | ifndef BARE_BONES 67 | COMPILER_OPTIONS += -DCHUNK_SIZE=16384 68 | endif 69 | endif 70 | 71 | 72 | UNAME_S=$(shell uname -s) 73 | 74 | # Are we running on a Windows OS? 75 | ifeq ($(OS), Windows_NT) 76 | ifndef NO_WINDOWS 77 | ON_WINDOWS=1 78 | endif 79 | endif 80 | 81 | # Debugging code inclusion 82 | ifdef LOUD 83 | DEBUG=1 84 | COMPILER_OPTIONS += -DLOUD_DEBUG 85 | endif 86 | ifdef DEBUG 87 | COMPILER_OPTIONS += -DDEBUG 88 | else 89 | COMPILER_OPTIONS += -DNDEBUG 90 | endif 91 | ifdef HARDEN 92 | COMPILER_OPTIONS += -Wformat -Wformat-security -D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE -fpie -Wl,-z,relro -Wl,-z,now 93 | endif 94 | 95 | # MinGW needs this for printf() conversions to work 96 | ifdef ON_WINDOWS 97 | ifndef NO_UNICODE 98 | UNICODE=1 99 | COMPILER_OPTIONS += -municode 100 | endif 101 | SUFFIX=.exe 102 | SO_EXT=.dll 103 | LIB_EXT=.lib 104 | # COMPILER_OPTIONS += -D__USE_MINGW_ANSI_STDIO=1 105 | COMPILER_OPTIONS += -DON_WINDOWS=1 106 | ifeq ($(UNAME_S), MINGW32_NT-5.1) 107 | OBJS += winres_xp.o 108 | else 109 | OBJS += winres.o 110 | endif 111 | override undefine ENABLE_DEDUPE 112 | DISABLE_DEDUPE = 1 113 | else 114 | SO_EXT=.so 115 | LIB_EXT=.a 116 | endif 117 | 118 | # Don't use unsupported compiler options on gcc 3/4 (Mac OS X 10.5.8 Xcode) 119 | # ENABLE_DEDUPE by default - macOS Sierra 10.12 and up required 120 | ifeq ($(UNAME_S), Darwin) 121 | GCCVERSION = $(shell expr `LC_ALL=C gcc -v 2>&1 | grep '[cn][cg] version' | sed 's/[^0-9]*//;s/[ .].*//'` \>= 5) 122 | ifndef DISABLE_DEDUPE 123 | ENABLE_DEDUPE = 1 124 | endif 125 | else 126 | GCCVERSION = 1 127 | BDYNAMIC = -Wl,-Bdynamic 128 | BSTATIC = -Wl,-Bstatic 129 | endif 130 | 131 | ifeq ($(GCCVERSION), 1) 132 | COMPILER_OPTIONS += -Wextra -Wstrict-overflow=5 -Winit-self 133 | endif 134 | 135 | # Use jody_hash instead of xxHash if requested 136 | ifdef USE_JODY_HASH 137 | 
COMPILER_OPTIONS += -DUSE_JODY_HASH -DNO_XXHASH2 138 | OBJS_CLEAN += xxhash.o 139 | else 140 | ifndef EXTERNAL_HASH_LIB 141 | OBJS += xxhash.o 142 | endif 143 | endif # USE_JODY_HASH 144 | 145 | # Stack size limit can be too small for deep directory trees, so set to 16 MiB 146 | # The ld syntax for Windows is the same for both Cygwin and MinGW 147 | ifndef LOW_MEMORY 148 | ifeq ($(OS), Windows_NT) 149 | COMPILER_OPTIONS += -Wl,--stack=16777216 150 | else ifeq ($(UNAME_S), Darwin) 151 | COMPILER_OPTIONS += -Wl,-stack_size -Wl,0x1000000 152 | else 153 | COMPILER_OPTIONS += -Wl,-z,stack-size=16777216 154 | endif 155 | endif 156 | 157 | # Don't do clonefile on Mac OS X < 10.13 (High Sierra) 158 | ifeq ($(UNAME_S), Darwin) 159 | DARWINVER := $(shell expr `uname -r | cut -d. -f1` \< 17) 160 | ifeq "$(DARWINVER)" "1" 161 | COMPILER_OPTIONS += -DNO_CLONEFILE=1 162 | endif 163 | endif 164 | 165 | ### Dedupe feature stuff (BTRFS, XFS, APFS) 166 | 167 | # ENABLE_DEDUPE should be ON by default for Linux 168 | ifeq ($(UNAME_S), Linux) 169 | ifndef DISABLE_DEDUPE 170 | ENABLE_DEDUPE = 1 171 | endif 172 | endif 173 | 174 | # Allow forced override of ENABLE_DEDUPE 175 | ifdef DISABLE_DEDUPE 176 | override undefine ENABLE_DEDUPE 177 | override undefine STATIC_DEDUPE_H 178 | endif 179 | 180 | # Catch someone trying to enable dedupe in flags and turn on ENABLE_DEDUPE 181 | ifneq (,$(findstring DENABLE_DEDUPE,$(CFLAGS) $(CFLAGS_EXTRA))) 182 | ENABLE_DEDUPE = 1 183 | $(warn Do not enable dedupe in CFLAGS; use make ENABLE_DEDUPE=1 instead) 184 | ifdef DISABLE_DEDUPE 185 | $(error DISABLE_DEDUPE set but -DENABLE_DEDUPE is in CFLAGS. 
Choose only one) 186 | endif 187 | endif 188 | 189 | # Actually enable dedupe 190 | ifdef ENABLE_DEDUPE 191 | COMPILER_OPTIONS += -DENABLE_DEDUPE 192 | OBJS += act_dedupefiles.o 193 | else 194 | OBJS_CLEAN += act_dedupefiles.o 195 | endif 196 | ifdef STATIC_DEDUPE_H 197 | COMPILER_OPTIONS += -DSTATIC_DEDUPE_H 198 | endif 199 | 200 | 201 | ### Find and use nearby libjodycode by default 202 | ifndef IGNORE_NEARBY_JC 203 | ifneq ("$(wildcard ../libjodycode/libjodycode.h)","") 204 | $(info Found and using nearby libjodycode at ../libjodycode) 205 | COMPILER_OPTIONS += -I../libjodycode -L../libjodycode 206 | ifeq ("$(wildcard ../libjodycode/version.o)","") 207 | $(error You must build libjodycode before building jdupes) 208 | endif 209 | endif 210 | STATIC_LDFLAGS += ../libjodycode/libjodycode$(LIB_EXT) 211 | ifdef ON_WINDOWS 212 | DYN_LDFLAGS += -l:../libjodycode/libjodycode$(SO_EXT) 213 | else 214 | DYN_LDFLAGS += -ljodycode 215 | endif 216 | endif 217 | 218 | 219 | CFLAGS += $(COMPILER_OPTIONS) $(CFLAGS_EXTRA) 220 | LDFLAGS += $(LINK_OPTIONS) $(LDFLAGS_EXTRA) 221 | 222 | 223 | all: libjodycode_hint $(PROGRAM_NAME) dynamic_jc 224 | 225 | hashdb_util: hashdb.o hashdb_util.o 226 | $(CC) $(CFLAGS) hashdb.o hashdb_util.o $(LDFLAGS) $(STATIC_LDFLAGS) $(BDYNAMIC) -o hashdb_util$(SUFFIX) 227 | 228 | dynamic_jc: $(PROGRAM_NAME) 229 | $(CC) $(CFLAGS) $(OBJS) $(BDYNAMIC) $(LDFLAGS) $(DYN_LDFLAGS) -o $(PROGRAM_NAME)$(SUFFIX) 230 | 231 | static_jc: $(PROGRAM_NAME) 232 | $(CC) $(CFLAGS) $(OBJS) $(LDFLAGS) $(STATIC_LDFLAGS) $(BDYNAMIC) -o $(PROGRAM_NAME)$(SUFFIX) 233 | 234 | static: $(PROGRAM_NAME) 235 | $(CC) $(CFLAGS) $(OBJS) -static $(LDFLAGS) $(STATIC_LDFLAGS) -o $(PROGRAM_NAME)$(SUFFIX) 236 | 237 | static_stripped: $(PROGRAM_NAME) static 238 | -strip $(PROGRAM_NAME)$(SUFFIX) 239 | 240 | $(PROGRAM_NAME): $(OBJS) 241 | : 242 | 243 | winres.o: winres.rc winres.manifest.xml 244 | ./tune_winres.sh 245 | windres winres.rc winres.o 246 | 247 | winres_xp.o: winres_xp.rc 248 | 
./tune_winres.sh 249 | windres winres_xp.rc winres_xp.o 250 | 251 | installdirs: 252 | test -e $(DESTDIR)$(BIN_DIR) || $(MKDIR) $(DESTDIR)$(BIN_DIR) 253 | test -e $(DESTDIR)$(MAN_DIR) || $(MKDIR) $(DESTDIR)$(MAN_DIR) 254 | 255 | install: $(PROGRAM_NAME) installdirs 256 | $(INSTALL_PROGRAM) $(PROGRAM_NAME)$(SUFFIX) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME)$(SUFFIX) 257 | $(INSTALL_DATA) $(PROGRAM_NAME).1 $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT) 258 | 259 | uninstalldirs: 260 | -test -e $(DESTDIR)$(BIN_DIR) && $(RMDIR) $(DESTDIR)$(BIN_DIR) 261 | -test -e $(DESTDIR)$(MAN_DIR) && $(RMDIR) $(DESTDIR)$(MAN_DIR) 262 | 263 | uninstall: uninstalldirs 264 | $(RM) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME)$(SUFFIX) 265 | $(RM) $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT) 266 | 267 | test: 268 | ./test.sh 269 | 270 | stripped: $(PROGRAM_NAME) 271 | strip $(PROGRAM_NAME)$(SUFFIX) 272 | 273 | clean: 274 | $(RM) $(OBJS) $(OBJS_CLEAN) build_date.h $(PROGRAM_NAME)$(SUFFIX) hashdb_util$(SUFFIX) *~ .*.un~ *.gcno *.gcda *.gcov *.obj 275 | 276 | distclean: clean 277 | $(RM) -rf *.pkg.tar* jdupes-*-*/ jdupes-*-*.zip 278 | 279 | chrootpackage: 280 | +./chroot_build.sh 281 | 282 | package: 283 | +./generate_packages.sh $(ARCH) 284 | 285 | libjodycode_hint: 286 | $(info hint: if ../libjodycode is built but jdupes won't run, try doing 'make static_jc') 287 | -------------------------------------------------------------------------------- /README.stupid_dupes: -------------------------------------------------------------------------------- 1 | Introduction 2 | ------------------------------------------------------------------------------- 3 | stupid_dupes is a shell script that copies the most basic capabilities of 4 | jdupes. It is inefficient. It barely has enough features to be worthy of 5 | using the word "features" at all. Despite all of that, it's pretty safe 6 | and produces the same simple match set printouts as jdupes. 
7 | 8 | This program illustrates how a duplicate scanner works on a basic level. 9 | It has a minimal set of requirements: 10 | 11 | * GNU bash 12 | * find with support for -type and -maxdepth 13 | * stat 14 | * cat 15 | * jodyhash (or any other program that outputs ONLY a hash) 16 | * dd (for partial hashing) 17 | 18 | It's slow. 19 | 20 | Real slow. 21 | 22 | You're welcome. 23 | 24 | Please consider financially supporting continued development of 25 | stupid_dupes (like you'd spend the money so smartly otherwise): 26 | 27 | https://www.subscribestar.com/JodyBruchon 28 | 29 | 30 | Contact information 31 | ------------------------------------------------------------------------------- 32 | For stupid_dupes inquiries, contact Jody Bruchon 33 | and be sure to say something really stupid when you do. 34 | 35 | 36 | Legal information and software license 37 | ------------------------------------------------------------------------------- 38 | Copyright (C) 2020-2023 by Jody Bruchon and contributors 39 | and for some reason Jody is willing to admit to writing it. 40 | 41 | The MIT License 42 | 43 | Permission is hereby granted, free of charge, to any person 44 | obtaining a copy of this software and associated documentation files 45 | (the "Software"), to deal in the Software without restriction, 46 | including without limitation the rights to use, copy, modify, merge, 47 | publish, distribute, sublicense, and/or sell copies of the Software, 48 | and to permit persons to whom the Software is furnished to do so, 49 | subject to the following conditions: 50 | 51 | The above copyright notice and this permission notice shall be 52 | included in all copies or substantial portions of the Software. 53 | 54 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 55 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 56 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
57 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 58 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 59 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 60 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 61 | 62 | -------------------------------------------------------------------------------- /act_dedupefiles.c: -------------------------------------------------------------------------------- 1 | /* Deduplication of files with OS-specific copy-on-write mechanisms 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include "jdupes.h" 5 | 6 | #ifdef ENABLE_DEDUPE 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "act_dedupefiles.h" 17 | #include "libjodycode.h" 18 | 19 | #ifdef __linux__ 20 | /* Use built-in static dedupe header if requested */ 21 | #ifdef STATIC_DEDUPE_H 22 | #include "linux-dedupe-static.h" 23 | #else 24 | #include 25 | #endif /* STATIC_DEDUPE_H */ 26 | 27 | /* If the Linux headers are too old, automatically use the static one */ 28 | #ifndef FILE_DEDUPE_RANGE_SAME 29 | #warning Automatically enabled STATIC_DEDUPE_H due to insufficient header support 30 | #include "linux-dedupe-static.h" 31 | #endif /* FILE_DEDUPE_RANGE_SAME */ 32 | #include 33 | #define JDUPES_DEDUPE_SUPPORTED 1 34 | #define KERNEL_DEDUP_MAX_SIZE 16777216 35 | /* Error messages */ 36 | static const char s_err_dedupe_notabug[] = "This is not a bug in jdupes; check your file stats/permissions."; 37 | static const char s_err_dedupe_repeated[] = "This verbose error description will not be repeated."; 38 | #endif /* __linux__ */ 39 | 40 | #ifdef __APPLE__ 41 | #ifdef NO_HARDLINKS 42 | #error Hard link support is required for dedupe on macOS but NO_HARDLINKS was set 43 | #endif 44 | #include "act_linkfiles.h" 45 | #define JDUPES_DEDUPE_SUPPORTED 1 46 | #endif 47 | 48 | #ifndef JDUPES_DEDUPE_SUPPORTED 49 | 
#error Dedupe is only supported on Linux and macOS 50 | #endif 51 | 52 | void dedupefiles(file_t * restrict files) 53 | { 54 | #ifdef __linux__ 55 | struct file_dedupe_range *fdr; 56 | struct file_dedupe_range_info *fdri; 57 | file_t *curfile, *curfile2, *dupefile; 58 | int src_fd; 59 | int err_twentytwo = 0, err_ninetyfive = 0; 60 | uint64_t total_files = 0; 61 | 62 | LOUD(fprintf(stderr, "\ndedupefiles: %p\n", files);) 63 | 64 | fdr = (struct file_dedupe_range *)calloc(1, 65 | sizeof(struct file_dedupe_range) 66 | + sizeof(struct file_dedupe_range_info) + 1); 67 | fdr->dest_count = 1; 68 | fdri = &fdr->info[0]; 69 | for (curfile = files; curfile; curfile = curfile->next) { 70 | /* Skip all files that have no duplicates */ 71 | if (!ISFLAG(curfile->flags, FF_HAS_DUPES)) continue; 72 | CLEARFLAG(curfile->flags, FF_HAS_DUPES); 73 | 74 | /* For each duplicate list head, handle the duplicates in the list */ 75 | curfile2 = curfile; 76 | src_fd = open(curfile->d_name, O_RDONLY); 77 | /* If an open fails, keep going down the dupe list until it is exhausted */ 78 | while (src_fd == -1 && curfile2->duplicates && curfile2->duplicates->duplicates) { 79 | fprintf(stderr, "dedupe: open failed (skipping): %s\n", curfile2->d_name); 80 | exit_status = EXIT_FAILURE; 81 | curfile2 = curfile2->duplicates; 82 | src_fd = open(curfile2->d_name, O_RDONLY); 83 | } 84 | if (src_fd == -1) continue; 85 | printf(" [SRC] %s\n", curfile2->d_name); 86 | 87 | /* Run dedupe for each set */ 88 | for (dupefile = curfile->duplicates; dupefile; dupefile = dupefile->duplicates) { 89 | off_t remain; 90 | int err; 91 | 92 | /* Don't pass hard links to dedupe */ 93 | if (dupefile->device == curfile->device && dupefile->inode == curfile->inode) { 94 | printf(" -==-> %s\n", dupefile->d_name); 95 | continue; 96 | } 97 | 98 | /* Open destination file, skipping any that fail */ 99 | fdri->dest_fd = open(dupefile->d_name, O_RDONLY); 100 | if (fdri->dest_fd == -1) { 101 | fprintf(stderr, "dedupe: open failed 
(skipping): %s\n", dupefile->d_name); 102 | exit_status = EXIT_FAILURE; 103 | continue; 104 | } 105 | 106 | /* Dedupe src <--> dest, 16 MiB or less at a time */ 107 | remain = dupefile->size; 108 | fdri->status = FILE_DEDUPE_RANGE_SAME; 109 | /* Consume data blocks until no data remains */ 110 | while (remain) { 111 | errno = 0; 112 | fdr->src_offset = (uint64_t)(dupefile->size - remain); 113 | fdri->dest_offset = fdr->src_offset; 114 | fdr->src_length = (uint64_t)(remain <= KERNEL_DEDUP_MAX_SIZE ? remain : KERNEL_DEDUP_MAX_SIZE); 115 | ioctl(src_fd, FIDEDUPERANGE, fdr); 116 | if (fdri->status < 0) break; 117 | remain -= (off_t)fdr->src_length; 118 | } 119 | 120 | /* Handle any errors */ 121 | err = fdri->status; 122 | if (err != FILE_DEDUPE_RANGE_SAME || errno != 0) { 123 | printf(" -XX-> %s\n", dupefile->d_name); 124 | fprintf(stderr, "error: "); 125 | if (err == FILE_DEDUPE_RANGE_DIFFERS) { 126 | fprintf(stderr, "not identical (files modified between scan and dedupe?)\n"); 127 | exit_status = EXIT_FAILURE; 128 | } else if (err != 0) { 129 | fprintf(stderr, "%s (%d)\n", strerror(-err), err); 130 | exit_status = EXIT_FAILURE; 131 | } else if (errno != 0) { 132 | fprintf(stderr, "%s (%d)\n", strerror(errno), errno); 133 | exit_status = EXIT_FAILURE; 134 | } 135 | if ((err == -22 || errno == 22) && err_twentytwo == 0) { 136 | fprintf(stderr, " One or more files being deduped are read-only or hard linked.\n"); 137 | fprintf(stderr, " Read-only files can only be deduped by the root user.\n"); 138 | fprintf(stderr, " %s\n", s_err_dedupe_notabug); 139 | fprintf(stderr, " %s\n", s_err_dedupe_repeated); 140 | err_twentytwo = 1; 141 | } 142 | if ((err == -95 || errno == 95) && err_ninetyfive == 0) { 143 | fprintf(stderr, " One or more files is on a filesystem that does not support\n"); 144 | fprintf(stderr, " block-level deduplication or are on different filesystems.\n"); 145 | fprintf(stderr, " %s\n", s_err_dedupe_notabug); 146 | fprintf(stderr, " %s\n", 
s_err_dedupe_repeated); 147 | err_ninetyfive = 1; 148 | } 149 | } else { 150 | /* Dedupe OK; report to the user and add to file count */ 151 | printf(" ====> %s\n", dupefile->d_name); 152 | total_files++; 153 | } 154 | close((int)fdri->dest_fd); 155 | } 156 | printf("\n"); 157 | close(src_fd); 158 | total_files++; 159 | } 160 | 161 | if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "Deduplication done (%" PRIuMAX " files processed)\n", total_files); 162 | free(fdr); 163 | #endif /* __linux__ */ 164 | 165 | /* On macOS, clonefile() is basically a "hard link" function, so linkfiles will do the work. */ 166 | #ifdef __APPLE__ 167 | linkfiles(files, 2, 0); 168 | #endif /* __APPLE__ */ 169 | return; 170 | } 171 | #endif /* ENABLE_DEDUPE */ 172 | -------------------------------------------------------------------------------- /act_dedupefiles.h: -------------------------------------------------------------------------------- 1 | /* jdupes action for OS-specific block-level or CoW deduplication 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef ACT_DEDUPEFILES_H 5 | #define ACT_DEDUPEFILES_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include "jdupes.h" 12 | void dedupefiles(file_t * restrict files); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif /* ACT_DEDUPEFILES_H */ 19 | -------------------------------------------------------------------------------- /act_deletefiles.c: -------------------------------------------------------------------------------- 1 | /* Delete duplicate files automatically or interactively 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef NO_DELETE 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include "jdupes.h" 14 | #include "likely_unlikely.h" 15 | #include "act_deletefiles.h" 16 | #include "act_linkfiles.h" 17 | #ifndef NO_HASHDB 18 | #include "hashdb.h" 19 | #endif 20 | 21 | 
/* For interactive deletion input */ 22 | #define INPUT_SIZE 1024 23 | 24 | 25 | /* Count the following statistics: 26 | - Maximum number of files in a duplicate set (length of longest dupe chain) 27 | - Total number of duplicate file sets (groups) */ 28 | static unsigned int get_max_dupes(const file_t *files, unsigned int * const restrict max) 29 | { 30 | unsigned int groups = 0; 31 | 32 | if (unlikely(files == NULL || max == NULL)) jc_nullptr("get_max_dupes()"); 33 | LOUD(fprintf(stderr, "get_max_dupes(%p, %p)\n", (const void *)files, (void *)max);) 34 | 35 | *max = 0; 36 | 37 | while (files) { 38 | unsigned int n_dupes; 39 | if (ISFLAG(files->flags, FF_HAS_DUPES)) { 40 | groups++; 41 | n_dupes = 1; 42 | for (file_t *curdupe = files->duplicates; curdupe; curdupe = curdupe->duplicates) n_dupes++; 43 | if (n_dupes > *max) *max = n_dupes; 44 | } 45 | files = files->next; 46 | } 47 | return groups; 48 | } 49 | 50 | 51 | void deletefiles(file_t *files, int prompt, FILE *tty) 52 | { 53 | unsigned int counter, groups; 54 | unsigned int curgroup = 0; 55 | file_t *tmpfile; 56 | file_t **dupelist; 57 | unsigned int *preserve; 58 | char *preservestr; 59 | char *token; 60 | char *tstr; 61 | unsigned int number, sum, max, x; 62 | size_t i; 63 | 64 | LOUD(fprintf(stderr, "deletefiles: %p, %d, %p\n", files, prompt, tty)); 65 | 66 | groups = get_max_dupes(files, &max); 67 | 68 | max++; 69 | 70 | dupelist = (file_t **) malloc(sizeof(file_t*) * max); 71 | preserve = (unsigned int *) malloc(sizeof(int) * max); 72 | preservestr = (char *) malloc(INPUT_SIZE); 73 | 74 | if (!dupelist || !preserve || !preservestr) jc_oom("deletefiles() structures"); 75 | 76 | for (; files; files = files->next) { 77 | if (ISFLAG(files->flags, FF_HAS_DUPES)) { 78 | curgroup++; 79 | counter = 1; 80 | dupelist[counter] = files; 81 | 82 | if (prompt) { 83 | printf("[%u] ", counter); jc_fwprint(stdout, files->d_name, 1); 84 | } 85 | 86 | tmpfile = files->duplicates; 87 | 88 | while (tmpfile) { 89 | 
dupelist[++counter] = tmpfile; 90 | if (prompt) { 91 | printf("[%u] ", counter); jc_fwprint(stdout, tmpfile->d_name, 1); 92 | } 93 | tmpfile = tmpfile->duplicates; 94 | } 95 | 96 | if (prompt) printf("\n"); 97 | 98 | /* Preserve only the first file */ 99 | if (!prompt) { 100 | preserve[1] = 1; 101 | for (x = 2; x <= counter; x++) preserve[x] = 0; 102 | } else do { 103 | /* Prompt for files to preserve */ 104 | printf("Specify multiple files with commas like this: 1,2,4,6\n"); 105 | printf("Set %u of %u: keep which files? (1 - %u, [a]ll, [n]one", curgroup, groups, counter); 106 | #ifndef NO_HARDLINKS 107 | printf(", [l]ink all"); 108 | #endif 109 | #ifndef NO_SYMLINKS 110 | printf(", [s]ymlink all"); 111 | #endif 112 | printf(")"); 113 | if (ISFLAG(a_flags, FA_SHOWSIZE)) printf(" (%" PRIuMAX " byte%c each)", (uintmax_t)files->size, 114 | (files->size != 1) ? 's' : ' '); 115 | printf(": "); 116 | fflush(stdout); 117 | 118 | /* Treat fgets() failure as if nothing was entered */ 119 | if (!fgets(preservestr, INPUT_SIZE, tty)) preservestr[0] = '\n'; 120 | 121 | /* If nothing is entered, treat it as if 'a' was entered */ 122 | if (preservestr[0] == '\n') strcpy(preservestr, "a\n"); 123 | 124 | i = strlen(preservestr) - 1; 125 | 126 | /* tail of buffer must be a newline */ 127 | while (preservestr[i] != '\n') { 128 | tstr = (char *)realloc(preservestr, strlen(preservestr) + 1 + INPUT_SIZE); 129 | if (!tstr) jc_oom("deletefiles() prompt"); 130 | 131 | preservestr = tstr; 132 | if (!fgets(preservestr + i + 1, INPUT_SIZE, tty)) 133 | { 134 | preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */ 135 | break; 136 | } 137 | i = strlen(preservestr) - 1; 138 | } 139 | 140 | for (x = 1; x <= counter; x++) preserve[x] = 0; 141 | 142 | /* Catch attempts to use invalid characters and block them */ 143 | for (char *pscheck = preservestr; *pscheck != '\0'; pscheck++) { 144 | switch (*pscheck) { 145 | case ',': 146 | case ' ': 147 | case 'a': 148 | case 'A': 149 | 
case 's': 150 | case 'S': 151 | case 'l': 152 | case 'L': 153 | case 'n': 154 | case 'N': 155 | case '\n': 156 | case '\0': 157 | continue; 158 | default: 159 | break; 160 | } 161 | if (*pscheck >= '0' && *pscheck <= '9') continue; 162 | if (*pscheck == '-') { 163 | fprintf(stderr, "error: number ranges are not yet supported; taking no action\n"); 164 | goto skip_deletion; 165 | } 166 | fprintf(stderr, "error: invalid character '%c' in preserve answer; taking no action\n", *pscheck); 167 | goto skip_deletion; 168 | } 169 | token = strtok(preservestr, " ,\n"); 170 | if (token != NULL) { 171 | #if defined NO_HARDLINKS && defined NO_SYMLINKS 172 | /* no linktype needed */ 173 | #else 174 | int linktype = -1; 175 | #endif /* defined NO_HARDLINKS && defined NO_SYMLINKS */ 176 | /* "Delete none" = stop parsing string */ 177 | if (*token == 'n' || *token == 'N') goto stop_scanning; 178 | /* If requested, link this set instead */ 179 | #ifndef NO_HARDLINKS 180 | if (*token == 'l' || *token == 'L') linktype = 1; /* hard link */ 181 | #endif 182 | #ifndef NO_SYMLINKS 183 | if (*token == 's' || *token == 'S') linktype = 0; /* symlink */ 184 | #endif 185 | #if defined NO_HARDLINKS && defined NO_SYMLINKS 186 | /* no linking calls */ 187 | #else 188 | if (linktype != -1) { 189 | linkfiles(files, linktype, 1); 190 | goto skip_deletion; 191 | } 192 | #endif /* defined NO_HARDLINKS && defined NO_SYMLINKS */ 193 | } 194 | 195 | while (token != NULL) { 196 | if (*token == 'a' || *token == 'A') 197 | for (x = 0; x <= counter; x++) preserve[x] = 1; 198 | 199 | number = 0; 200 | sscanf(token, "%u", &number); 201 | if (number > 0 && number <= counter) preserve[number] = 1; 202 | else { 203 | fprintf(stderr, "invalid number '%u' in preserve answer; taking no action\n", number); 204 | goto skip_deletion; 205 | } 206 | 207 | token = strtok(NULL, " ,\n"); 208 | } 209 | 210 | for (sum = 0, x = 1; x <= counter; x++) sum += preserve[x]; 211 | } while (sum < 1); /* save at least one file */ 212 
| stop_scanning: 213 | 214 | printf("\n"); 215 | 216 | for (x = 1; x <= counter; x++) { 217 | if (preserve[x]) { 218 | printf(" [+] "); jc_fwprint(stdout, dupelist[x]->d_name, 1); 219 | } else { 220 | if (file_has_changed(dupelist[x])) { 221 | printf(" [!] "); jc_fwprint(stdout, dupelist[x]->d_name, 0); 222 | printf("-- file changed since being scanned\n"); 223 | exit_status = EXIT_FAILURE; 224 | } else if (jc_remove(dupelist[x]->d_name) == 0) { 225 | printf(" [-] "); jc_fwprint(stdout, dupelist[x]->d_name, 1); 226 | #ifndef NO_HASHDB 227 | if (ISFLAG(flags, F_HASHDB)) { 228 | dupelist[x]->mtime = 0; 229 | add_hashdb_entry(NULL, 0, dupelist[x]); 230 | } 231 | #endif 232 | } else { 233 | printf(" [!] "); jc_fwprint(stdout, dupelist[x]->d_name, 0); 234 | printf("-- unable to delete file\n"); 235 | exit_status = EXIT_FAILURE; 236 | } 237 | } 238 | } 239 | #if defined NO_HARDLINKS && defined NO_SYMLINKS 240 | /* label not needed */ 241 | #else 242 | skip_deletion: 243 | #endif /* defined NO_HARDLINKS && defined NO_SYMLINKS */ 244 | printf("\n"); 245 | } 246 | } 247 | free(dupelist); 248 | free(preserve); 249 | free(preservestr); 250 | return; 251 | } 252 | 253 | #endif /* NO_DELETE */ 254 | -------------------------------------------------------------------------------- /act_deletefiles.h: -------------------------------------------------------------------------------- 1 | /* jdupes action for deleting duplicate files 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef NO_DELETE 5 | 6 | #ifndef ACT_DELETEFILES_H 7 | #define ACT_DELETEFILES_H 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | #include "jdupes.h" 14 | extern void deletefiles(file_t *files, int prompt, FILE *tty); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif /* ACT_DELETEFILES_H */ 21 | 22 | #endif /* NO_DELETE */ 23 | -------------------------------------------------------------------------------- /act_linkfiles.h: 
/* True if the byte is a UTF-8 continuation byte (10xxxxxx) */
#define IS_CONT(a) (((a) & 0xc0) == 0x80)
/* Payload bits of a UTF-8 continuation byte */
#define GET_CONT(a) ((a) & 0x3f)
/* Lowercase hexadecimal digit for the low nibble of a value */
#define TO_HEX(a) (char)(((a) & 0x0f) <= 0x09 ? ((a) & 0x0f) + 0x30 : ((a) & 0x0f) + 0x57)

/** Decodes a single UTF-8 codepoint, consuming bytes.
 *
 * *string is advanced past everything that was consumed. Leading stray
 * continuation bytes are skipped silently.
 *
 * Returns the decoded code point, or 0xffffffff when no valid code point
 * could be decoded:
 *  - end of string: nothing past the terminator is ever consumed
 *  - invalid lead byte (0xf8..0xff): that single byte is consumed
 *  - truncated sequence: the lead byte and the continuation bytes that are
 *    present are consumed; the offending byte (possibly the terminator) is
 *    left in place */
static inline uint32_t decode_utf8(const char * restrict * const string)
{
  /* Work on unsigned bytes so the masks below never see negative values */
  const unsigned char *p = (const unsigned char *)*string;
  uint32_t ret;
  int extra;

  /** Eat problems up silently. */
  while (IS_CONT(*p)) p++;

  /** Never step over the string terminator. */
  if (*p == '\0') {
    *string = (const char *)p;
    return 0xffffffff;
  }

  /** ASCII. */
  if (!(*p & 0x80)) {
    ret = *p++;
    *string = (const char *)p;
    return ret;
  }

  /** Multibyte 2, 3, 4. */
  if ((*p & 0xe0) == 0xc0) {
    ret = *p & 0x1f;
    extra = 1;
  } else if ((*p & 0xf0) == 0xe0) {
    ret = *p & 0x0f;
    extra = 2;
  } else if ((*p & 0xf8) == 0xf0) {
    ret = *p & 0x07;
    extra = 3;
  } else {
    /** 5 and 6 byte forms are impossible; drop the byte so callers always
     *  make forward progress. */
    p++;
    *string = (const char *)p;
    return 0xffffffff;
  }
  p++;

  while (extra-- > 0) {
    if (!IS_CONT(*p)) {
      /* Truncated sequence; leave the unexpected byte for the next call */
      *string = (const char *)p;
      return 0xffffffff;
    }
    ret = (ret << 6) | GET_CONT(*p);
    p++;
  }

  *string = (const char *)p;
  return ret;
}

/** Escapes a single UTF-16 code unit for JSON.
 *
 * Writes exactly six characters ("\uXXXX", lowercase hex, no terminator) at
 * *json and advances *json past them. */
static inline void escape_uni16(uint16_t u16, char ** const json)
{
  *(*json)++ = '\\';
  *(*json)++ = 'u';
  *(*json)++ = TO_HEX(u16 >> 12);
  *(*json)++ = TO_HEX(u16 >> 8);
  *(*json)++ = TO_HEX(u16 >> 4);
  *(*json)++ = TO_HEX(u16);
}
*/ 73 | static void json_escape(const char * restrict string, char * restrict const target) 74 | { 75 | uint32_t curr = 0; 76 | char *escaped = target; 77 | while (*string != '\0' && (escaped - target) < (PATHBUF_SIZE * 2 - 1)) { 78 | switch (*string) { 79 | case '\"': 80 | case '\\': 81 | *escaped++ = '\\'; 82 | *escaped++ = *string++; 83 | break; 84 | default: 85 | curr = decode_utf8(&string); 86 | if (curr == 0xffffffff) break; 87 | if (likely(curr < 0xffff)) { 88 | if (likely(curr < 0x20 || curr > 0x7f)) 89 | escape_uni16((uint16_t)curr, &escaped); 90 | else 91 | *escaped++ = (char)curr; 92 | } else { 93 | curr -= 0x10000; 94 | escape_uni16((uint16_t)(0xD800 + ((curr >> 10) & 0x03ff)), &escaped); 95 | escape_uni16((uint16_t)(0xDC00 + (curr & 0x03ff)), &escaped); 96 | } 97 | break; 98 | } 99 | } 100 | *escaped = '\0'; 101 | return; 102 | } 103 | 104 | void printjson(file_t * restrict files, const int argc, char **argv) 105 | { 106 | file_t * restrict tmpfile; 107 | int arg = 0, comma = 0, len = 0; 108 | char *temp = (char *)malloc(PATHBUF_SIZE * 2); 109 | char *temp2 = (char *)malloc(PATHBUF_SIZE * 2); 110 | char *temp_insert = temp; 111 | 112 | LOUD(fprintf(stderr, "printjson: %p\n", files)); 113 | 114 | /* Output information about the jdupes command environment */ 115 | printf("{\n \"jdupesVersion\": \"%s\",\n \"jdupesVersionDate\": \"%s\",\n", VER, VERDATE); 116 | 117 | printf(" \"commandLine\": \""); 118 | while (arg < argc) { 119 | len = sprintf(temp_insert, " %s", argv[arg]); 120 | assert(len >= 0); 121 | temp_insert += len; 122 | arg++; 123 | } 124 | json_escape(temp + 1, temp2); /* Skip the starting space */ 125 | printf("%s\",\n", temp2); 126 | printf(" \"extensionFlags\": \""); 127 | #ifndef NO_HELPTEXT 128 | if (feature_flags[0] == NULL) printf("none\",\n"); 129 | else for (int c = 0; feature_flags[c] != NULL; c++) 130 | printf("%s%s", feature_flags[c], feature_flags[c+1] == NULL ? 
"\",\n" : " "); 131 | #else 132 | printf("unavailable\",\n"); 133 | #endif 134 | 135 | printf(" \"matchSets\": [\n"); 136 | while (files != NULL) { 137 | if (ISFLAG(files->flags, FF_HAS_DUPES)) { 138 | if (comma) printf(",\n"); 139 | printf(" {\n \"fileSize\": %" PRIdMAX ",\n \"fileList\": [\n { \"filePath\": \"", (intmax_t)files->size); 140 | sprintf(temp, "%s", files->d_name); 141 | json_escape(temp, temp2); 142 | jc_fwprint(stdout, temp2, 0); 143 | printf("\""); 144 | tmpfile = files->duplicates; 145 | while (tmpfile != NULL) { 146 | printf(" },\n { \"filePath\": \""); 147 | sprintf(temp, "%s", tmpfile->d_name); 148 | json_escape(temp, temp2); 149 | jc_fwprint(stdout, temp2, 0); 150 | printf("\""); 151 | tmpfile = tmpfile->duplicates; 152 | } 153 | printf(" }\n ]\n }"); 154 | comma = 1; 155 | } 156 | files = files->next; 157 | } 158 | 159 | printf("\n ]\n}\n"); 160 | 161 | free(temp); free(temp2); 162 | return; 163 | } 164 | 165 | #endif /* NO_JSON */ 166 | -------------------------------------------------------------------------------- /act_printjson.h: -------------------------------------------------------------------------------- 1 | /* jdupes action for printing comprehensive data as JSON to stdout 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef NO_JSON 5 | 6 | #ifndef ACT_PRINTJSON_H 7 | #define ACT_PRINTJSON_H 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | #include "jdupes.h" 14 | void printjson(file_t * restrict files, const int argc, char ** const restrict argv); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif /* ACT_PRINTJSON_H */ 21 | 22 | #endif /* NO_JSON */ 23 | -------------------------------------------------------------------------------- /act_printmatches.c: -------------------------------------------------------------------------------- 1 | /* Print matched file sets 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | 
#include 6 | #include 7 | #include "jdupes.h" 8 | #include 9 | #include "act_printmatches.h" 10 | 11 | void printmatches(file_t * restrict files) 12 | { 13 | file_t * restrict tmpfile; 14 | int printed = 0; 15 | int cr = 1; 16 | 17 | LOUD(fprintf(stderr, "printmatches: %p\n", files)); 18 | 19 | if (ISFLAG(a_flags, FA_PRINTNULL)) cr = 2; 20 | 21 | while (files != NULL) { 22 | if (ISFLAG(files->flags, FF_HAS_DUPES)) { 23 | printed = 1; 24 | if (!ISFLAG(a_flags, FA_OMITFIRST)) { 25 | if (ISFLAG(a_flags, FA_SHOWSIZE)) printf("%" PRIdMAX " byte%c each:\n", (intmax_t)files->size, 26 | (files->size != 1) ? 's' : ' '); 27 | jc_fwprint(stdout, files->d_name, cr); 28 | } 29 | tmpfile = files->duplicates; 30 | while (tmpfile != NULL) { 31 | jc_fwprint(stdout, tmpfile->d_name, cr); 32 | tmpfile = tmpfile->duplicates; 33 | } 34 | if (files->next != NULL) jc_fwprint(stdout, "", cr); 35 | 36 | } 37 | 38 | files = files->next; 39 | } 40 | 41 | if (printed == 0) printf("%s", s_no_dupes); 42 | 43 | return; 44 | } 45 | 46 | 47 | /* Print files that have no duplicates (unique files) */ 48 | void printunique(file_t *files) 49 | { 50 | file_t *chain, *scan; 51 | int printed = 0; 52 | int cr = 1; 53 | 54 | LOUD(fprintf(stderr, "print_uniques: %p\n", files)); 55 | 56 | if (ISFLAG(a_flags, FA_PRINTNULL)) cr = 2; 57 | 58 | scan = files; 59 | while (scan != NULL) { 60 | if (ISFLAG(scan->flags, FF_HAS_DUPES)) { 61 | chain = scan; 62 | while (chain != NULL) { 63 | SETFLAG(chain->flags, FF_NOT_UNIQUE); 64 | chain = chain->duplicates; 65 | } 66 | } 67 | scan = scan->next; 68 | } 69 | 70 | while (files != NULL) { 71 | if (!ISFLAG(files->flags, FF_NOT_UNIQUE)) { 72 | printed = 1; 73 | if (ISFLAG(a_flags, FA_SHOWSIZE)) printf("%" PRIdMAX " byte%c each:\n", (intmax_t)files->size, 74 | (files->size != 1) ? 
's' : ' '); 75 | jc_fwprint(stdout, files->d_name, cr); 76 | } 77 | files = files->next; 78 | } 79 | 80 | if (printed == 0) jc_fwprint(stderr, "No unique files found.", 1); 81 | 82 | return; 83 | } 84 | -------------------------------------------------------------------------------- /act_printmatches.h: -------------------------------------------------------------------------------- 1 | /* jdupes action for printing matched file sets to stdout 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef ACT_PRINTMATCHES_H 5 | #define ACT_PRINTMATCHES_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include "jdupes.h" 12 | void printmatches(file_t * restrict files); 13 | void printunique(file_t *files); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | 19 | #endif /* ACT_PRINTMATCHES_H */ 20 | -------------------------------------------------------------------------------- /act_summarize.c: -------------------------------------------------------------------------------- 1 | /* Print summary of match statistics to stdout 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "jdupes.h" 8 | #include "act_summarize.h" 9 | 10 | void summarizematches(const file_t * restrict files) 11 | { 12 | unsigned int numsets = 0; 13 | off_t numbytes = 0; 14 | int numfiles = 0; 15 | 16 | LOUD(fprintf(stderr, "summarizematches: %p\n", files)); 17 | 18 | while (files != NULL) { 19 | file_t *tmpfile; 20 | 21 | if (ISFLAG(files->flags, FF_HAS_DUPES)) { 22 | numsets++; 23 | tmpfile = files->duplicates; 24 | while (tmpfile != NULL) { 25 | numfiles++; 26 | numbytes += files->size; 27 | tmpfile = tmpfile->duplicates; 28 | } 29 | } 30 | files = files->next; 31 | } 32 | 33 | if (numsets == 0) 34 | printf("%s", s_no_dupes); 35 | else 36 | { 37 | printf("%d duplicate files (in %d sets), occupying ", numfiles, numsets); 38 | if (numbytes < 1000) printf("%" PRIdMAX " 
byte%c\n", (intmax_t)numbytes, (numbytes != 1) ? 's' : ' '); 39 | else if (numbytes <= 1000000) printf("%" PRIdMAX " KB\n", (intmax_t)(numbytes / 1000)); 40 | else printf("%" PRIdMAX " MB\n", (intmax_t)(numbytes / 1000000)); 41 | } 42 | return; 43 | } 44 | -------------------------------------------------------------------------------- /act_summarize.h: -------------------------------------------------------------------------------- 1 | /* jdupes action for printing a summary of match stats to stdout 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef ACT_SUMMARIZE_H 5 | #define ACT_SUMMARIZE_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include "jdupes.h" 12 | extern void summarizematches(const file_t * restrict files); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif /* ACT_SUMMARIZE_H */ 19 | -------------------------------------------------------------------------------- /args.c: -------------------------------------------------------------------------------- 1 | /* Argument functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include "jdupes.h" 10 | 11 | char **cloneargs(const int argc, char **argv) 12 | { 13 | static int x; 14 | static char **args; 15 | 16 | args = (char **)malloc(sizeof(char *) * (unsigned int)argc); 17 | if (args == NULL) jc_oom("cloneargs() start"); 18 | 19 | for (x = 0; x < argc; x++) { 20 | args[x] = (char *)malloc(strlen(argv[x]) + 1); 21 | if (args[x] == NULL) jc_oom("cloneargs() loop"); 22 | strcpy(args[x], argv[x]); 23 | } 24 | 25 | return args; 26 | } 27 | 28 | 29 | int findarg(const char * const arg, const int start, const int argc, char **argv) 30 | { 31 | int x; 32 | 33 | for (x = start; x < argc; x++) 34 | if (jc_streq(argv[x], arg) == 0) 35 | return x; 36 | 37 | return x; 38 | } 39 | 40 | /* Find the first non-option argument after specified option. 
*/ 41 | int nonoptafter(const char *option, const int argc, char **oldargv, char **newargv) 42 | { 43 | int x; 44 | int targetind; 45 | int testind; 46 | int startat = 1; 47 | 48 | targetind = findarg(option, 1, argc, oldargv); 49 | 50 | for (x = optind; x < argc; x++) { 51 | testind = findarg(newargv[x], startat, argc, oldargv); 52 | if (testind > targetind) return x; 53 | else startat = testind; 54 | } 55 | 56 | return x; 57 | } 58 | -------------------------------------------------------------------------------- /args.h: -------------------------------------------------------------------------------- 1 | /* jdupes argument functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_ARGS_H 5 | #define JDUPES_ARGS_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | char **cloneargs(const int argc, char **argv); 12 | int findarg(const char * const arg, const int start, const int argc, char **argv); 13 | int nonoptafter(const char *option, const int argc, char **oldargv, char **newargv); 14 | void linkfiles(file_t *files, const int linktype, const int only_current); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif /* JDUPES_ARGS_H */ 21 | -------------------------------------------------------------------------------- /build_msvc.bat: -------------------------------------------------------------------------------- 1 | cl /DON_WINDOWS /DUNICODE /O2 /W4 /std:c17 /c *.c 2 | link /lib *.obj /out:libjodycode.lib 3 | link /dll *.obj /out:libjodycode.dll -------------------------------------------------------------------------------- /checks.c: -------------------------------------------------------------------------------- 1 | /* jdupes file check functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include "likely_unlikely.h" 11 | #ifndef NO_EXTFILTER 12 | #include "extfilter.h" 13 | 
#endif 14 | #include "filestat.h" 15 | #include "jdupes.h" 16 | 17 | 18 | /***** End definitions, begin code *****/ 19 | 20 | /***** Add new functions here *****/ 21 | 22 | 23 | /* Check a pair of files for match exclusion conditions 24 | * Returns: 25 | * 0 if all condition checks pass 26 | * -1 or 1 on compare result less/more 27 | * -2 on an absolute exclusion condition met 28 | * 2 on an absolute match condition met 29 | * -3 on exclusion due to isolation 30 | * -4 on exclusion due to same filesystem 31 | * -5 on exclusion due to permissions */ 32 | int check_conditions(const file_t * const restrict file1, const file_t * const restrict file2) 33 | { 34 | if (unlikely(file1 == NULL || file2 == NULL || file1->d_name == NULL || file2->d_name == NULL)) jc_nullptr("check_conditions()"); 35 | 36 | LOUD(fprintf(stderr, "check_conditions('%s', '%s')\n", file1->d_name, file2->d_name);) 37 | 38 | /* Exclude files that are not the same size */ 39 | if (file1->size > file2->size) { 40 | LOUD(fprintf(stderr, "check_conditions: no match: size of file1 > file2 (%" PRIdMAX " > %" PRIdMAX ")\n", 41 | (intmax_t)file1->size, (intmax_t)file2->size)); 42 | return -1; 43 | } 44 | if (file1->size < file2->size) { 45 | LOUD(fprintf(stderr, "check_conditions: no match: size of file1 < file2 (%" PRIdMAX " < %"PRIdMAX ")\n", 46 | (intmax_t)file1->size, (intmax_t)file2->size)); 47 | return 1; 48 | } 49 | 50 | #ifndef NO_USER_ORDER 51 | /* Exclude based on -I/--isolate */ 52 | if (ISFLAG(flags, F_ISOLATE) && (file1->user_order == file2->user_order)) { 53 | LOUD(fprintf(stderr, "check_conditions: files ignored: parameter isolation\n")); 54 | return -3; 55 | } 56 | #endif /* NO_USER_ORDER */ 57 | 58 | /* Exclude based on -1/--one-file-system */ 59 | if (ISFLAG(flags, F_ONEFS) && (file1->device != file2->device)) { 60 | LOUD(fprintf(stderr, "check_conditions: files ignored: not on same filesystem\n")); 61 | return -4; 62 | } 63 | 64 | /* Exclude files by permissions if requested */ 65 | if 
(ISFLAG(flags, F_PERMISSIONS) && 66 | (file1->mode != file2->mode 67 | #ifndef NO_PERMS 68 | || file1->uid != file2->uid 69 | || file1->gid != file2->gid 70 | #endif 71 | )) { 72 | return -5; 73 | LOUD(fprintf(stderr, "check_conditions: no match: permissions/ownership differ (-p on)\n")); 74 | } 75 | 76 | /* Hard link and symlink + '-s' check */ 77 | #ifndef NO_HARDLINKS 78 | if ((file1->inode == file2->inode) && (file1->device == file2->device)) { 79 | if (ISFLAG(flags, F_CONSIDERHARDLINKS)) { 80 | LOUD(fprintf(stderr, "check_conditions: files match: hard/soft linked (-H on)\n")); 81 | return 2; 82 | } else { 83 | LOUD(fprintf(stderr, "check_conditions: files ignored: hard/soft linked (-H off)\n")); 84 | return -2; 85 | } 86 | } 87 | #endif 88 | 89 | /* Fall through: all checks passed */ 90 | LOUD(fprintf(stderr, "check_conditions: all condition checks passed\n")); 91 | return 0; 92 | } 93 | 94 | 95 | /* Check for exclusion conditions for a single file (1 = fail) */ 96 | int check_singlefile(file_t * const restrict newfile) 97 | { 98 | char * restrict tp = tempname; 99 | 100 | if (unlikely(newfile == NULL)) jc_nullptr("check_singlefile()"); 101 | 102 | LOUD(fprintf(stderr, "check_singlefile: checking '%s'\n", newfile->d_name)); 103 | 104 | /* Exclude hidden files if requested */ 105 | if (likely(ISFLAG(flags, F_EXCLUDEHIDDEN))) { 106 | if (unlikely(newfile->d_name == NULL)) jc_nullptr("check_singlefile newfile->d_name"); 107 | strcpy(tp, newfile->d_name); 108 | tp = basename(tp); 109 | if (tp[0] == '.' 
&& jc_streq(tp, ".") && jc_streq(tp, "..")) { 110 | LOUD(fprintf(stderr, "check_singlefile: excluding hidden file (-A on)\n")); 111 | return 1; 112 | } 113 | } 114 | 115 | /* Get file information and check for validity */ 116 | const int i = getfilestats(newfile); 117 | 118 | if (i || newfile->size == -1) { 119 | LOUD(fprintf(stderr, "check_singlefile: excluding due to bad stat()\n")); 120 | return 1; 121 | } 122 | 123 | if (!JC_S_ISREG(newfile->mode) && !JC_S_ISDIR(newfile->mode)) { 124 | LOUD(fprintf(stderr, "check_singlefile: excluding non-regular file\n")); 125 | return 1; 126 | } 127 | 128 | if (!JC_S_ISDIR(newfile->mode)) { 129 | /* Exclude zero-length files if requested */ 130 | if (newfile->size == 0 && !ISFLAG(flags, F_INCLUDEEMPTY)) { 131 | LOUD(fprintf(stderr, "check_singlefile: excluding zero-length empty file (-z not set)\n")); 132 | return 1; 133 | } 134 | 135 | #ifndef NO_EXTFILTER 136 | if (extfilter_exclude(newfile)) { 137 | LOUD(fprintf(stderr, "check_singlefile: excluding based on an extfilter option\n")); 138 | return 1; 139 | } 140 | #endif /* NO_EXTFILTER */ 141 | } 142 | 143 | #ifdef ON_WINDOWS 144 | /* Windows has a 1023 (+1) hard link limit. 
If we're hard linking, 145 | * ignore all files that have hit this limit */ 146 | #ifndef NO_HARDLINKS 147 | if (ISFLAG(a_flags, FA_HARDLINKFILES) && newfile->nlink >= 1024) { 148 | #ifdef DEBUG 149 | hll_exclude++; 150 | #endif 151 | LOUD(fprintf(stderr, "check_singlefile: excluding due to Windows 1024 hard link limit\n")); 152 | return 1; 153 | } 154 | #endif /* NO_HARDLINKS */ 155 | #endif /* ON_WINDOWS */ 156 | LOUD(fprintf(stderr, "check_singlefile: all checks passed\n")); 157 | return 0; 158 | } 159 | -------------------------------------------------------------------------------- /checks.h: -------------------------------------------------------------------------------- 1 | /* jdupes file check functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_CHECKS_H 5 | #define JDUPES_CHECKS_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | int check_conditions(const file_t * const restrict file1, const file_t * const restrict file2); 12 | int check_singlefile(file_t * const restrict newfile); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif /* JDUPES_CHECKS_H */ 19 | -------------------------------------------------------------------------------- /chroot_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Jody's generic chroot build script 4 | # Version 1.0 5 | 6 | ARCHES="i386 x86-64 uclibc-i386 uclibc-x86-64" 7 | test -z "$NAME" && NAME="$(basename "$(pwd)")" 8 | test -e "version.h" && VER="$(grep '#define VER ' version.h | tr -d \\\" | cut -d' ' -f3)" 9 | test -z "$VER" && VER=0 10 | export NAME 11 | export VER 12 | export CHROOT_BASE=/chroots 13 | export WD="$(pwd)" 14 | export PKG="pkg" 15 | 16 | echo "chroot builder: building '$NAME' version '$VER'" 17 | 18 | trap clean_exit INT QUIT ABRT HUP 19 | 20 | clean_exit () { 21 | umount $CHROOT/proc $CHROOT/sys $CHROOT/tmp $CHROOT/dev $CHROOT/usr/src $CHROOT/home 22 | } 
23 | 24 | do_build () { 25 | test -z "$WD" && echo "WD not set, aborting" && exit 1 26 | test -z "$PKG" && echo "PKG not set, aborting" && exit 1 27 | if [ -e ./generate_packages.sh ] 28 | then ./generate_packages.sh 29 | else 30 | make clean 31 | PN="${NAME}_$VER-$ARCH.pkg.tar.xz" 32 | if ! make -j$JOBS all 33 | then echo "Build failed"; exit 1 34 | else 35 | echo "WD/PKG: $WD/$PKG" 36 | test -d $WD/$PKG && rm -rf $WD/$PKG 37 | mkdir $WD/$PKG 38 | make DESTDIR=$WD/$PKG install && \ 39 | tar -C pkg -c usr | xz -e > "$PN" 40 | # Set ownership to current directory ownership 41 | chown "$(stat -c '%u:%g' .)" "$PN" 42 | echo "Built $PN" 43 | make clean 44 | fi 45 | fi 46 | } 47 | 48 | if [ "$(id -u)" != "0" ] 49 | then echo "You must be root to auto-build chroot packages." 50 | exit 1 51 | fi 52 | 53 | if [ "$DO_CHROOT_BUILD" = "1" ] 54 | then 55 | test -z "$1" && echo "No arch specified" && exit 1 56 | test ! -d "$1" && echo "Not a directory: $1" && exit 1 57 | cd $1 58 | export WD="$1" 59 | do_build 60 | echo "finished: $1" 61 | exit 62 | 63 | else 64 | echo baz 65 | export DO_CHROOT_BUILD=1 66 | for ARCH in $ARCHES 67 | do 68 | export ARCH 69 | export CHROOT="$CHROOT_BASE/$ARCH" 70 | test ! -d $CHROOT && echo "$CHROOT not present, not building $ARCH package." && continue 71 | echo "Performing package build for $CHROOT" 72 | test ! -x $CHROOT/bin/sh && echo "$CHROOT does not seem to be a chroot; aborting." 
&& exit 1 73 | mount --bind /dev $CHROOT/dev || clean_exit 74 | mount --bind /usr/src $CHROOT/usr/src || clean_exit 75 | mount --bind /home $CHROOT/home || clean_exit 76 | mount -t proc proc $CHROOT/proc || clean_exit 77 | mount -t sysfs sysfs $CHROOT/sys || clean_exit 78 | mount -t tmpfs tmpfs $CHROOT/tmp || clean_exit 79 | if echo "$ARCH" | grep -q "i386" 80 | then linux32 chroot $CHROOT $WD/$0 $WD 81 | else chroot $CHROOT $WD/$0 $WD 82 | fi 83 | umount $CHROOT/proc $CHROOT/sys $CHROOT/tmp $CHROOT/dev $CHROOT/usr/src $CHROOT/home 84 | test -d $WD/$PKG && rm -rf $WD/$PKG 85 | done 86 | fi 87 | -------------------------------------------------------------------------------- /compare_jdupes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs the installed *dupes* binary and the built binary and compares 4 | # the output for sameness. Also displays timing statistics. 5 | 6 | ERR=0 7 | 8 | # Detect installed jdupes 9 | if [ -z "$ORIG_JDUPES" ] 10 | then 11 | jdupes -v 2>/dev/null >/dev/null && ORIG_JDUPES=jdupes 12 | test ! -z "$WINDIR" && "$WINDIR/jdupes.exe" -v 2>/dev/null >/dev/null && ORIG_JDUPES="$WINDIR/jdupes.exe" 13 | [ -z "$ORIG_JDUPES" ] && echo "error: can't find old jdupes; set ORIG_JDUPES manually" >&2 && exit 1 14 | fi 15 | 16 | if [ ! $ORIG_JDUPES -v 2>/dev/null >/dev/null ] 17 | then echo "Can't run installed jdupes" 18 | echo "To manually specify an original jdupes, use: ORIG_JDUPES=path/to/jdupes $0" 19 | exit 1 20 | fi 21 | 22 | test ! 
-e ./jdupes && echo "Build jdupes first, silly" && exit 1 23 | 24 | echo -n "Installed $ORIG_JDUPES:" 25 | sync 26 | time $ORIG_JDUPES -q "$@" > installed_output.txt || ERR=1 27 | echo -en "\nBuilt jdupes:" 28 | sync 29 | time ./jdupes -q "$@" > built_output.txt || ERR=1 30 | diff -Nau installed_output.txt built_output.txt 31 | 32 | rm -f installed_output.txt built_output.txt 33 | test "$ERR" != "0" && echo "Errors were returned during execution" 34 | -------------------------------------------------------------------------------- /dumpflags.c: -------------------------------------------------------------------------------- 1 | /* Debug flag dumping 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | #include "jdupes.h" 6 | 7 | #ifdef DEBUG 8 | void dump_all_flags(void) 9 | { 10 | fprintf(stderr, "\nSet flag dump:"); 11 | /* Behavior modification flags */ 12 | if (ISFLAG(flags, F_RECURSE)) fprintf(stderr, " F_RECURSE"); 13 | if (ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, " F_HIDEPROGRESS"); 14 | if (ISFLAG(flags, F_SOFTABORT)) fprintf(stderr, " F_SOFTABORT"); 15 | if (ISFLAG(flags, F_FOLLOWLINKS)) fprintf(stderr, " F_FOLLOWLINKS"); 16 | if (ISFLAG(flags, F_INCLUDEEMPTY)) fprintf(stderr, " F_INCLUDEEMPTY"); 17 | if (ISFLAG(flags, F_CONSIDERHARDLINKS)) fprintf(stderr, " F_CONSIDERHARDLINKS"); 18 | if (ISFLAG(flags, F_RECURSEAFTER)) fprintf(stderr, " F_RECURSEAFTER"); 19 | if (ISFLAG(flags, F_NOPROMPT)) fprintf(stderr, " F_NOPROMPT"); 20 | if (ISFLAG(flags, F_EXCLUDEHIDDEN)) fprintf(stderr, " F_EXCLUDEHIDDEN"); 21 | if (ISFLAG(flags, F_PERMISSIONS)) fprintf(stderr, " F_PERMISSIONS"); 22 | if (ISFLAG(flags, F_EXCLUDESIZE)) fprintf(stderr, " F_EXCLUDESIZE"); 23 | if (ISFLAG(flags, F_QUICKCOMPARE)) fprintf(stderr, " F_QUICKCOMPARE"); 24 | if (ISFLAG(flags, F_USEPARAMORDER)) fprintf(stderr, " F_USEPARAMORDER"); 25 | if (ISFLAG(flags, F_REVERSESORT)) fprintf(stderr, " F_REVERSESORT"); 26 | if (ISFLAG(flags, F_ISOLATE)) 
fprintf(stderr, " F_ISOLATE"); 27 | if (ISFLAG(flags, F_ONEFS)) fprintf(stderr, " F_ONEFS"); 28 | if (ISFLAG(flags, F_PARTIALONLY)) fprintf(stderr, " F_PARTIALONLY"); 29 | if (ISFLAG(flags, F_NOCHANGECHECK)) fprintf(stderr, " F_NOCHANGECHECK"); 30 | if (ISFLAG(flags, F_NOTRAVCHECK)) fprintf(stderr, " F_NOTRAVCHECK"); 31 | if (ISFLAG(flags, F_SKIPHASH)) fprintf(stderr, " F_SKIPHASH"); 32 | if (ISFLAG(flags, F_BENCHMARKSTOP)) fprintf(stderr, " F_BENCHMARKSTOP"); 33 | if (ISFLAG(flags, F_HASHDB)) fprintf(stderr, " F_HASHDB"); 34 | 35 | if (ISFLAG(flags, F_LOUD)) fprintf(stderr, " F_LOUD"); 36 | if (ISFLAG(flags, F_DEBUG)) fprintf(stderr, " F_DEBUG"); 37 | 38 | /* Action-related flags */ 39 | if (ISFLAG(a_flags, FA_PRINTMATCHES)) fprintf(stderr, " FA_PRINTMATCHES"); 40 | if (ISFLAG(a_flags, FA_PRINTUNIQUE)) fprintf(stderr, " FA_PRINTUNIQUE"); 41 | if (ISFLAG(a_flags, FA_OMITFIRST)) fprintf(stderr, " FA_OMITFIRST"); 42 | if (ISFLAG(a_flags, FA_SUMMARIZEMATCHES)) fprintf(stderr, " FA_SUMMARIZEMATCHES"); 43 | if (ISFLAG(a_flags, FA_DELETEFILES)) fprintf(stderr, " FA_DELETEFILES"); 44 | if (ISFLAG(a_flags, FA_SHOWSIZE)) fprintf(stderr, " FA_SHOWSIZE"); 45 | if (ISFLAG(a_flags, FA_HARDLINKFILES)) fprintf(stderr, " FA_HARDLINKFILES"); 46 | if (ISFLAG(a_flags, FA_DEDUPEFILES)) fprintf(stderr, " FA_DEDUPEFILES"); 47 | if (ISFLAG(a_flags, FA_MAKESYMLINKS)) fprintf(stderr, " FA_MAKESYMLINKS"); 48 | if (ISFLAG(a_flags, FA_PRINTNULL)) fprintf(stderr, " FA_PRINTNULL"); 49 | if (ISFLAG(a_flags, FA_PRINTJSON)) fprintf(stderr, " FA_PRINTJSON"); 50 | if (ISFLAG(a_flags, FA_ERRORONDUPE)) fprintf(stderr, " FA_ERRORONDUPE"); 51 | 52 | /* Extra print flags */ 53 | if (ISFLAG(p_flags, PF_PARTIAL)) fprintf(stderr, " PF_PARTIAL"); 54 | if (ISFLAG(p_flags, PF_EARLYMATCH)) fprintf(stderr, " PF_EARLYMATCH"); 55 | if (ISFLAG(p_flags, PF_FULLHASH)) fprintf(stderr, " PF_FULLHASH"); 56 | fprintf(stderr, " [end of list]\n\n"); 57 | fflush(stderr); 58 | return; 59 | } 60 | #endif 61 | 
--------------------------------------------------------------------------------
/dumpflags.h:
--------------------------------------------------------------------------------
/* Debug flag dumping
 * This file is part of jdupes; see jdupes.c for license information */

#ifndef JDUPES_DUMPFLAGS_H
#define JDUPES_DUMPFLAGS_H

#ifdef __cplusplus
extern "C" {
#endif

#ifdef DEBUG
/* Only declared in DEBUG builds; callers must guard their calls the same way */
extern void dump_all_flags(void);
#endif

#ifdef __cplusplus
}
#endif

#endif /* JDUPES_DUMPFLAGS_H */

--------------------------------------------------------------------------------
/example_scripts/delete_but_exclude_nonposix.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# NOTE: This non-POSIX version is faster but requires bash/ksh/zsh etc.

# This is a shell script that deletes match sets like jdupes -dN does, but
# excludes any file paths from deletion that match any of the regex
# patterns passed to the script. Use it like this:
#
# jdupes whatever | ./delete_but_exclude_nonposix.sh regex1 [regex2] [...]

# Announce what this script does so the user knows what's going on
echo "jdupes script - delete duplicates that don't match specified patterns"

# If no parameters are passed, give usage info and abort
test -z "$1" && echo "usage: $0 regex1 [regex2] [...]" && exit 1

# Exit status will be 0 on success, 1 on any failure
EXITSTATUS=0

# Skip the first file in each match set
FIRSTFILE=1
# IFS= and -r keep leading/trailing blanks and backslashes in file names intact
while IFS= read -r LINE
    do
    # Remove Windows CR characters if present in name
    LINE=${LINE/$'\r'/}
    # Reset on a blank line; next line will be a first file
    test -z "$LINE" && FIRSTFILE=1 && continue
    # If this is the first file, take no action
    test $FIRSTFILE -eq 1 && FIRSTFILE=0 && echo $'\n'"[+] $LINE" && continue
    # Keep the file if it matches ANY pattern; decide once per file so that a
    # file matching one pattern is not deleted because it misses another
    KEEP=0
    for RX in "$@"
        do test -z "$RX" && continue
        if [[ $LINE =~ $RX ]]
            then KEEP=1; break
        fi
    done
    if [ "$KEEP" -eq 1 ]
        then echo "[+] $LINE"
    elif rm -f -- "$LINE"
        then echo "[-] $LINE"
    else
        echo "[!] $LINE"
        EXITSTATUS=1
    fi
done

exit $EXITSTATUS

--------------------------------------------------------------------------------
/example_scripts/delete_but_exclude_posix.sh:
--------------------------------------------------------------------------------
#!/bin/sh

# This is a shell script that deletes match sets like jdupes -dN does, but
# excludes any file paths from deletion that match any of the grep regex
# patterns passed to the script. Use it like this:
#
# jdupes whatever | ./delete_but_exclude_posix.sh regex1 [regex2] [...]

# Announce what this script does so the user knows what's going on
echo "jdupes script - delete duplicates that don't match specified patterns"

# If no parameters are passed, give usage info and abort
test -z "$1" && echo "usage: $0 regex1 [regex2] [...]" && exit 1

# Exit status will be 0 on success, 1 on any failure
EXITSTATUS=0

# Skip the first file in each match set
FIRSTFILE=1
# IFS= and -r keep leading/trailing blanks and backslashes in file names intact
while IFS= read -r LINE
    do
    # Remove Windows CR characters if present in name
    # (printf instead of echo: some sh implementations expand backslashes in echo)
    LINE="$(printf '%s' "$LINE" | tr -d '\r')"
    # Reset on a blank line; next line will be a first file
    test -z "$LINE" && FIRSTFILE=1 && continue
    # If this is the first file, take no action
    # ($'\n' is not POSIX sh, so the leading blank line comes from printf)
    test $FIRSTFILE -eq 1 && FIRSTFILE=0 && printf '\n[+] %s\n' "$LINE" && continue
    # Keep the file if it matches ANY pattern; decide once per file so that a
    # file matching one pattern is not deleted because it misses another
    KEEP=0
    for RX in "$@"
        do test -z "$RX" && continue
        if printf '%s\n' "$LINE" | grep -q -e "$RX"
            then KEEP=1; break
        fi
    done
    if [ "$KEEP" -eq 1 ]
        then printf '[+] %s\n' "$LINE"
    elif rm -f -- "$LINE"
        then printf '[-] %s\n' "$LINE"
    else
        printf '[!] %s\n' "$LINE"
        EXITSTATUS=1
    fi
done

exit $EXITSTATUS

--------------------------------------------------------------------------------
/example_scripts/example.sh:
--------------------------------------------------------------------------------
#!/bin/sh

# This is a shell script that demonstrates how to process the standard
# jdupes output (known as "printmatches") to perform custom actions.
# Use it like this:
#
# jdupes whatever_parameters_you_like | ./example.sh script_parameters
#
# If you are on Windows, jdupes uses backslash path separators which
# must be converted to forward slashes before piping to this script,
# and carriage returns (\r) must also be deleted from jdupes output
# (tested on MSYS2 MinGW, probably true for similar environments):
#
# jdupes params | tr '\\' / | tr -d '\r' | ./example.sh script_params
#
# The general structure of a jdupes pipe script is:
# * Initialize conditions
# * Iterate through a match set and act on items
# * Reset conditions and restart when a blank line is reached

# This script moves all duplicate files to a different directory
# without duplicating the directory structure. It can be easily
# modified to make the required directories and create a "mirror"
# consisting of duplicates that 'jdupes -rdN' would delete.

# Announce what this script does so the user knows what's going on
echo "jdupes example script - moving duplicate files to a directory"

# If first parameter isn't a valid directory, give usage info and abort
test ! -d "$1" && echo "usage: $0 destination_dir_to_move_files_to" && exit 1

# Exit status will be 0 on success, 1 on any failure
EXITSTATUS=0

# Skip the first file in each match set
FIRSTFILE=1
# IFS= and -r keep leading/trailing blanks and backslashes in file names intact
while IFS= read -r LINE
    do echo "$LINE"
    # Reset on a blank line; next line will be a first file
    test -z "$LINE" && FIRSTFILE=1 && continue
    # If this is the first file, take no action
    test $FIRSTFILE -eq 1 && FIRSTFILE=0 && continue
    # Move the file specified on the line to the directory specified
    # ('--' stops a file name that starts with '-' being read as an option)
    if mv -f -- "$LINE" "$1"
        then
        # Print the action that was taken
        echo "'$LINE' => '$1/$(basename "$LINE")'"
        else
        echo "Failed to move: '$LINE' => '$1/$(basename "$LINE")'" >&2
        EXITSTATUS=1
    fi
done

exit $EXITSTATUS

--------------------------------------------------------------------------------
/example_scripts/fdupes_oneline.sh:
--------------------------------------------------------------------------------
#!/bin/sh

# Emulates fdupes -1 output
# Usage: jdupes command line | ./fdupes_oneline.sh

# This is a newline.
IFS='
'

if [ "$1" = "-q" ] || [ "$1" = "--shell-quote" ]; then
    # This only works with GNU (env printf) or bash (builtin printf).
    # If you are using dash, change the command to use env printf...
13 | escape() { printf '%q ' "$LINE"; } 14 | else 15 | escape() { printf '%s' "$LINE" | sed 's/\\/\\\\/g; s/ /\\ /g'; printf ' '; } 16 | fi 17 | 18 | while read -r LINE 19 | do if [ -z "$LINE" ] 20 | then printf '\n' 21 | else escape 22 | fi 23 | done 24 | -------------------------------------------------------------------------------- /extfilter.c: -------------------------------------------------------------------------------- 1 | /* jdupes extended filters 2 | * See jdupes.c for license information */ 3 | 4 | #ifndef NO_EXTFILTER 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include "helptext.h" 13 | #include "jdupes.h" 14 | 15 | /* Extended filter parameter flags */ 16 | #define XF_EXCL_EXT 0x00000001U 17 | #define XF_SIZE_EQ 0x00000002U 18 | #define XF_SIZE_GT 0x00000004U 19 | #define XF_SIZE_LT 0x00000008U 20 | #define XF_ONLY_EXT 0x00000010U 21 | #define XF_EXCL_STR 0x00000020U 22 | #define XF_ONLY_STR 0x00000040U 23 | #define XF_DATE_NEWER 0x00000080U 24 | #define XF_DATE_OLDER 0x00000100U 25 | /* The X-than-or-equal are combination flags */ 26 | #define XF_SIZE_GTEQ 0x00000006U 27 | #define XF_SIZE_LTEQ 0x0000000aU 28 | 29 | /* Flags that use a numeric size with optional suffix */ 30 | #define XF_REQ_NUMBER 0x0000000eU 31 | /* Flags that require a data parameter (after a colon) */ 32 | #define XF_REQ_VALUE 0x0000001fU 33 | /* Flags that take a date that needs to be converted to time_t seconds */ 34 | #define XF_REQ_DATE 0x00000180U 35 | 36 | /* -X extended filter parameter stack */ 37 | struct extfilter { 38 | struct extfilter *next; 39 | unsigned int flags; 40 | int64_t size; /* also used for other large integers */ 41 | char param[]; 42 | }; 43 | 44 | struct extfilter_tags { 45 | const char * const tag; 46 | const uint32_t flags; 47 | }; 48 | 49 | /* Extended filter tree head and static tag list */ 50 | static struct extfilter *extfilter_head = NULL; 51 | static const struct extfilter_tags extfilter_tags[] = { 52 | { 
"noext", XF_EXCL_EXT }, 53 | { "onlyext", XF_ONLY_EXT }, 54 | { "size+", XF_SIZE_GT }, 55 | { "size-", XF_SIZE_LT }, 56 | { "size+=", XF_SIZE_GTEQ }, 57 | { "size-=", XF_SIZE_LTEQ }, 58 | { "size=", XF_SIZE_EQ }, 59 | { "nostr", XF_EXCL_STR }, 60 | { "onlystr", XF_ONLY_STR }, 61 | { "newer", XF_DATE_NEWER }, 62 | { "older", XF_DATE_OLDER }, 63 | { NULL, 0 }, 64 | }; 65 | 66 | 67 | static void help_text_extfilter(void) 68 | { 69 | #ifndef NO_HELPTEXT 70 | printf("Detailed help for jdupes -X/--ext-filter options\n"); 71 | printf("General format: jdupes -X filter[:value][size_suffix]\n\n"); 72 | 73 | printf("noext:ext1[,ext2,...] \tExclude files with certain extension(s)\n\n"); 74 | printf("onlyext:ext1[,ext2,...] \tOnly include files with certain extension(s)\n\n"); 75 | printf("size[+-=]:size[suffix] \tOnly Include files matching size criteria\n"); 76 | printf(" \tSize specs: + larger, - smaller, = equal to\n"); 77 | printf(" \tSpecs can be mixed, i.e. size+=:100k will\n"); 78 | printf(" \tonly include files 100KiB or more in size.\n\n"); 79 | printf("nostr:text_string \tExclude all paths containing the string\n"); 80 | printf("onlystr:text_string \tOnly allow paths containing the string\n"); 81 | printf(" \tHINT: you can use these for directories:\n"); 82 | printf(" \t-X nostr:/dir_x/ or -X onlystr:/dir_x/\n"); 83 | printf("newer:datetime \tOnly include files newer than specified date\n"); 84 | printf("older:datetime \tOnly include files older than specified date\n"); 85 | printf(" \tDate/time format: \"YYYY-MM-DD HH:MM:SS\"\n"); 86 | printf(" \tTime is optional (remember to escape spaces!)\n"); 87 | /* printf("\t\n"); */ 88 | 89 | printf("\nSome filters take no value or multiple values. Filters that can take\n"); 90 | printf( "a numeric option generally support the size multipliers K/M/G/T/P/E\n"); 91 | printf( "with or without an added iB or B. Multipliers are binary-style unless\n"); 92 | printf( "the -B suffix is used, which will use decimal multipliers. 
For example,\n"); 93 | printf( "16k or 16kib = 16384; 16kb = 16000. Multipliers are case-insensitive.\n\n"); 94 | 95 | printf( "Filters have cumulative effects: jdupes -X size+:99 -X size-:101 will\n"); 96 | printf( "cause only files of exactly 100 bytes in size to be included.\n\n"); 97 | 98 | printf( "Extension matching is case-insensitive.\n"); 99 | printf( "Path substring matching is case-sensitive.\n"); 100 | #else /* NO_HELPTEXT */ 101 | version_text(0); 102 | #endif /* NO_HELPTEXT */ 103 | } 104 | 105 | 106 | /* Does a file have one of these comma-separated extensions? 107 | * Returns 1 after any match, 0 if no matches */ 108 | static int match_extensions(char *path, const char *extlist) 109 | { 110 | char *dot; 111 | const char *ext; 112 | size_t len, extlen; 113 | 114 | LOUD(fprintf(stderr, "match_extensions('%s', '%s')\n", path, extlist);) 115 | if (path == NULL || extlist == NULL) jc_nullptr("match_extensions"); 116 | 117 | dot = NULL; 118 | /* Scan to end of path, save the last dot, reset on path separators */ 119 | while (*path != '\0') { 120 | if (*path == '.') dot = path; 121 | if (*path == '/' || *path == '\\') dot = NULL; 122 | path++; 123 | } 124 | /* No dots in the file name = no extension, so give up now */ 125 | if (dot == NULL) return 0; 126 | dot++; 127 | /* Handle a dot at the end of a file name */ 128 | if (*dot == '\0') return 0; 129 | 130 | /* Get the length of the file's extension for later checking */ 131 | extlen = strlen(dot); 132 | LOUD(fprintf(stderr, "match_extensions: file has extension '%s' with length %" PRIdMAX "\n", dot, (intmax_t)extlen);) 133 | 134 | /* dot is now at the location of the last file extension; check the list */ 135 | /* Skip any commas at the start of the list */ 136 | while (*extlist == ',') extlist++; 137 | ext = extlist; 138 | len = 0; 139 | while (1) { 140 | /* Reject upon hitting the end with no more extensions to process */ 141 | if (*extlist == '\0' && len == 0) return 0; 142 | /* Process extension once 
a comma or EOL is hit */ 143 | if (*extlist == ',' || *extlist == '\0') { 144 | /* Skip serial commas */ 145 | while (*extlist == ',') extlist++; 146 | if (extlist == ext) goto skip_empty; 147 | if (jc_strncaseeq(dot, ext, len) == 0 && extlen == len) { 148 | LOUD(fprintf(stderr, "match_extensions: matched on extension '%s' (len %" PRIdMAX ")\n", dot, (intmax_t)len);) 149 | return 1; 150 | } 151 | LOUD(fprintf(stderr, "match_extensions: no match: '%s' (%" PRIdMAX "), '%s' (%" PRIdMAX ")\n", dot, (intmax_t)len, ext, (intmax_t)extlen);) 152 | skip_empty: 153 | ext = extlist; 154 | len = 0; 155 | continue; 156 | } 157 | extlist++; len++; 158 | /* LOUD(fprintf(stderr, "match_extensions: DEBUG: '%s' : '%s' (%ld), '%s' (%ld)\n", extlist, dot, len, ext, extlen);) */ 159 | } 160 | return 0; 161 | } 162 | 163 | 164 | /* Add a filter to the filter stack */ 165 | void add_extfilter(const char *option) 166 | { 167 | char *opt, *p; 168 | time_t tt; 169 | struct extfilter *extf = extfilter_head; 170 | const struct extfilter_tags *tags = extfilter_tags; 171 | const struct jc_size_suffix *ss = jc_size_suffix; 172 | 173 | if (option == NULL) jc_nullptr("add_extfilter()"); 174 | 175 | LOUD(fprintf(stderr, "add_extfilter '%s'\n", option);) 176 | 177 | /* Invoke help text if requested */ 178 | if (jc_strcaseeq(option, "help") == 0) { help_text_extfilter(); exit(EXIT_SUCCESS); } 179 | 180 | opt = malloc(strlen(option) + 1); 181 | if (opt == NULL) jc_oom("add_extfilter option"); 182 | strcpy(opt, option); 183 | p = opt; 184 | 185 | while (*p != ':' && *p != '\0') p++; 186 | 187 | /* Split tag string into *opt (tag) and *p (value) */ 188 | if (*p == ':') { 189 | *p = '\0'; 190 | p++; 191 | } 192 | 193 | while (tags->tag != NULL && jc_streq(tags->tag, opt) != 0) tags++; 194 | if (tags->tag == NULL) goto error_bad_filter; 195 | 196 | /* Check for a tag that requires a value */ 197 | if (tags->flags & XF_REQ_VALUE && *p == '\0') goto error_value_missing; 198 | 199 | /* *p is now at the 
value, NOT the tag string! */ 200 | 201 | if (extfilter_head != NULL) { 202 | /* Add to end of exclusion stack if head is present */ 203 | while (extf->next != NULL) extf = extf->next; 204 | extf->next = malloc(sizeof(struct extfilter) + strlen(p) + 1); 205 | if (extf->next == NULL) jc_oom("add_extfilter alloc"); 206 | extf = extf->next; 207 | } else { 208 | /* Allocate extfilter_head if no exclusions exist yet */ 209 | extfilter_head = malloc(sizeof(struct extfilter) + strlen(p) + 1); 210 | if (extfilter_head == NULL) jc_oom("add_extfilter alloc"); 211 | extf = extfilter_head; 212 | } 213 | 214 | /* Set tag value from predefined tag array */ 215 | extf->flags = tags->flags; 216 | 217 | /* Initialize the new extfilter element */ 218 | extf->next = NULL; 219 | if (extf->flags & XF_REQ_NUMBER) { 220 | /* Exclude uses a number; handle it with possible suffixes */ 221 | *(extf->param) = '\0'; 222 | /* Get base size */ 223 | if (*p < '0' || *p > '9') goto error_bad_size_suffix; 224 | extf->size = strtoll(p, &p, 10); 225 | /* Handle suffix, if any */ 226 | if (*p != '\0') { 227 | while (ss->suffix != NULL && jc_strcaseeq(ss->suffix, p) != 0) ss++; 228 | if (ss->suffix == NULL) goto error_bad_size_suffix; 229 | extf->size *= ss->multiplier; 230 | } 231 | } else if (extf->flags & XF_REQ_DATE) { 232 | /* Exclude uses a date; convert it to seconds since the epoch */ 233 | *(extf->param) = '\0'; 234 | tt = jc_strtoepoch(p); 235 | LOUD(fprintf(stderr, "extfilter: jody_strtoepoch: '%s' -> %" PRIdMAX "\n", p, (intmax_t)tt);) 236 | if (tt == -1) goto error_bad_time; 237 | extf->size = tt; 238 | } else { 239 | /* Exclude uses string data; just copy it */ 240 | extf->size = 0; 241 | if (*p != '\0') strcpy(extf->param, p); 242 | else *(extf->param) = '\0'; 243 | } 244 | 245 | LOUD(fprintf(stderr, "Added extfilter: tag '%s', data '%s', size %lld, flags %d\n", opt, extf->param, (long long)extf->size, extf->flags);) 246 | free(opt); 247 | return; 248 | 249 | error_bad_time: 250 | 
fprintf(stderr, "Invalid extfilter date[time] was specified: -X filter:datetime\n"); 251 | goto extf_help_and_exit; 252 | error_value_missing: 253 | fprintf(stderr, "extfilter value missing or invalid: -X filter:value\n"); 254 | goto extf_help_and_exit; 255 | error_bad_filter: 256 | fprintf(stderr, "Invalid extfilter filter name was specified\n"); 257 | goto extf_help_and_exit; 258 | error_bad_size_suffix: 259 | fprintf(stderr, "Invalid extfilter size suffix specified; use B or KMGTPE[i][B]\n"); 260 | goto extf_help_and_exit; 261 | extf_help_and_exit: 262 | help_text_extfilter(); 263 | exit(EXIT_FAILURE); 264 | } 265 | 266 | 267 | /* Exclude single files based on extended filter stack; return 0 = exclude */ 268 | int extfilter_exclude(file_t * const restrict newfile) 269 | { 270 | for (struct extfilter *extf = extfilter_head; extf != NULL; extf = extf->next) { 271 | uint32_t sflag = extf->flags; 272 | LOUD(fprintf(stderr, "check_singlefile: extfilter check: %08x %" PRIdMAX " %" PRIdMAX " %s\n", sflag, (intmax_t)newfile->size, (intmax_t)extf->size, newfile->d_name);) 273 | if ( 274 | /* Any line that passes will result in file exclusion */ 275 | ((sflag == XF_SIZE_EQ) && (newfile->size != extf->size)) 276 | || ((sflag == XF_SIZE_LTEQ) && (newfile->size > extf->size)) 277 | || ((sflag == XF_SIZE_GTEQ) && (newfile->size < extf->size)) 278 | || ((sflag == XF_SIZE_GT) && (newfile->size <= extf->size)) 279 | || ((sflag == XF_SIZE_LT) && (newfile->size >= extf->size)) 280 | || ((sflag == XF_EXCL_EXT) && match_extensions(newfile->d_name, extf->param)) 281 | || ((sflag == XF_ONLY_EXT) && !match_extensions(newfile->d_name, extf->param)) 282 | || ((sflag == XF_EXCL_STR) && strstr(newfile->d_name, extf->param)) 283 | || ((sflag == XF_ONLY_STR) && !strstr(newfile->d_name, extf->param)) 284 | #ifndef NO_MTIME 285 | || ((sflag == XF_DATE_NEWER) && (newfile->mtime < extf->size)) 286 | || ((sflag == XF_DATE_OLDER) && (newfile->mtime >= extf->size)) 287 | #endif 288 | ) return 1; 
289 | } 290 | return 0; 291 | } 292 | 293 | #endif /* NO_EXTFILTER */ 294 | -------------------------------------------------------------------------------- /extfilter.h: -------------------------------------------------------------------------------- 1 | /* jdupes extended filters 2 | * See jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_EXTFILTER_H 5 | #define JDUPES_EXTFILTER_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifndef NO_EXTFILTER 12 | 13 | #include "jdupes.h" 14 | 15 | void add_extfilter(const char *option); 16 | int extfilter_exclude(file_t * const restrict newfile); 17 | 18 | #endif /* NO_EXTFILTER */ 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif /* JDUPES_EXTFILTER_H */ 25 | -------------------------------------------------------------------------------- /filehash.c: -------------------------------------------------------------------------------- 1 | /* jdupes file hashing function 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | #include "likely_unlikely.h" 20 | #include "filehash.h" 21 | #include "interrupt.h" 22 | #include "progress.h" 23 | #include "jdupes.h" 24 | #include "xxhash.h" 25 | 26 | const char *hash_algo_list[2] = { 27 | "xxHash64 v2", 28 | "jodyhash v7" 29 | }; 30 | 31 | 32 | /* Hash part or all of a file 33 | * 34 | * READ THIS BEFORE CHANGING THE HASH FUNCTION! 35 | * The hash function is only used to do fast exclusion. There is not much 36 | * benefit to using bigger or "better" hash functions. Upstream jdupes WILL 37 | * NOT accept any pull requests that change the hash function unless there 38 | * is an EXTREMELY compelling reason to do so. Do not waste your time with 39 | * swapping hash functions. If you want to do it for fun then that's fine. 
*/ 40 | uint64_t *get_filehash(const file_t * const restrict checkfile, const size_t max_read, int algo) 41 | { 42 | off_t fsize; 43 | /* This is an array because we return a pointer to it */ 44 | static uint64_t hash[1]; 45 | static uint64_t *chunk = NULL; 46 | FILE *file = NULL; 47 | int hashing = 0; 48 | #ifndef NO_XXHASH2 49 | XXH64_state_t *xxhstate = NULL; 50 | #endif 51 | #ifdef __linux__ 52 | int filenum; 53 | #endif 54 | 55 | if (unlikely(checkfile == NULL || checkfile->d_name == NULL)) jc_nullptr("get_filehash()"); 56 | if (unlikely((algo > HASH_ALGO_COUNT - 1) || (algo < 0))) goto error_bad_hash_algo; 57 | LOUD(fprintf(stderr, "get_filehash('%s', %" PRIdMAX ")\n", checkfile->d_name, (intmax_t)max_read);) 58 | 59 | /* Allocate on first use */ 60 | if (unlikely(chunk == NULL)) { 61 | chunk = (uint64_t *)malloc(auto_chunk_size); 62 | if (unlikely(!chunk)) jc_oom("get_filehash() chunk"); 63 | } 64 | 65 | /* Get the file size. If we can't read it, bail out early */ 66 | if (unlikely(checkfile->size == -1)) { 67 | LOUD(fprintf(stderr, "get_filehash: not hashing because stat() info is bad\n")); 68 | return NULL; 69 | } 70 | fsize = checkfile->size; 71 | 72 | /* Do not read more than the requested number of bytes */ 73 | if (max_read > 0 && fsize > (off_t)max_read) 74 | fsize = (off_t)max_read; 75 | 76 | /* Initialize the hash and file read parameters (with filehash_partial skipped) 77 | * 78 | * If we already hashed the first chunk of this file, we don't want to 79 | * wastefully read and hash it again, so skip the first chunk and use 80 | * the computed hash for that chunk as our starting point. 
81 | */ 82 | 83 | *hash = 0; 84 | if (ISFLAG(checkfile->flags, FF_HASH_PARTIAL)) { 85 | *hash = checkfile->filehash_partial; 86 | /* Don't bother going further if max_read is already fulfilled */ 87 | if (max_read != 0 && max_read <= PARTIAL_HASH_SIZE) { 88 | LOUD(fprintf(stderr, "Partial hash size (%d) >= max_read (%" PRIuMAX "), not hashing anymore\n", PARTIAL_HASH_SIZE, (uintmax_t)max_read);) 89 | return hash; 90 | } 91 | } 92 | errno = 0; 93 | file = jc_fopen(checkfile->d_name, JC_FILE_MODE_RDONLY_SEQ); 94 | if (file == NULL) { 95 | fprintf(stderr, "\n%s error opening file ", strerror(errno)); jc_fwprint(stderr, checkfile->d_name, 1); 96 | return NULL; 97 | } 98 | /* Actually seek past the first chunk if applicable 99 | * This is part of the filehash_partial skip optimization */ 100 | if (ISFLAG(checkfile->flags, FF_HASH_PARTIAL)) { 101 | if (fseeko(file, PARTIAL_HASH_SIZE, SEEK_SET) == -1) { 102 | fclose(file); 103 | fprintf(stderr, "\nerror seeking in file "); jc_fwprint(stderr, checkfile->d_name, 1); 104 | return NULL; 105 | } 106 | fsize -= PARTIAL_HASH_SIZE; 107 | #ifdef __linux__ 108 | filenum = fileno(file); 109 | posix_fadvise(filenum, PARTIAL_HASH_SIZE, fsize, POSIX_FADV_SEQUENTIAL); 110 | posix_fadvise(filenum, PARTIAL_HASH_SIZE, fsize, POSIX_FADV_WILLNEED); 111 | #endif /* __linux__ */ 112 | } else { 113 | #ifdef __linux__ 114 | filenum = fileno(file); 115 | posix_fadvise(filenum, 0, fsize, POSIX_FADV_SEQUENTIAL); 116 | posix_fadvise(filenum, 0, fsize, POSIX_FADV_WILLNEED); 117 | #endif /* __linux__ */ 118 | } 119 | 120 | /* WARNING: READ NOTICE ABOVE get_filehash() BEFORE CHANGING HASH FUNCTIONS! */ 121 | #ifndef NO_XXHASH2 122 | if (algo == HASH_ALGO_XXHASH2_64) { 123 | xxhstate = XXH64_createState(); 124 | if (unlikely(xxhstate == NULL)) jc_nullptr("xxhstate"); 125 | XXH64_reset(xxhstate, 0); 126 | } 127 | #endif /* NO_XXHASH2 */ 128 | 129 | /* Read the file in chunks until we've read it all. 
*/ 130 | while (fsize > 0) { 131 | size_t bytes_to_read; 132 | 133 | if (interrupt) return 0; 134 | bytes_to_read = (fsize >= (off_t)auto_chunk_size) ? auto_chunk_size : (size_t)fsize; 135 | if (unlikely(fread((void *)chunk, bytes_to_read, 1, file) != 1)) goto error_reading_file; 136 | 137 | switch (algo) { 138 | #ifndef NO_XXHASH2 139 | case HASH_ALGO_XXHASH2_64: 140 | if (unlikely(XXH64_update(xxhstate, chunk, bytes_to_read) != XXH_OK)) goto error_reading_file; 141 | break; 142 | #endif 143 | case HASH_ALGO_JODYHASH64: 144 | if (unlikely(jc_block_hash(chunk, hash, bytes_to_read) != 0)) goto error_reading_file; 145 | break; 146 | default: 147 | goto error_bad_hash_algo; 148 | } 149 | 150 | if ((off_t)bytes_to_read > fsize) break; 151 | else fsize -= (off_t)bytes_to_read; 152 | 153 | check_sigusr1(); 154 | if (jc_alarm_ring != 0) { 155 | jc_alarm_ring = 0; 156 | /* Only show "hashing" part if hashing one file updates progress at least twice */ 157 | if (hashing == 1) { 158 | update_phase2_progress("hashing", (int)(((checkfile->size - fsize) * 100) / checkfile->size)); 159 | } else { 160 | update_phase2_progress(NULL, -1); 161 | hashing = 1; 162 | } 163 | } 164 | continue; 165 | } 166 | 167 | fclose(file); 168 | 169 | #ifndef NO_XXHASH2 170 | if (algo == HASH_ALGO_XXHASH2_64) { 171 | *hash = XXH64_digest(xxhstate); 172 | XXH64_freeState(xxhstate); 173 | } 174 | #endif /* NO_XXHASH2 */ 175 | 176 | LOUD(fprintf(stderr, "get_filehash: returning hash: 0x%016jx\n", (uintmax_t)*hash)); 177 | return hash; 178 | error_reading_file: 179 | fprintf(stderr, "\nerror reading from file "); jc_fwprint(stderr, checkfile->d_name, 1); 180 | fclose(file); 181 | return NULL; 182 | error_bad_hash_algo: 183 | if ((hash_algo > HASH_ALGO_COUNT) || (hash_algo < 0)) 184 | fprintf(stderr, "\nerror: requested hash algorithm %d is not available", hash_algo); 185 | else 186 | fprintf(stderr, "\nerror: requested hash algorithm %s [%d] is not available", hash_algo_list[hash_algo], hash_algo); 187 
| fclose(file); 188 | return NULL; 189 | } 190 | -------------------------------------------------------------------------------- /filehash.h: -------------------------------------------------------------------------------- 1 | /* jdupes file hashing function 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_FILEHASH_H 5 | #define JDUPES_FILEHASH_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #define HASH_ALGO_COUNT 2 12 | extern const char *hash_algo_list[HASH_ALGO_COUNT]; 13 | #define HASH_ALGO_XXHASH2_64 0 14 | #define HASH_ALGO_JODYHASH64 1 15 | 16 | #include "jdupes.h" 17 | 18 | uint64_t *get_filehash(const file_t * const restrict checkfile, const size_t max_read, int algo); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif /* JDUPES_FILEHASH_H */ 25 | -------------------------------------------------------------------------------- /filestat.c: -------------------------------------------------------------------------------- 1 | /* jdupes (C) 2015-2023 Jody Bruchon 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation files 5 | (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, 7 | publish, distribute, sublicense, and/or sell copies of the Software, 8 | and to permit persons to whom the Software is furnished to do so, 9 | subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ 21 | 22 | #include 23 | #include 24 | #include "jdupes.h" 25 | #include "likely_unlikely.h" 26 | 27 | /* Check file's stat() info to make sure nothing has changed 28 | * Returns 1 if changed, 0 if not changed, negative if error */ 29 | int file_has_changed(file_t * const restrict file) 30 | { 31 | struct JC_STAT s; 32 | 33 | /* If -t/--no-change-check specified then completely bypass this code */ 34 | if (ISFLAG(flags, F_NOCHANGECHECK)) return 0; 35 | 36 | if (unlikely(file == NULL || file->d_name == NULL)) jc_nullptr("file_has_changed()"); 37 | LOUD(fprintf(stderr, "file_has_changed('%s')\n", file->d_name);) 38 | 39 | if (!ISFLAG(file->flags, FF_VALID_STAT)) return -66; 40 | 41 | if (jc_stat(file->d_name, &s) != 0) return -2; 42 | if (file->inode != s.st_ino) return 1; 43 | if (file->size != s.st_size) return 1; 44 | if (file->device != s.st_dev) return 1; 45 | if (file->mode != s.st_mode) return 1; 46 | #ifndef NO_MTIME 47 | if (file->mtime != s.st_mtime) return 1; 48 | #endif 49 | #ifndef NO_PERMS 50 | if (file->uid != s.st_uid) return 1; 51 | if (file->gid != s.st_gid) return 1; 52 | #endif 53 | #ifndef NO_SYMLINKS 54 | if (lstat(file->d_name, &s) != 0) return -3; 55 | if ((JC_S_ISLNK(s.st_mode) > 0) ^ ISFLAG(file->flags, FF_IS_SYMLINK)) return 1; 56 | #endif 57 | 58 | return 0; 59 | } 60 | 61 | 62 | int getfilestats(file_t * const restrict file) 63 | { 64 | struct JC_STAT s; 65 | 66 | if (unlikely(file == NULL || file->d_name == NULL)) jc_nullptr("getfilestats()"); 67 | LOUD(fprintf(stderr, "getfilestats('%s')\n", file->d_name);) 68 | 69 | /* Don't stat the same file more than once */ 70 | if (ISFLAG(file->flags, FF_VALID_STAT)) return 0; 71 | SETFLAG(file->flags, FF_VALID_STAT); 72 
| 73 | if (jc_stat(file->d_name, &s) != 0) return -1; 74 | file->size = s.st_size; 75 | file->inode = s.st_ino; 76 | file->device = s.st_dev; 77 | #ifndef NO_MTIME 78 | file->mtime = s.st_mtime; 79 | #endif 80 | #ifndef NO_ATIME 81 | file->atime = s.st_atime; 82 | #endif 83 | file->mode = s.st_mode; 84 | #ifndef NO_HARDLINKS 85 | file->nlink = s.st_nlink; 86 | #endif 87 | #ifndef NO_PERMS 88 | file->uid = s.st_uid; 89 | file->gid = s.st_gid; 90 | #endif 91 | #ifndef NO_SYMLINKS 92 | if (lstat(file->d_name, &s) != 0) return -1; 93 | if (JC_S_ISLNK(s.st_mode) > 0) SETFLAG(file->flags, FF_IS_SYMLINK); 94 | #endif 95 | return 0; 96 | } 97 | 98 | 99 | /* Returns -1 if stat() fails, 0 if it's a directory, 1 if it's not */ 100 | int getdirstats(const char * const restrict name, 101 | jdupes_ino_t * const restrict inode, dev_t * const restrict dev, 102 | jdupes_mode_t * const restrict mode) 103 | { 104 | struct JC_STAT s; 105 | 106 | if (unlikely(name == NULL || inode == NULL || dev == NULL)) jc_nullptr("getdirstats"); 107 | LOUD(fprintf(stderr, "getdirstats('%s', %p, %p)\n", name, (void *)inode, (void *)dev);) 108 | 109 | if (jc_stat(name, &s) != 0) return -1; 110 | *inode = s.st_ino; 111 | *dev = s.st_dev; 112 | *mode = s.st_mode; 113 | if (!JC_S_ISDIR(s.st_mode)) return 1; 114 | return 0; 115 | } 116 | -------------------------------------------------------------------------------- /filestat.h: -------------------------------------------------------------------------------- 1 | /* jdupes file/dir stat()-related functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_FILESTAT_H 5 | #define JDUPES_FILESTAT_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include "jdupes.h" 12 | 13 | int file_has_changed(file_t * const restrict file); 14 | int getfilestats(file_t * const restrict file); 15 | /* Returns -1 if stat() fails, 0 if it's a directory, 1 if it's not */ 16 | int getdirstats(const char * const 
restrict name,
17 | jdupes_ino_t * const restrict inode, dev_t * const restrict dev,
18 | jdupes_mode_t * const restrict mode);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |
24 | #endif /* JDUPES_FILESTAT_H */
25 |
--------------------------------------------------------------------------------
/generate_packages.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Generate package folders with variant builds
#
# Usage: generate_packages.sh [arch]
# The optional argument (or a preset ARCH) selects the target architecture.
# PM (parallel make jobs), TA (target) and PKGTYPE (zip, tar, gz, xz) are
# taken from the environment when already set.

# Number of parallel make processes
if [ -z "$PM" ]
then
  PM=12
  [ -d /sys/devices/system/cpu ] && \
    PM=$(find /sys/devices/system/cpu -maxdepth 1 -mindepth 1 -type d | grep '/cpu[0-9][0-9]*' | wc -l) && \
    PM=$((PM * 2))
fi

NAME="jdupes"

# Rebuild a sibling libjodycode checkout, if there is one, using $1 as
# CFLAGS_EXTRA. The subshell keeps the working directory unchanged and
# guarantees make never runs here if the cd fails.
rebuild_libjodycode () {
  [ -d "../libjodycode" ] || return 0
  echo "Rebuilding nearby libjodycode first"
  ( cd ../libjodycode && make clean && make -j"$PM" CFLAGS_EXTRA="$1" )
}

# Build one variant and copy the binary into the package directory.
# $1 is the suffix added to the packaged file name; any further arguments
# are passed to make ahead of the static_jc and stripped targets.
build_variant () {
  local suffix="$1"
  shift
  make clean && make CFLAGS_EXTRA="$CF" -j"$PM" "$@" static_jc stripped && \
    cp "$NAME$EXT" "$PKGNAME/$NAME$suffix$EXT"
}

VER="$(grep '#define VER "' version.h | cut -d\" -f2)"
echo "Program version: $VER"

[ -z "$TA" ] && TA=__NONE__
[ -n "$1" ] && ARCH="$1"
[[ "$ARCH" = "linux-x64" || "$ARCH" = "x86_64" || "$ARCH" = "x86-64" ]] && TA=linux && ARCH=x86_64 && CF=-m64
[[ "$ARCH" = "linux-x32" || "$ARCH" = "x32" ]] && TA=linux && ARCH=x32 && CF=-mx32
[[ "$ARCH" = "linux-i686" || "$ARCH" = "linux-i386" || "$ARCH" = "i686" || "$ARCH" = "i386" ]] && TA=linux && ARCH=i386 && CF=-m32


UNAME_S="$(uname -s | tr '[:upper:]' '[:lower:]')"
UNAME_P="$(uname -p)"
UNAME_M="$(uname -m)"

# Detect macOS
if [[ "$TA" = "macos" || "$UNAME_S" = "darwin" ]]
then
  PKGTYPE=zip
  TA=mac32
  test "$UNAME_M" = "x86_64" && TA=mac64
fi

# Detect Power Macs under macOS
if [[ "$TA" = "macppc" || "$UNAME_P" = "Power Macintosh" || "$UNAME_P" = "powerpc" ]]
then
  TA=macppc32
  test "$(sysctl hw.cpu64bit_capable)" = "hw.cpu64bit_capable: 1" && TA=macppc64
  [ -z "$PKGTYPE" ] && PKGTYPE=zip
fi

# Detect Linux
if [[ "$TA" = "linux" || "$UNAME_S" = "linux" ]]
then
  TA="linux-$UNAME_M"
  [ -n "$ARCH" ] && TA="linux-$ARCH"
  [ -z "$PKGTYPE" ] && PKGTYPE=xz
fi

# Fall through - assume Windows
if [[ "$TA" = "windows" || "$TA" = "__NONE__" ]]
then
  [ -z "$PKGTYPE" ] && PKGTYPE=zip
  [ -z "$ARCH" ] && ARCH=$(gcc -v 2>&1 | grep Target | cut -d' ' -f2- | cut -d- -f1)
  [[ "$ARCH" = "i686" || "$ARCH" = "i386" ]] && TA=win32
  [ "$ARCH" = "x86_64" ] && TA=win64
  # UNAME_S was lowercased above, so the comparison must be lowercase too
  [ "$UNAME_S" = "mingw32_nt-5.1" ] && TA=winxp
  EXT=".exe"
fi

echo "Target architecture: $TA"
test "$TA" = "__NONE__" && echo "Failed to detect system type" && exit 1
PKGNAME="${NAME}-${VER}-$TA"

echo "Generating package for: $PKGNAME"
mkdir -p "$PKGNAME" || exit 1
test ! -d "$PKGNAME" && echo "Can't create directory for package" && exit 1
cp CHANGES.txt README.md LICENSE.txt "$PKGNAME/" || exit 1
rebuild_libjodycode "$CF"

# Each E flag stays 1 unless its variant built and copied successfully
E1=1; E2=1; E3=1; E4=1
build_variant '' ENABLE_DEDUPE=1 && E1=0
build_variant '-loud' ENABLE_DEDUPE=1 LOUD=1 && E2=0
build_variant '-lowmem' LOW_MEMORY=1 && E3=0
build_variant '-barebones' BARE_BONES=1 && E4=0
strip "$PKGNAME/$NAME"*"$EXT"
make clean
test $((E1 + E2 + E3 + E4)) -gt 0 && echo "Error building packages; aborting." && exit 1

# Make a fat binary on macOS x86_64 if possible
if [ "$TA" = "mac64" ] && ld -v 2>&1 | grep -q 'archs:.*i386'
then
  ERR=0
  TYPE=-i386; CE=-m32
  # On macOS Big Sur (Darwin 20) or higher, try to build a x86_64 + arm64 binary
  [ "$(uname -r | cut -d. -f1)" -ge 20 ] && TYPE=-arm64 && CE="-target arm64-apple-macos11"
  rebuild_libjodycode "$CE"
  # NOTE(review): every pass of this loop runs the same make command, so the
  # second-architecture slice of -loud/-lowmem/-barebones is a default build.
  # Confirm whether the variant flags used above should be repeated here.
  for X in '' '-loud' '-lowmem' '-barebones'
  do
    make clean && make -j"$PM" CFLAGS_EXTRA="$CE" stripped && cp "$NAME$EXT" "$PKGNAME/$NAME$X$EXT$TYPE" || ERR=1
    [ $ERR -eq 0 ] && lipo -create -output "$PKGNAME/jdupes_temp" "$PKGNAME/$NAME$X$EXT$TYPE" "$PKGNAME/$NAME$X$EXT" && mv "$PKGNAME/jdupes_temp" "$PKGNAME/$NAME$X$EXT"
  done
  make clean
  test $ERR -gt 0 && echo "Error building packages; aborting." && exit 1
  rm -f "$PKGNAME/$NAME$EXT$TYPE" "$PKGNAME/$NAME-loud$EXT$TYPE" "$PKGNAME/$NAME-lowmem$EXT$TYPE" "$PKGNAME/$NAME-barebones$EXT$TYPE"
fi
test "$PKGTYPE" = "zip" && zip -9r "$PKGNAME.zip" "$PKGNAME/"
test "$PKGTYPE" = "tar" && tar -c "$PKGNAME/" > "$PKGNAME.pkg.tar"
test "$PKGTYPE" = "gz" && tar -c "$PKGNAME/" | gzip -9 > "$PKGNAME.pkg.tar.gz"
test "$PKGTYPE" = "xz" && tar -c "$PKGNAME/" | xz -e > "$PKGNAME.pkg.tar.xz"
echo "Package generation complete."
116 | -------------------------------------------------------------------------------- /hashdb.h: -------------------------------------------------------------------------------- 1 | /* File hash database management 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_HASHDB_H 5 | #define JDUPES_HASHDB_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include "jdupes.h" 13 | 14 | typedef struct _hashdb { 15 | struct _hashdb *left; 16 | struct _hashdb *right; 17 | uint64_t path_hash; 18 | char *path; 19 | uint64_t partialhash; 20 | uint64_t fullhash; 21 | jdupes_ino_t inode; 22 | off_t size; 23 | time_t mtime; 24 | uint_fast8_t hashcount; 25 | } hashdb_t; 26 | 27 | extern int save_hash_database(const char * const restrict dbname, const int destroy); 28 | extern hashdb_t *add_hashdb_entry(char *in_path, const int in_pathlen, const file_t *check); 29 | extern int64_t load_hash_database(const char * const restrict dbname); 30 | extern int read_hashdb_entry(file_t *file); 31 | extern uint64_t dump_hashdb(void); 32 | extern int cleanup_hashdb(uint64_t *cnt, hashdb_t *cur); 33 | 34 | #ifdef __cplusplus 35 | } 36 | #endif 37 | 38 | #endif /* JDUPES_HASHDB_H */ 39 | -------------------------------------------------------------------------------- /hashdb_util.c: -------------------------------------------------------------------------------- 1 | /* File hash database management 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "jdupes.h" 13 | #include "libjodycode.h" 14 | #include "likely_unlikely.h" 15 | #include "hashdb.h" 16 | #include "version.h" 17 | 18 | int hash_algo = 0; 19 | uint64_t flags = 0; 20 | 21 | #ifdef UNICODE 22 | int wmain(int argc, wchar_t **wargv) 23 | #else 24 | int main(int argc, char **argv) 25 | #endif 26 | { 27 | const char * 
const default_name = "jdupes_hashdb.txt"; 28 | const char *dbname, *action; 29 | int64_t hdbsize; 30 | uint64_t cnt; 31 | 32 | if (argc != 3) goto util_usage; 33 | 34 | #ifdef UNICODE 35 | /* Create a UTF-8 **argv from the wide version */ 36 | static char **argv; 37 | int wa_err; 38 | argv = (char **)malloc(sizeof(char *) * (size_t)argc); 39 | if (!argv) jc_oom("main() unicode argv"); 40 | wa_err = jc_widearg_to_argv(argc, wargv, argv); 41 | if (wa_err != 0) { 42 | jc_print_error(wa_err); 43 | exit(EXIT_FAILURE); 44 | } 45 | /* fix up __argv so getopt etc. don't crash */ 46 | __argv = argv; 47 | jc_set_output_modes(JC_MODE_UTF16_TTY, JC_MODE_UTF16_TTY); 48 | #endif /* UNICODE */ 49 | 50 | dbname = argv[1]; 51 | action = argv[2]; 52 | 53 | if (strcmp(dbname, ".") == 0) dbname = default_name; 54 | hdbsize = load_hash_database(dbname); 55 | if (hdbsize < 0) goto error_load_hashdb; 56 | if (hdbsize > 0 && !ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "%" PRId64 " entries loaded.\n", hdbsize); 57 | 58 | fprintf(stderr, "name %s, action %s\n", dbname, action); 59 | if (strcmp(action, "dump") == 0) { 60 | dump_hashdb(); 61 | return 0; 62 | } else if (strcmp(action, "clean") == 0) { 63 | fprintf(stderr, "Cleaning entries\n"); 64 | if (cleanup_hashdb(&cnt, NULL) != 0) goto error_hashdb_cleanup; 65 | } else goto error_action; 66 | 67 | return 0; 68 | 69 | util_usage: 70 | printf("jdupes hashdb utility %s (%s)\n", VER, VERDATE); 71 | printf("usage: %s hash_database_name action\n", argv[0]); 72 | printf("If the name is a period '.' 
then 'jdupes_hashdb.txt' will be used\n"); 73 | printf("Actions: none yet\n"); 74 | exit(EXIT_FAILURE); 75 | error_hashdb_cleanup: 76 | fprintf(stderr, "error cleaning up hash database '%s'\n", dbname); 77 | exit(EXIT_FAILURE); 78 | error_load_hashdb: 79 | fprintf(stderr, "error: cannot open hash database '%s'\n", dbname); 80 | exit(EXIT_FAILURE); 81 | error_action: 82 | fprintf(stderr, "error: unknown action '%s'\n", action); 83 | exit(EXIT_FAILURE); 84 | } 85 | -------------------------------------------------------------------------------- /helptext.c: -------------------------------------------------------------------------------- 1 | /* Help text and version information 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include "filehash.h" 9 | #include "helptext.h" 10 | #include "jdupes.h" 11 | #include "version.h" 12 | 13 | 14 | #ifndef NO_HELPTEXT 15 | /* Assemble feature flag string from compile-time options */ 16 | const char *feature_flags[] = { 17 | #ifdef ENABLE_DEDUPE 18 | "dedupe", 19 | #endif 20 | #ifdef DEBUG 21 | "debug", 22 | #endif 23 | #ifdef __FAST_MATH__ 24 | "fastmath", 25 | #endif 26 | #ifdef LOUD_DEBUG 27 | "loud", 28 | #endif 29 | #ifdef LOW_MEMORY 30 | "lowmem", 31 | #endif 32 | #ifdef NO_CHUNKSIZE 33 | "nochunk", 34 | #endif 35 | #ifdef NO_DELETE 36 | "nodel", 37 | #endif 38 | #ifdef NO_ERRORONDUPE 39 | "noeod", 40 | #endif 41 | #ifdef NO_EXTFILTER 42 | "noxf", 43 | #endif 44 | #ifdef NO_HARDLINKS 45 | "nohlink", 46 | #endif 47 | #ifdef NO_HASHDB 48 | "nohashdb", 49 | #endif 50 | #ifdef NO_NUMSORT 51 | "nojsort", 52 | #endif 53 | #ifdef NO_JSON 54 | "nojson", 55 | #endif 56 | #ifdef NO_GETOPT_LONG 57 | "nolongopt", 58 | #endif 59 | #ifdef NO_MTIME 60 | "nomtime", 61 | #endif 62 | #ifdef NO_PERMS 63 | "noperm", 64 | #endif 65 | #ifdef NO_SYMLINKS 66 | "noslink", 67 | #endif 68 | #ifdef NO_TRAVCHECK 69 | "notrav", 70 | #endif 71 | #ifdef NO_USER_ORDER 72 
| "nouorder", 73 | #endif 74 | #ifdef NO_UNICODE 75 | "nounicode", 76 | #endif 77 | #ifdef UNICODE 78 | "unicode", 79 | #endif 80 | #ifdef ON_WINDOWS 81 | "windows", 82 | #endif 83 | NULL 84 | }; 85 | #endif /* NO_HELPTEXT */ 86 | 87 | 88 | void help_text(void) 89 | { 90 | #ifndef NO_HELPTEXT 91 | printf("Usage: jdupes [options] FILES and/or DIRECTORIES...\n\n"); 92 | 93 | printf("Duplicate file sets will be printed by default unless a different action\n"); 94 | printf("option is specified (delete, summarize, link, dedupe, etc.)\n"); 95 | #ifdef NO_GETOPT_LONG 96 | printf("\nWARNING: getopt_long disabled in this build! Long options will not work.\n\n"); 97 | #endif 98 | #ifdef LOUD 99 | printf(" -@ --loud \toutput annoying low-level debug info while running\n"); 100 | #endif 101 | printf(" -0 --print-null \toutput nulls instead of CR/LF (like 'find -print0')\n"); 102 | printf(" -1 --one-file-system\tdo not match files on different filesystems/devices\n"); 103 | printf(" -A --no-hidden \texclude hidden files from consideration\n"); 104 | #ifdef ENABLE_DEDUPE 105 | printf(" -B --dedupe \tdo a copy-on-write (reflink/clone) deduplication\n"); 106 | #endif 107 | #ifndef NO_CHUNKSIZE 108 | printf(" -C --chunk-size=#\toverride I/O chunk size in KiB (min %d, max %d)\n", MIN_CHUNK_SIZE / 1024, MAX_CHUNK_SIZE / 1024); 109 | #endif /* NO_CHUNKSIZE */ 110 | #ifndef NO_DELETE 111 | printf(" -d --delete \tprompt user for files to preserve and delete all\n"); 112 | printf(" \tothers; important: under particular circumstances,\n"); 113 | printf(" \tdata may be lost when using this option together\n"); 114 | printf(" \twith -s or --symlinks, or when specifying a\n"); 115 | printf(" \tparticular directory more than once; refer to the\n"); 116 | printf(" \tdocumentation for additional information\n"); 117 | #endif /* NO_DELETE */ 118 | #ifdef DEBUG 119 | printf(" -D --debug \toutput debug statistics after completion\n"); 120 | #endif 121 | #ifndef NO_ERRORONDUPE 122 | printf(" -e 
--error-on-dupe\texit on any duplicate found with status code 255\n"); 123 | #endif 124 | printf(" -f --omit-first \tomit the first file in each set of matches\n"); 125 | printf(" -h --help \tdisplay this help message\n"); 126 | #ifndef NO_HARDLINKS 127 | printf(" -H --hard-links \ttreat any linked files as duplicate files. Normally\n"); 128 | printf(" \tlinked files are treated as non-duplicates for safety\n"); 129 | #endif 130 | printf(" -i --reverse \treverse (invert) the match sort order\n"); 131 | #ifndef NO_USER_ORDER 132 | printf(" -I --isolate \tfiles in the same specified directory won't match\n"); 133 | #endif 134 | #ifndef NO_JSON 135 | printf(" -j --json \tproduce JSON (machine-readable) output\n"); 136 | #endif /* NO_JSON */ 137 | /* printf(" -K --skip-hash \tskip full file hashing (may be faster; 100%% safe)\n"); 138 | printf(" \tWARNING: in development, not fully working yet!\n"); */ 139 | #ifndef NO_SYMLINKS 140 | printf(" -l --link-soft \tmake relative symlinks for duplicates w/o prompting\n"); 141 | #endif 142 | #ifndef NO_HARDLINKS 143 | printf(" -L --link-hard \thard link all duplicate files without prompting\n"); 144 | #ifdef ON_WINDOWS 145 | printf(" \tWindows allows a maximum of 1023 hard links per file;\n"); 146 | printf(" \tlinking large match sets will result in multiple sets\n"); 147 | printf(" \tof hard linked files due to this limit.\n"); 148 | #endif /* ON_WINDOWS */ 149 | #endif /* NO_HARDLINKS */ 150 | printf(" -m --summarize \tsummarize dupe information\n"); 151 | printf(" -M --print-summarize\tprint match sets and --summarize at the end\n"); 152 | #ifndef NO_DELETE 153 | printf(" -N --no-prompt \ttogether with --delete, preserve the first file in\n"); 154 | printf(" \teach set of duplicates and delete the rest without\n"); 155 | printf(" \tprompting the user\n"); 156 | #endif /* NO_DELETE */ 157 | #ifndef NO_MTIME 158 | printf(" -o --order=BY \tselect sort order for output, linking and deleting; by\n"); 159 | printf(" \tmtime 
(BY=time) or filename (BY=name, the default)\n"); 160 | #endif 161 | #ifndef NO_USER_ORDER 162 | printf(" -O --param-order \tParameter order is more important than selected -o sort\n"); 163 | #endif 164 | #ifndef NO_PERMS 165 | printf(" -p --permissions \tdon't consider files with different owner/group or\n"); 166 | printf(" \tpermission bits as duplicates\n"); 167 | #endif 168 | printf(" -P --print=type \tprint extra info (partial, early, fullhash)\n"); 169 | printf(" -q --quiet \thide progress indicator\n"); 170 | printf(" -Q --quick \tskip byte-for-byte confirmation for quick matching\n"); 171 | printf(" \tWARNING: -Q can result in data loss! Be very careful!\n"); 172 | printf(" -r --recurse \tfor every directory, process its subdirectories too\n"); 173 | printf(" -R --recurse: \tfor each directory given after this option follow\n"); 174 | printf(" \tsubdirectories encountered within (note the ':' at\n"); 175 | printf(" \tthe end of the option, manpage for more details)\n"); 176 | #ifndef NO_SYMLINKS 177 | printf(" -s --symlinks \tfollow symlinks\n"); 178 | #endif 179 | printf(" -S --size \tshow size of duplicate files\n"); 180 | printf(" -t --no-change-check\tdisable security check for file changes (aka TOCTTOU)\n"); 181 | printf(" -T --partial-only \tmatch based on partial hashes only. 
WARNING:\n"); 182 | printf(" \tEXTREMELY DANGEROUS paired with destructive actions!\n"); 183 | printf(" -u --print-unique\tprint only a list of unique (non-matched) files\n"); 184 | printf(" -U --no-trav-check\tdisable double-traversal safety check (BE VERY CAREFUL)\n"); 185 | printf(" \tThis fixes a Google Drive File Stream recursion issue\n"); 186 | printf(" -v --version \tdisplay jdupes version and license information\n"); 187 | #ifndef NO_EXTFILTER 188 | printf(" -X --ext-filter=x:y\tfilter files based on specified criteria\n"); 189 | printf(" \tUse '-X help' for detailed extfilter help\n"); 190 | #endif /* NO_EXTFILTER */ 191 | printf(" -y --hash-db=file\tuse a hash database text file to speed up repeat runs\n"); 192 | printf(" \tPassing '-y .' will expand to '-y jdupes_hashdb.txt'\n"); 193 | printf(" -z --zero-match \tconsider zero-length files to be duplicates\n"); 194 | printf(" -Z --soft-abort \tIf the user aborts (i.e. CTRL-C) act on matches so far\n"); 195 | #ifndef ON_WINDOWS 196 | printf(" \tYou can send SIGUSR1 to the program to toggle this\n"); 197 | #endif 198 | 199 | #else /* NO_HELPTEXT */ 200 | version_text(0); 201 | #endif /* NO_HELPTEXT */ 202 | return; 203 | } 204 | 205 | 206 | void version_text(int short_version) 207 | { 208 | printf("jdupes %s (%s) ", VER, VERDATE); 209 | 210 | #ifndef NO_HELPTEXT 211 | /* Indicate bitness information */ 212 | if (sizeof(uintptr_t) == 8) { 213 | if (sizeof(long) == 4) printf("64-bit i32"); 214 | else if (sizeof(long) == 8) printf("64-bit"); 215 | } else if (sizeof(uintptr_t) == 4) { 216 | if (sizeof(long) == 4) printf("32-bit"); 217 | else if (sizeof(long) == 8) printf("32-bit i64"); 218 | #if defined(__x86_64__) && SIZE_MAX == 0xffffffff 219 | printf(" (x32 ABI)"); 220 | #endif 221 | } else printf("%u-bit i%u", (unsigned int)(sizeof(uintptr_t) * 8), 222 | (unsigned int)(sizeof(long) * 8)); 223 | if (!short_version) { 224 | printf(", linked to libjodycode %s (%s)\n", jc_version, jc_verdate); 225 | 
printf("Hash algorithms available:"); 226 | for (int i = 0; i < HASH_ALGO_COUNT; i++) printf(" %s%c", hash_algo_list[i], i == (HASH_ALGO_COUNT - 1) ? '\n' : ','); 227 | } else printf("\n"); 228 | 229 | printf("Compile-time feature flags:"); 230 | if (*feature_flags != NULL) { 231 | int c = 0; 232 | while (feature_flags[c] != NULL) { 233 | printf(" %s", feature_flags[c]); 234 | c++; 235 | } 236 | } else printf(" none"); 237 | printf("\n"); 238 | if (short_version) return; 239 | printf("Copyright (C) 2015-2023 by Jody Bruchon and contributors\n\n"); 240 | printf("Permission is hereby granted, free of charge, to any person obtaining a copy of\n"); 241 | printf("this software and associated documentation files (the \"Software\"), to deal in\n"); 242 | printf("the Software without restriction, including without limitation the rights to\n"); 243 | printf("use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies\n"); 244 | printf("of the Software, and to permit persons to whom the Software is furnished to do\n"); 245 | printf("so, subject to the following conditions:\n\n"); 246 | 247 | printf("The above copyright notice and this permission notice shall be included in all\n"); 248 | printf("copies or substantial portions of the Software.\n\n"); 249 | printf("THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"); 250 | printf("IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"); 251 | printf("FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n"); 252 | printf("AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"); 253 | printf("LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"); 254 | printf("OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"); 255 | printf("SOFTWARE.\n"); 256 | printf("\nIf you find this software useful, please consider financially supporting\n"); 257 | printf("its development through the author's home page: https://www.jodybruchon.com/\n"); 258 | printf("Report bugs, get new releases, or learn about jdupes: https://www.jdupes.com/\n"); 259 | #else 260 | (void)short_version; 261 | printf("\nBuilt with no help text. You're on your own.\n"); 262 | #endif /* NO_HELPTEXT */ 263 | return; 264 | } 265 | -------------------------------------------------------------------------------- /helptext.h: -------------------------------------------------------------------------------- 1 | /* Help text and version information 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_HELPTEXT_H 5 | #define JDUPES_HELPTEXT_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | void version_text(int short_version); 12 | void help_text(void); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif /* JDUPES_HELPTEXT_H */ 19 | -------------------------------------------------------------------------------- /icon/icon_jdupes_256.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/jdupes/a8b0812a7c54d21f910a15d8a90a9b5001a48e2c/icon/icon_jdupes_256.ico -------------------------------------------------------------------------------- /icon/icon_jdupes_256.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/jdupes/a8b0812a7c54d21f910a15d8a90a9b5001a48e2c/icon/icon_jdupes_256.xcf 
-------------------------------------------------------------------------------- /icon/icon_jdupes_all.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/jdupes/a8b0812a7c54d21f910a15d8a90a9b5001a48e2c/icon/icon_jdupes_all.ico -------------------------------------------------------------------------------- /icon/icon_jdupes_all.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/jdupes/a8b0812a7c54d21f910a15d8a90a9b5001a48e2c/icon/icon_jdupes_all.xcf -------------------------------------------------------------------------------- /icon/jdupes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/jdupes/a8b0812a7c54d21f910a15d8a90a9b5001a48e2c/icon/jdupes.png -------------------------------------------------------------------------------- /interrupt.c: -------------------------------------------------------------------------------- 1 | /* Signal handler/interruption functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include "likely_unlikely.h" 11 | #include "jdupes.h" 12 | 13 | /* CTRL-C */ 14 | int interrupt = 0; 15 | 16 | #ifndef ON_WINDOWS 17 | static int usr1_toggle = 0; 18 | #endif 19 | 20 | /* Catch CTRL-C and either notify or terminate */ 21 | void catch_interrupt(const int signum) 22 | { 23 | (void)signum; 24 | interrupt = 1; 25 | exit_status = EXIT_FAILURE; 26 | return; 27 | } 28 | 29 | 30 | /* SIGUSR1 for -Z toggle; not available on Windows */ 31 | #ifndef ON_WINDOWS 32 | void catch_sigusr1(const int signum) 33 | { 34 | (void)signum; 35 | if (!ISFLAG(flags, F_SOFTABORT)) { 36 | SETFLAG(flags, F_SOFTABORT); 37 | usr1_toggle = 1; 38 | } else { 39 | CLEARFLAG(flags, F_SOFTABORT); 40 | usr1_toggle = 2; 41 | } 42 | return; 43 | } 44 
| 45 | 46 | void check_sigusr1(void) 47 | { 48 | /* Notify of change to soft abort status if SIGUSR1 received */ 49 | if (unlikely(usr1_toggle != 0)) { 50 | fprintf(stderr, "\njdupes received a USR1 signal; soft abort (-Z) is now %s\n", usr1_toggle == 1 ? "ON" : "OFF" ); 51 | usr1_toggle = 0; 52 | } 53 | return; 54 | } 55 | #else 56 | #define check_sigusr1() 57 | #endif 58 | -------------------------------------------------------------------------------- /interrupt.h: -------------------------------------------------------------------------------- 1 | /* Signal handler/interruption functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_INTERRUPT_H 5 | #define JDUPES_INTERRUPT_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include "jdupes.h" 12 | 13 | extern int interrupt; 14 | 15 | void catch_interrupt(const int signum); 16 | void start_progress_alarm(void); 17 | void stop_progress_alarm(void); 18 | #ifdef ON_WINDOWS 19 | #define check_sigusr1() 20 | #else 21 | void catch_sigusr1(const int signum); 22 | void catch_sigalrm(const int signum); 23 | void check_sigusr1(void); 24 | #endif /* ON_WINDOWS */ 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | 30 | #endif /* JDUPES_INTERRUPT_H */ 31 | -------------------------------------------------------------------------------- /jdupes.h: -------------------------------------------------------------------------------- 1 | /* jdupes main program header 2 | * See jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_H 5 | #define JDUPES_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | /* Detect Windows and modify as needed */ 12 | #if defined _WIN32 || defined __MINGW32__ 13 | #ifndef ON_WINDOWS 14 | #define ON_WINDOWS 1 15 | #endif 16 | #define NO_SYMLINKS 1 17 | #define NO_PERMS 1 18 | #define NO_SIGACTION 1 19 | #ifndef WIN32_LEAN_AND_MEAN 20 | #define WIN32_LEAN_AND_MEAN 21 | #endif 22 | #include 23 | #include 24 | #endif 
/* Win32 */ 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | /* Some types are different on Windows */ 31 | #if defined _WIN32 || defined __MINGW32__ 32 | typedef uint64_t jdupes_ino_t; 33 | typedef uint32_t jdupes_mode_t; 34 | 35 | #else /* Not Windows */ 36 | #include 37 | typedef ino_t jdupes_ino_t; 38 | typedef mode_t jdupes_mode_t; 39 | #ifdef UNICODE 40 | #error Do not define UNICODE on non-Windows platforms. 41 | #undef UNICODE 42 | #endif 43 | #endif /* _WIN32 || __MINGW32__ */ 44 | 45 | #ifndef PATHBUF_SIZE 46 | #ifdef UNICODE 47 | #define PATHBUF_SIZE 8192 48 | #else 49 | #define PATHBUF_SIZE 4096 50 | #endif /* UNICODE */ 51 | #endif /* PATHBUF_SIZE */ 52 | 53 | /* Maximum path buffer size to use; must be large enough for a path plus 54 | * any work that might be done to the array it's stored in. PATH_MAX is 55 | * not always true. Read this article on the false promises of PATH_MAX: 56 | * http://insanecoding.blogspot.com/2007/11/pathmax-simply-isnt.html 57 | * Windows + Unicode needs a lot more space than UTF-8 in Linux/Mac OS X 58 | */ 59 | /* Complain if PATHBUF_SIZE is too small */ 60 | #ifdef PATH_MAX 61 | #if PATHBUF_SIZE < PATH_MAX 62 | #if !defined LOW_MEMORY && !defined BARE_BONES 63 | #warning "PATHBUF_SIZE is less than PATH_MAX" 64 | #endif 65 | #endif 66 | #endif 67 | 68 | /* Debugging stats */ 69 | #ifdef DEBUG 70 | extern unsigned int small_file, partial_hash, partial_elim; 71 | extern unsigned int full_hash, partial_to_full, hash_fail; 72 | extern uintmax_t comparisons; 73 | #ifdef ON_WINDOWS 74 | #ifndef NO_HARDLINKS 75 | extern unsigned int hll_exclude; 76 | #endif 77 | #endif 78 | #endif /* DEBUG */ 79 | 80 | 81 | #define ISFLAG(a,b) ((a & b) == b) 82 | #define SETFLAG(a,b) (a |= b) 83 | #define CLEARFLAG(a,b) (a &= (~b)) 84 | 85 | /* Chunk sizing */ 86 | #ifndef CHUNK_SIZE 87 | #define CHUNK_SIZE 65536 88 | #endif 89 | #ifndef NO_CHUNKSIZE 90 | extern size_t auto_chunk_size; 91 | /* Larger chunk size makes large files process 
faster but uses more RAM */ 92 | #define MIN_CHUNK_SIZE 4096 93 | #define MAX_CHUNK_SIZE 1048576 * 256 94 | #else 95 | /* If automatic chunk sizing is disabled, just use a fixed value */ 96 | #define auto_chunk_size CHUNK_SIZE 97 | #endif /* NO_CHUNKSIZE */ 98 | 99 | /* Low memory option overrides */ 100 | #ifdef LOW_MEMORY 101 | #ifndef NO_PERMS 102 | #define NO_PERMS 1 103 | #endif 104 | #endif 105 | 106 | /* Aggressive verbosity for deep debugging */ 107 | #ifdef LOUD_DEBUG 108 | #ifndef DEBUG 109 | #define DEBUG 110 | #endif 111 | #define LOUD(...) if ISFLAG(flags, F_LOUD) __VA_ARGS__ 112 | #else 113 | #define LOUD(a) 114 | #endif 115 | 116 | /* Compile out debugging stat counters unless requested */ 117 | #ifdef DEBUG 118 | #define DBG(a) a 119 | #else 120 | #define DBG(a) 121 | #endif 122 | 123 | 124 | /* Compare two hashes like memcmp() */ 125 | #define HASH_COMPARE(a,b) ((a > b) ? 1:((a == b) ? 0:-1)) 126 | 127 | /* Extend an allocation length to the next 64-bit (8-byte) boundary */ 128 | #define EXTEND64(a) ((a & 0x7) > 0 ? 
((a & (~0x7)) + 8) : a) 129 | 130 | /* Behavior modification flags */ 131 | extern uint64_t flags, a_flags, p_flags; 132 | #define F_RECURSE (1ULL << 0) 133 | #define F_HIDEPROGRESS (1ULL << 1) 134 | #define F_SOFTABORT (1ULL << 2) 135 | #define F_FOLLOWLINKS (1ULL << 3) 136 | #define F_INCLUDEEMPTY (1ULL << 4) 137 | #define F_CONSIDERHARDLINKS (1ULL << 5) 138 | #define F_RECURSEAFTER (1ULL << 6) 139 | #define F_NOPROMPT (1ULL << 7) 140 | #define F_EXCLUDEHIDDEN (1ULL << 8) 141 | #define F_PERMISSIONS (1ULL << 9) 142 | #define F_EXCLUDESIZE (1ULL << 10) 143 | #define F_QUICKCOMPARE (1ULL << 11) 144 | #define F_USEPARAMORDER (1ULL << 12) 145 | #define F_REVERSESORT (1ULL << 13) 146 | #define F_ISOLATE (1ULL << 14) 147 | #define F_ONEFS (1ULL << 15) 148 | #define F_PARTIALONLY (1ULL << 16) 149 | #define F_NOCHANGECHECK (1ULL << 17) 150 | #define F_NOTRAVCHECK (1ULL << 18) 151 | #define F_SKIPHASH (1ULL << 19) 152 | #define F_BENCHMARKSTOP (1ULL << 29) 153 | #define F_HASHDB (1ULL << 30) 154 | 155 | #define F_LOUD (1ULL << 62) 156 | #define F_DEBUG (1ULL << 63) 157 | 158 | /* Action-related flags */ 159 | #define FA_PRINTMATCHES (1U << 0) 160 | #define FA_PRINTUNIQUE (1U << 1) 161 | #define FA_OMITFIRST (1U << 2) 162 | #define FA_SUMMARIZEMATCHES (1U << 3) 163 | #define FA_DELETEFILES (1U << 4) 164 | #define FA_SHOWSIZE (1U << 5) 165 | #define FA_HARDLINKFILES (1U << 6) 166 | #define FA_DEDUPEFILES (1U << 7) 167 | #define FA_MAKESYMLINKS (1U << 8) 168 | #define FA_PRINTNULL (1U << 9) 169 | #define FA_PRINTJSON (1U << 10) 170 | #define FA_ERRORONDUPE (1U << 11) 171 | 172 | /* Per-file true/false flags */ 173 | #define FF_VALID_STAT (1U << 0) 174 | #define FF_HASH_PARTIAL (1U << 1) 175 | #define FF_HASH_FULL (1U << 2) 176 | #define FF_HAS_DUPES (1U << 3) 177 | #define FF_IS_SYMLINK (1U << 4) 178 | #define FF_NOT_UNIQUE (1U << 5) 179 | 180 | /* Extra print flags */ 181 | #define PF_PARTIAL (1U << 0) 182 | #define PF_EARLYMATCH (1U << 1) 183 | #define PF_FULLHASH (1U << 
2) 184 | 185 | typedef enum { 186 | ORDER_NAME = 0, 187 | ORDER_TIME 188 | } ordertype_t; 189 | 190 | #ifndef PARTIAL_HASH_SIZE 191 | #define PARTIAL_HASH_SIZE 4096 192 | #endif 193 | 194 | /* Per-file information */ 195 | typedef struct _file { 196 | struct _file *duplicates; 197 | struct _file *next; 198 | char *d_name; 199 | uint64_t filehash_partial; 200 | uint64_t filehash; 201 | jdupes_ino_t inode; 202 | off_t size; 203 | #ifndef NO_MTIME 204 | time_t mtime; 205 | #endif 206 | dev_t device; 207 | uint32_t flags; /* Status flags */ 208 | jdupes_mode_t mode; 209 | #ifndef NO_ATIME 210 | time_t atime; 211 | #endif 212 | #ifndef NO_USER_ORDER 213 | unsigned int user_order; /* Order of the originating command-line parameter */ 214 | #endif 215 | #ifndef NO_HARDLINKS 216 | #ifdef ON_WINDOWS 217 | uint32_t nlink; /* link count on Windows is always a DWORD */ 218 | #else 219 | nlink_t nlink; 220 | #endif /* ON_WINDOWS */ 221 | #endif 222 | #ifndef NO_PERMS 223 | uid_t uid; 224 | gid_t gid; 225 | #endif 226 | } file_t; 227 | 228 | typedef struct _filetree { 229 | file_t *file; 230 | struct _filetree *left; 231 | struct _filetree *right; 232 | } filetree_t; 233 | 234 | /* Progress indicator variables */ 235 | extern uintmax_t filecount, progress, item_progress, dupecount; 236 | 237 | extern int hash_algo; 238 | extern unsigned int user_item_count; 239 | extern int sort_direction; 240 | extern char tempname[]; 241 | extern const char *feature_flags[]; 242 | extern const char *s_no_dupes; 243 | extern int exit_status; 244 | 245 | int file_has_changed(file_t * const restrict file); 246 | 247 | #ifdef __cplusplus 248 | } 249 | #endif 250 | 251 | #endif /* JDUPES_H */ 252 | -------------------------------------------------------------------------------- /libjodycode_check.c: -------------------------------------------------------------------------------- 1 | /* libjodycode version checks 2 | * 3 | * Code to embed the libjodycode version info and check against the currently 
4 | * linked libjodycode to check for and report incompatibilities 5 | * 6 | * Copyright (C) 2023 by Jody Bruchon 7 | * Licensed under The MIT License */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include "libjodycode_check.h" 13 | 14 | #ifdef JC_TEST 15 | #define JC_TEST_ONLY(a) a 16 | #else 17 | #define JC_TEST_ONLY(a) 18 | #endif 19 | 20 | const char *jc_build_version = LIBJODYCODE_VER; 21 | const int jc_build_api_version = LIBJODYCODE_API_VERSION; 22 | const int jc_build_api_featurelevel = LIBJODYCODE_API_FEATURE_LEVEL; 23 | const int jc_build_windows_unicode = LIBJODYCODE_WINDOWS_UNICODE; 24 | const int jc_build_min_featurelevel = MY_FEATURELEVEL_REQ; 25 | 26 | int libjodycode_version_check(int verbose, int bail) 27 | { 28 | JC_TEST_ONLY(if (verbose > 1) fprintf(stderr, "libjodycode version check test code\n\n");) 29 | JC_TEST_ONLY(if (verbose > 1) goto incompatible_version;) 30 | if (jc_build_api_version != jc_api_version) goto incompatible_version; 31 | if (jc_build_min_featurelevel > jc_api_featurelevel) goto incompatible_version; 32 | if (jc_build_windows_unicode != jc_windows_unicode) goto incompatible_version; 33 | return 0; 34 | 35 | incompatible_version: 36 | if (verbose) { 37 | fprintf(stderr, "\n==============================================================================\n"); 38 | fprintf(stderr, "internal error: libjodycode on this system is an incompatible version\n\n"); 39 | fprintf(stderr, "Currently using libjodycode v%s, API %d, feature level %d\n", 40 | jc_version, jc_api_version, jc_api_featurelevel); 41 | fprintf(stderr, " Built against libjodycode v%s, API %d, feature level %d\n\n", 42 | jc_build_version, jc_build_api_version, jc_build_api_featurelevel); 43 | if (jc_windows_unicode != jc_build_windows_unicode) 44 | fprintf(stderr, "libjodycode was built with%s Windows Unicode but %sUnicode is required.\n\n", 45 | jc_windows_unicode == 1 ? "" : "out", 46 | jc_build_windows_unicode == 1 ? 
"" : "non-"); 47 | if (jc_build_min_featurelevel > jc_build_api_featurelevel) 48 | fprintf(stderr, "libjodycode feature level >= %d is required but linked library is level %d\n\n", 49 | jc_build_min_featurelevel, jc_build_api_featurelevel); 50 | fprintf(stderr, "==============================================================================\n\n"); 51 | fprintf(stderr, "\nUpdate libjodycode on your system and try again. If you continue to get this\n"); 52 | fprintf(stderr, "error, contact the package or distribution maintainer. If all else fails, send\n"); 53 | fprintf(stderr, "an email to jody@jodybruchon.com for help (but only as a last resort, please.)\n\n"); 54 | } 55 | if (bail) exit(EXIT_FAILURE); 56 | return 1; 57 | } 58 | 59 | #ifdef JC_TEST 60 | int main(void) 61 | { 62 | libjodycode_version_check(2, 0); 63 | return 0; 64 | } 65 | #endif 66 | -------------------------------------------------------------------------------- /libjodycode_check.h: -------------------------------------------------------------------------------- 1 | /* libjodycode version check headear 2 | * See libjodycode_check.c for license information */ 3 | 4 | #ifndef LIBJODYCODE_CHECK_H 5 | #define LIBJODYCODE_CHECK_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | /* Set this to the minimum feature level required by your program */ 12 | #define MY_FEATURELEVEL_REQ 4 13 | 14 | extern const int jc_build_api_major; 15 | extern const int jc_build_api_minor; 16 | extern const char *jc_build_version; 17 | extern const char *jc_build_featurelevel; 18 | extern const unsigned char jc_build_api_versiontable[]; 19 | 20 | extern int libjodycode_version_check(int verbose, int bail); 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | 26 | #endif /* LIBJODYCODE_CHECK_H */ 27 | -------------------------------------------------------------------------------- /likely_unlikely.h: -------------------------------------------------------------------------------- 1 | /* likely()/unlikely() 
macros for branch optimization 2 | * By Jody Bruchon 3 | * Released to the public domain */ 4 | 5 | #ifndef LIKELY_UNLIKELY_H 6 | #define LIKELY_UNLIKELY_H 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | /* Un-define if already defined */ 13 | #if !defined NO_LIKELY_UNLIKELY && (defined __GNUC__ || defined __clang__) 14 | #ifdef likely 15 | #undef likely 16 | #endif 17 | #ifdef unlikely 18 | #undef unlikely 19 | #endif 20 | 21 | #define likely(a) __builtin_expect((a), 1) 22 | #define unlikely(a) __builtin_expect((a), 0) 23 | 24 | #else /* no GCC/Clang */ 25 | #define likely(a) a 26 | #define unlikely(a) a 27 | #endif 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif /* LIKELY_UNLIKELY_H */ 34 | -------------------------------------------------------------------------------- /linux-dedupe-static.h: -------------------------------------------------------------------------------- 1 | /* Bare header for Linux dedupe API */ 2 | #ifndef JDUPES_DEDUPESTATIC_H 3 | #define JDUPES_DEDUPESTATIC_H 4 | #include 5 | #include 6 | #define FILE_DEDUPE_RANGE_SAME 0 7 | #define FILE_DEDUPE_RANGE_DIFFERS 1 8 | struct file_dedupe_range_info { 9 | __s64 dest_fd; 10 | __u64 dest_offset; 11 | __u64 bytes_deduped; 12 | __s32 status; 13 | __u32 reserved; 14 | }; 15 | struct file_dedupe_range { 16 | __u64 src_offset; 17 | __u64 src_length; 18 | __u16 dest_count; 19 | __u16 reserved1; 20 | __u32 reserved2; 21 | struct file_dedupe_range_info info[0]; 22 | }; 23 | #define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range) 24 | #endif /* JDUPES_DEDUPESTATIC_H */ 25 | -------------------------------------------------------------------------------- /loaddir.c: -------------------------------------------------------------------------------- 1 | /* jdupes directory scanning code 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 
#include 13 | #include "likely_unlikely.h" 14 | #include "jdupes.h" 15 | #include "checks.h" 16 | #include "filestat.h" 17 | #ifndef NO_HASHDB 18 | #include "hashdb.h" 19 | #endif 20 | #include "progress.h" 21 | #include "interrupt.h" 22 | #ifndef NO_TRAVCHECK 23 | #include "travcheck.h" 24 | #endif 25 | 26 | /* Detect Windows and modify as needed */ 27 | #if defined _WIN32 || defined __MINGW32__ 28 | const char dir_sep = '\\'; 29 | #else /* Not Windows */ 30 | const char dir_sep = '/'; 31 | #endif /* _WIN32 || __MINGW32__ */ 32 | 33 | static file_t *init_newfile(const size_t len, file_t * restrict * const restrict filelistp) 34 | { 35 | file_t * const restrict newfile = (file_t *)calloc(1, sizeof(file_t)); 36 | 37 | if (unlikely(!newfile)) jc_oom("init_newfile() file structure"); 38 | if (unlikely(!filelistp)) jc_nullptr("init_newfile() filelistp"); 39 | 40 | LOUD(fprintf(stderr, "init_newfile(len %" PRIuMAX ", filelistp %p)\n", (uintmax_t)len, filelistp)); 41 | 42 | newfile->d_name = (char *)malloc(EXTEND64(len)); 43 | if (!newfile->d_name) jc_oom("init_newfile() filename"); 44 | 45 | newfile->next = *filelistp; 46 | #ifndef NO_USER_ORDER 47 | newfile->user_order = user_item_count; 48 | #endif 49 | newfile->size = -1; 50 | newfile->duplicates = NULL; 51 | return newfile; 52 | } 53 | 54 | 55 | /* This is disabled until a check is in place to make it safe */ 56 | #if 0 57 | /* Add a single file to the file tree */ 58 | file_t *grokfile(const char * const restrict name, file_t * restrict * const restrict filelistp) 59 | { 60 | file_t * restrict newfile; 61 | 62 | if (!name || !filelistp) jc_nullptr("grokfile()"); 63 | LOUD(fprintf(stderr, "grokfile: '%s' %p\n", name, filelistp)); 64 | 65 | /* Allocate the file_t and the d_name entries */ 66 | newfile = init_newfile(strlen(name) + 2, filelistp); 67 | 68 | strcpy(newfile->d_name, name); 69 | 70 | /* Single-file [l]stat() and exclusion condition check */ 71 | if (check_singlefile(newfile) != 0) { 72 | 
LOUD(fprintf(stderr, "grokfile: check_singlefile rejected file\n")); 73 | free(newfile->d_name); 74 | free(newfile); 75 | return NULL; 76 | } 77 | return newfile; 78 | } 79 | #endif 80 | 81 | /* Load a directory's contents into the file tree, recursing as needed */ 82 | void loaddir(char * const restrict dir, 83 | file_t * restrict * const restrict filelistp, 84 | int recurse) 85 | { 86 | file_t * restrict newfile; 87 | JC_DIRENT *dirinfo; 88 | size_t dirlen, dirpos; 89 | int i; 90 | // single = 0; 91 | jdupes_ino_t inode; 92 | dev_t device, n_device; 93 | jdupes_mode_t mode; 94 | JC_DIR *cd; 95 | static int sf_warning = 0; /* single file warning should only appear once */ 96 | 97 | if (unlikely(dir == NULL || filelistp == NULL)) jc_nullptr("loaddir()"); 98 | LOUD(fprintf(stderr, "loaddir: scanning '%s' (order %d, recurse %d)\n", dir, user_item_count, recurse)); 99 | 100 | if (unlikely(interrupt != 0)) return; 101 | 102 | /* Convert forward slashes to backslashes if on Windows */ 103 | jc_slash_convert(dir); 104 | 105 | /* Get directory stats (or file stats if it's a file) */ 106 | i = getdirstats(dir, &inode, &device, &mode); 107 | if (unlikely(i < 0)) goto error_stat_dir; 108 | 109 | /* if dir is actually a file, just add it to the file tree */ 110 | if (i == 1) { 111 | /* Single file addition is disabled for now because there is no safeguard 112 | * against the file being compared against itself if it's added in both a 113 | * recursion and explicitly on the command line. 
*/ 114 | #if 0 115 | LOUD(fprintf(stderr, "loaddir -> grokfile '%s'\n", dir)); 116 | newfile = grokfile(dir, filelistp); 117 | if (newfile == NULL) { 118 | LOUD(fprintf(stderr, "grokfile rejected '%s'\n", dir)); 119 | return; 120 | } 121 | single = 1; 122 | goto add_single_file; 123 | #endif 124 | if (sf_warning == 0) { 125 | fprintf(stderr, "\nFile specs on command line disabled in this version for safety\n"); 126 | fprintf(stderr, "This should be restored (and safe) in a future release\n"); 127 | fprintf(stderr, "More info at jdupes.com or email jody@jodybruchon.com\n"); 128 | sf_warning = 1; 129 | } 130 | return; /* Remove when single file is restored */ 131 | } 132 | 133 | /* Double traversal prevention tree */ 134 | #ifndef NO_TRAVCHECK 135 | if (likely(!ISFLAG(flags, F_NOTRAVCHECK))) { 136 | i = traverse_check(device, inode); 137 | if (unlikely(i == 1)) return; 138 | if (unlikely(i == 2)) goto error_stat_dir; 139 | } 140 | #endif /* NO_TRAVCHECK */ 141 | 142 | item_progress++; 143 | 144 | cd = jc_opendir(dir); 145 | if (unlikely(!cd)) goto error_cd; 146 | dirlen = strlen(dir); 147 | 148 | while ((dirinfo = jc_readdir(cd)) != NULL) { 149 | char * restrict tp = tempname; 150 | size_t d_name_len; 151 | 152 | if (unlikely(interrupt != 0)) return; 153 | LOUD(fprintf(stderr, "loaddir: readdir: '%s'\n", dirinfo->d_name)); 154 | if (unlikely(!jc_streq(dirinfo->d_name, ".") || !jc_streq(dirinfo->d_name, ".."))) continue; 155 | check_sigusr1(); 156 | if (jc_alarm_ring != 0) { 157 | jc_alarm_ring = 0; 158 | update_phase1_progress("dirs"); 159 | } 160 | 161 | /* Assemble the file's full path name, optimized to avoid strcat() */ 162 | dirpos = dirlen; 163 | d_name_len = strlen(dirinfo->d_name); 164 | memcpy(tp, dir, dirpos + 1); 165 | if (dirpos != 0 && tp[dirpos - 1] != dir_sep) { 166 | tp[dirpos] = dir_sep; 167 | dirpos++; 168 | } 169 | if (unlikely(dirpos + d_name_len + 1 >= (PATHBUF_SIZE * 2))) goto error_overflow; 170 | tp += dirpos; 171 | memcpy(tp, dirinfo->d_name, 
d_name_len); 172 | tp += d_name_len; 173 | *tp = '\0'; 174 | d_name_len++; 175 | 176 | /* Allocate the file_t and the d_name entries */ 177 | newfile = init_newfile(dirpos + d_name_len + 2, filelistp); 178 | 179 | tp = tempname; 180 | memcpy(newfile->d_name, tp, dirpos + d_name_len); 181 | 182 | /*** WARNING: tempname global gets reused by check_singlefile here! ***/ 183 | 184 | /* Single-file [l]stat() and exclusion condition check */ 185 | if (check_singlefile(newfile) != 0) { 186 | LOUD(fprintf(stderr, "loaddir: check_singlefile rejected file\n")); 187 | free(newfile->d_name); 188 | free(newfile); 189 | continue; 190 | } 191 | 192 | /* Optionally recurse directories, including symlinked ones if requested */ 193 | if (JC_S_ISDIR(newfile->mode)) { 194 | if (recurse) { 195 | /* --one-file-system - WARNING: this clobbers inode/mode */ 196 | if (ISFLAG(flags, F_ONEFS) 197 | && (getdirstats(newfile->d_name, &inode, &n_device, &mode) == 0) 198 | && (device != n_device)) { 199 | LOUD(fprintf(stderr, "loaddir: directory: not recursing (--one-file-system)\n")); 200 | free(newfile->d_name); 201 | free(newfile); 202 | continue; 203 | } 204 | #ifndef NO_SYMLINKS 205 | else if (ISFLAG(flags, F_FOLLOWLINKS) || !ISFLAG(newfile->flags, FF_IS_SYMLINK)) { 206 | LOUD(fprintf(stderr, "loaddir: directory(symlink): recursing (-r/-R)\n")); 207 | loaddir(newfile->d_name, filelistp, recurse); 208 | } 209 | #else 210 | else { 211 | LOUD(fprintf(stderr, "loaddir: directory: recursing (-r/-R)\n")); 212 | loaddir(newfile->d_name, filelistp, recurse); 213 | } 214 | #endif /* NO_SYMLINKS */ 215 | } else { LOUD(fprintf(stderr, "loaddir: directory: not recursing\n")); } 216 | free(newfile->d_name); 217 | free(newfile); 218 | if (unlikely(interrupt != 0)) return; 219 | continue; 220 | } else { 221 | //add_single_file: 222 | /* Add regular files to list, including symlink targets if requested */ 223 | #ifndef NO_SYMLINKS 224 | if (!ISFLAG(newfile->flags, FF_IS_SYMLINK) || (ISFLAG(newfile->flags, 
FF_IS_SYMLINK) && ISFLAG(flags, F_FOLLOWLINKS))) { 225 | #else 226 | if (JC_S_ISREG(newfile->mode)) { 227 | #endif 228 | #ifndef NO_HASHDB 229 | if (ISFLAG(flags, F_HASHDB)) read_hashdb_entry(newfile); 230 | #endif 231 | *filelistp = newfile; 232 | filecount++; 233 | progress++; 234 | 235 | } else { 236 | LOUD(fprintf(stderr, "loaddir: not a regular file: %s\n", newfile->d_name);) 237 | free(newfile->d_name); 238 | free(newfile); 239 | // if (single == 1) return; 240 | continue; 241 | } 242 | } 243 | /* Skip directory stuff if adding only a single file */ 244 | // if (single == 1) return; 245 | } 246 | 247 | jc_closedir(cd); 248 | 249 | return; 250 | 251 | error_stat_dir: 252 | fprintf(stderr, "\ncould not stat dir "); jc_fwprint(stderr, dir, 1); 253 | exit_status = EXIT_FAILURE; 254 | return; 255 | error_cd: 256 | fprintf(stderr, "\ncould not chdir to "); jc_fwprint(stderr, dir, 1); 257 | exit_status = EXIT_FAILURE; 258 | return; 259 | error_overflow: 260 | fprintf(stderr, "\nerror: a path overflowed (longer than PATHBUF_SIZE) cannot continue\n"); 261 | exit(EXIT_FAILURE); 262 | } 263 | -------------------------------------------------------------------------------- /loaddir.h: -------------------------------------------------------------------------------- 1 | /* jdupes directory scanning code 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_LOADDIR_H 5 | #define JDUPES_LOADDIR_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | //file_t *grokfile(const char * const restrict name, file_t * restrict * const restrict filelistp); 12 | void loaddir(char * const restrict dir, file_t * restrict * const restrict filelistp, int recurse); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif /* JDUPES_LOADDIR_H */ 19 | -------------------------------------------------------------------------------- /match.h: -------------------------------------------------------------------------------- 1 | /* 
jdupes file matching functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_MATCH_H 5 | #define JDUPES_MATCH_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include "jdupes.h" 13 | 14 | /* registerfile() direction options */ 15 | enum tree_direction { NONE, LEFT, RIGHT }; 16 | 17 | void registerpair(file_t **matchlist, file_t *newmatch, int (*comparef)(file_t *f1, file_t *f2)); 18 | void registerfile(filetree_t * restrict * const restrict nodeptr, const enum tree_direction d, file_t * const restrict file); 19 | file_t **checkmatch(filetree_t * restrict tree, file_t * const restrict file); 20 | int confirmmatch(const char * const restrict file1, const char * const restrict file2, const off_t size); 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | 26 | #endif /* JDUPES_MATCH_H */ 27 | -------------------------------------------------------------------------------- /progress.c: -------------------------------------------------------------------------------- 1 | /* jdupes progress indicator 2 | see jdupes.c for licensing information */ 3 | 4 | #include 5 | #include 6 | #include "jdupes.h" 7 | #include "likely_unlikely.h" 8 | 9 | 10 | void update_phase1_progress(const char * const restrict type) 11 | { 12 | fprintf(stderr, "\rScanning: %" PRIuMAX " files, %" PRIuMAX " %s (in %u specified)", 13 | progress, item_progress, type, user_item_count); 14 | // fflush(stderr); 15 | } 16 | 17 | /* Update progress indicator if requested */ 18 | void update_phase2_progress(const char * const restrict msg, const int file_percent) 19 | { 20 | static int did_fpct = 0; 21 | 22 | fprintf(stderr, "\rProgress [%" PRIuMAX "/%" PRIuMAX ", %" PRIuMAX " pairs matched] %" PRIuMAX "%%", 23 | progress, filecount, dupecount, (progress * 100) / filecount); 24 | if (file_percent > -1 && msg != NULL) { 25 | fprintf(stderr, " (%s: %d%%) ", msg, file_percent); 26 | did_fpct = 1; 27 | } else if (did_fpct != 0) { 
28 | fprintf(stderr, " "); 29 | did_fpct = 0; 30 | } 31 | // fflush(stderr); 32 | return; 33 | } 34 | -------------------------------------------------------------------------------- /progress.h: -------------------------------------------------------------------------------- 1 | /* jdupes argument functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_PROGRESS_H 5 | #define JDUPES_PROGRESS_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | void update_phase1_progress(const char * const restrict type); 12 | void update_phase2_progress(const char * const restrict msg, const int file_percent); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif /* JDUPES_PROGRESS_H */ 19 | -------------------------------------------------------------------------------- /remove_hashdb_dead_entries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | [[ -z "$1" || ! -e "$1" ]] && echo "Specify a hash database to clean" >&2 && exit 1 4 | 5 | HASHDB="$1" 6 | TEMPDB="_jdupes_hashdb_clean.tmp" 7 | ERR=0; CNT=0 8 | LINELEN=87 9 | 10 | [ "$HASHDB" = "." ] && HASHDB="jdupes_hashdb.txt" 11 | 12 | clean_exit () { 13 | echo "Terminated, cleaning up." >&2 14 | rm -f "$TEMPDB" 15 | exit 1 16 | } 17 | 18 | trap clean_exit INT TERM HUP ABRT QUIT 19 | 20 | if ! grep -q -m 1 '^jdupes hashdb:2,' "$HASHDB" 21 | then echo "Must be a version 2 database, exiting" >&2 22 | exit 1 23 | fi 24 | 25 | SRCLINES="$(wc -l "$HASHDB" | cut -d' ' -f1)" 26 | SRCLINES="$((SRCLINES - 1))" 27 | 28 | echo "Cleaning out hash database $HASHDB [$SRCLINES entries]" >&2 29 | 30 | head -n 1 "$HASHDB" > "$TEMPDB" || ERR=1 31 | 32 | echo "Sorting items (this may take a little time)..." >&2 33 | 34 | while read LINE 35 | do 36 | NAME="${LINE:$LINELEN}" 37 | [ ! 
-e "$NAME" ] && echo "$LINE" >&2 && continue 38 | echo "$LINE" >> "$TEMPDB" || ERR=1 39 | CNT=$((CNT + 1)) 40 | echo -n "Processed $CNT/$SRCLINES lines ($((CNT * 100 / SRCLINES))%)"$'\r' 41 | done < <(grep -v '^jdupes hashdb:' "$HASHDB" | sort -k7 -t,) 42 | 43 | if [ $ERR -eq 1 ] 44 | then echo "Error writing out lines, not overwriting hash database" >&2 45 | rm -f "$TEMPDB" 46 | exit 1 47 | 48 | else 49 | mv -f "$TEMPDB" "$HASHDB" 50 | echo "Wrote $CNT entries; cleaned out $((SRCLINES - CNT)) entries" >&2 51 | fi 52 | -------------------------------------------------------------------------------- /sort.c: -------------------------------------------------------------------------------- 1 | /* File order sorting functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include "likely_unlikely.h" 9 | #include "jdupes.h" 10 | 11 | 12 | #ifndef NO_USER_ORDER 13 | static int sort_pairs_by_param_order(file_t *f1, file_t *f2) 14 | { 15 | if (!ISFLAG(flags, F_USEPARAMORDER)) return 0; 16 | if (unlikely(f1 == NULL || f2 == NULL)) jc_nullptr("sort_pairs_by_param_order()"); 17 | if (f1->user_order < f2->user_order) return -sort_direction; 18 | if (f1->user_order > f2->user_order) return sort_direction; 19 | return 0; 20 | } 21 | #endif 22 | 23 | 24 | #ifndef NO_MTIME 25 | int sort_pairs_by_mtime(file_t *f1, file_t *f2) 26 | { 27 | if (unlikely(f1 == NULL || f2 == NULL)) jc_nullptr("sort_pairs_by_mtime()"); 28 | 29 | #ifndef NO_USER_ORDER 30 | int po = sort_pairs_by_param_order(f1, f2); 31 | if (po != 0) return po; 32 | #endif /* NO_USER_ORDER */ 33 | 34 | if (f1->mtime < f2->mtime) return -sort_direction; 35 | else if (f1->mtime > f2->mtime) return sort_direction; 36 | 37 | #ifndef NO_NUMSORT 38 | /* If the mtimes match, use the names to break the tie */ 39 | return jc_numeric_strcmp(f1->d_name, f2->d_name) > 0 ? 
-sort_direction : -sort_direction; 40 | #else 41 | return strcmp(f1->d_name, f2->d_name) > 0 ? sort_direction : -sort_direction; 42 | #endif /* NO_NUMSORT */ 43 | } 44 | #endif 45 | 46 | 47 | int sort_pairs_by_filename(file_t *f1, file_t *f2) 48 | { 49 | if (unlikely(f1 == NULL || f2 == NULL)) jc_nullptr("sort_pairs_by_filename()"); 50 | 51 | #ifndef NO_USER_ORDER 52 | int po = sort_pairs_by_param_order(f1, f2); 53 | if (po != 0) return po; 54 | #endif /* NO_USER_ORDER */ 55 | 56 | #ifndef NO_NUMSORT 57 | return jc_numeric_strcmp(f1->d_name, f2->d_name) > 0 ? sort_direction : -sort_direction; 58 | #else 59 | return strcmp(f1->d_name, f2->d_name) > 0 ? sort_direction : -sort_direction; 60 | #endif /* NO_NUMSORT */ 61 | } 62 | -------------------------------------------------------------------------------- /sort.h: -------------------------------------------------------------------------------- 1 | /* File order sorting functions 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_SORT_H 5 | #define JDUPES_SORT_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include "jdupes.h" 12 | 13 | #ifndef NO_MTIME 14 | int sort_pairs_by_mtime(file_t *f1, file_t *f2); 15 | #endif 16 | int sort_pairs_by_filename(file_t *f1, file_t *f2); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif /* JDUPES_SORT_H */ 23 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This is a dummy test script meant for automated builds to succeed. 
4 | echo "OK" 5 | -------------------------------------------------------------------------------- /testdir/.hidden_dir/hiddendir_two: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /testdir/.hidden_two: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /testdir/Stilltinydupe1: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /testdir/Tinydupe3: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /testdir/Zero_C: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/jdupes/a8b0812a7c54d21f910a15d8a90a9b5001a48e2c/testdir/Zero_C -------------------------------------------------------------------------------- /testdir/atinydupe0: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /testdir/block_size_tests/4095b_file1: -------------------------------------------------------------------------------- 1 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 2 | This file is a larger file than the other testdir files. 
Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 3 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 4 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 5 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 6 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
7 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 8 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 9 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 10 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 11 | This file is a larger file than the other testdir files. Its purpose is to trigger code that wo -------------------------------------------------------------------------------- /testdir/block_size_tests/4095b_file2: -------------------------------------------------------------------------------- 1 | This file is a larger file than the other testdir files. 
Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 2 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 3 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 4 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 5 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
6 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 7 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 8 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 9 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 10 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 11 | This file is a larger file than the other testdir files. Its purpose is to trigger code that wo -------------------------------------------------------------------------------- /testdir/block_size_tests/4096b_file1: -------------------------------------------------------------------------------- 1 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 2 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 3 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 4 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
5 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 6 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 7 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 8 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 9 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 10 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 11 | This file is a larger file than the other testdir files. Its purpose is to trigger code that wor -------------------------------------------------------------------------------- /testdir/block_size_tests/4096b_file2: -------------------------------------------------------------------------------- 1 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 2 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 3 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
4 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 5 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 6 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 7 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 8 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 9 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 10 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 11 | This file is a larger file than the other testdir files. Its purpose is to trigger code that wor -------------------------------------------------------------------------------- /testdir/block_size_tests/4097b_file1: -------------------------------------------------------------------------------- 1 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 2 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
3 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 4 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 5 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 6 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 7 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 8 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 9 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 10 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 11 | This file is a larger file than the other testdir files. Its purpose is to trigger code that work -------------------------------------------------------------------------------- /testdir/block_size_tests/4097b_file2: -------------------------------------------------------------------------------- 1 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
2 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 3 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 4 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 5 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 6 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 7 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 8 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 9 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 10 | This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 11 | This file is a larger file than the other testdir files. 
Its purpose is to trigger code that work -------------------------------------------------------------------------------- /testdir/extensions/fake_doc_001.doc: -------------------------------------------------------------------------------- 1 | fake mp3 file 2 | -------------------------------------------------------------------------------- /testdir/extensions/fake_doc_002.doc: -------------------------------------------------------------------------------- 1 | fake mp3 file 2 | -------------------------------------------------------------------------------- /testdir/extensions/fake_mp3_001.mp3: -------------------------------------------------------------------------------- 1 | fake mp3 file 2 | -------------------------------------------------------------------------------- /testdir/extensions/fake_mp3_002.mp3: -------------------------------------------------------------------------------- 1 | fake mp3 file 2 | -------------------------------------------------------------------------------- /testdir/extensions/fake_mp4_001.mp4: -------------------------------------------------------------------------------- 1 | fake mp3 file 2 | -------------------------------------------------------------------------------- /testdir/extensions/fake_mp4_002.mp4: -------------------------------------------------------------------------------- 1 | fake mp3 file 2 | -------------------------------------------------------------------------------- /testdir/isolate/1/1.txt: -------------------------------------------------------------------------------- 1 | isolate 2 | -------------------------------------------------------------------------------- /testdir/isolate/1/2.txt: -------------------------------------------------------------------------------- 1 | isolate 2 | -------------------------------------------------------------------------------- /testdir/isolate/2/3.txt: -------------------------------------------------------------------------------- 1 | isolate 2 | 
-------------------------------------------------------------------------------- /testdir/isolate/2/4.txt: -------------------------------------------------------------------------------- 1 | isolate 2 | -------------------------------------------------------------------------------- /testdir/isolate/3/5.txt: -------------------------------------------------------------------------------- 1 | isolate 2 | -------------------------------------------------------------------------------- /testdir/isolate/3/6.txt: -------------------------------------------------------------------------------- 1 | isolate 2 | -------------------------------------------------------------------------------- /testdir/isolate/3/7.txt: -------------------------------------------------------------------------------- 1 | isolate 2 | -------------------------------------------------------------------------------- /testdir/isolate/4/8.txt: -------------------------------------------------------------------------------- 1 | isolate 2 | -------------------------------------------------------------------------------- /testdir/nine_upsidedown: -------------------------------------------------------------------------------- 1 | six 2 | -------------------------------------------------------------------------------- /testdir/notsotinydupe1: -------------------------------------------------------------------------------- 1 | This is not quite such a small duplicate as the other duplicates. 2 | -------------------------------------------------------------------------------- /testdir/notsotinydupe2: -------------------------------------------------------------------------------- 1 | This is not quite such a small duplicate as the other duplicates. 
2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_copysuffixes/file1-0 (1).jpg: -------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_copysuffixes/file1-0#1.jpg: -------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_copysuffixes/file1-0.jpg: -------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_copysuffixes/file1-1 (Copy) (2) (2).jpg: -------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_copysuffixes/file1-1 (Copy) (2).jpg: -------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_copysuffixes/file1-1 (Copy).jpg: -------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_copysuffixes/file1-1.jpg: -------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_copysuffixes/file1-10.jpg: -------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_copysuffixes/file1-2.jpg: 
-------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file001: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file001a: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file002: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file020: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file021: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file030: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file1: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file10: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file100: 
-------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file10a: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file1a2: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file2: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_digitsafter/file3: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/00file4: -------------------------------------------------------------------------------- 1 | pair4 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/00file5: -------------------------------------------------------------------------------- 1 | pair5 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/00file5a: -------------------------------------------------------------------------------- 1 | pair5 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/01file4: -------------------------------------------------------------------------------- 1 | pair4 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/0file1: 
-------------------------------------------------------------------------------- 1 | pair1 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/0file2: -------------------------------------------------------------------------------- 1 | pair1 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/0file3: -------------------------------------------------------------------------------- 1 | pair3 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/1file1: -------------------------------------------------------------------------------- 1 | pair2 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/1file2: -------------------------------------------------------------------------------- 1 | pair2 2 | -------------------------------------------------------------------------------- /testdir/numeric_sort_startwithzero/1file3: -------------------------------------------------------------------------------- 1 | pair3 2 | -------------------------------------------------------------------------------- /testdir/recursed_a/five: -------------------------------------------------------------------------------- 1 | five 2 | -------------------------------------------------------------------------------- /testdir/recursed_a/five_2: -------------------------------------------------------------------------------- 1 | five 2 | -------------------------------------------------------------------------------- /testdir/recursed_a/one: -------------------------------------------------------------------------------- 1 | one 2 | -------------------------------------------------------------------------------- /testdir/recursed_a/one_2: -------------------------------------------------------------------------------- 1 | one 2 | 
-------------------------------------------------------------------------------- /testdir/recursed_a/symlink_infinite_loop: -------------------------------------------------------------------------------- 1 | ../recursed_a -------------------------------------------------------------------------------- /testdir/recursed_a/two: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /testdir/recursed_a/two_2: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /testdir/recursed_b/four: -------------------------------------------------------------------------------- 1 | four 2 | -------------------------------------------------------------------------------- /testdir/recursed_b/one: -------------------------------------------------------------------------------- 1 | one 2 | -------------------------------------------------------------------------------- /testdir/recursed_b/three: -------------------------------------------------------------------------------- 1 | three 2 | -------------------------------------------------------------------------------- /testdir/recursed_b/two_plus_one: -------------------------------------------------------------------------------- 1 | three 2 | -------------------------------------------------------------------------------- /testdir/recursed_c/five: -------------------------------------------------------------------------------- 1 | five 2 | -------------------------------------------------------------------------------- /testdir/recursed_c/level2/five: -------------------------------------------------------------------------------- 1 | five 2 | -------------------------------------------------------------------------------- /testdir/recursed_c/level2/one: 
-------------------------------------------------------------------------------- 1 | one 2 | -------------------------------------------------------------------------------- /testdir/recursed_c/level2/two: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /testdir/recursed_c/one: -------------------------------------------------------------------------------- 1 | one 2 | -------------------------------------------------------------------------------- /testdir/recursed_c/two: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /testdir/symlink_dir: -------------------------------------------------------------------------------- 1 | recursed_a -------------------------------------------------------------------------------- /testdir/symlink_test/regular_file: -------------------------------------------------------------------------------- 1 | symlink test file 2 | -------------------------------------------------------------------------------- /testdir/symlink_test/symlinked_file: -------------------------------------------------------------------------------- 1 | regular_file -------------------------------------------------------------------------------- /testdir/symlink_twice_one: -------------------------------------------------------------------------------- 1 | two -------------------------------------------------------------------------------- /testdir/symlink_two: -------------------------------------------------------------------------------- 1 | two -------------------------------------------------------------------------------- /testdir/tinydupe2: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- 
/testdir/tinydupe4: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /testdir/twice_one: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /testdir/two: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /testdir/unicode_dirnames/Ελληνιά/Unicode testfile.txt: -------------------------------------------------------------------------------- 1 | до свидания -------------------------------------------------------------------------------- /testdir/unicode_dirnames/до свидания/Unicode testfile.txt: -------------------------------------------------------------------------------- 1 | до свидания -------------------------------------------------------------------------------- /testdir/unicode_dirnames/दसविदानिया/Unicode testfile.txt: -------------------------------------------------------------------------------- 1 | до свидания -------------------------------------------------------------------------------- /testdir/unicode_dirnames/怖い/Unicode testfile.txt: -------------------------------------------------------------------------------- 1 | до свидания -------------------------------------------------------------------------------- /testdir/unicode_dirnames/행운을 빈다/Unicode testfile.txt: -------------------------------------------------------------------------------- 1 | до свидания -------------------------------------------------------------------------------- /testdir/unicode_filenames/cassé: -------------------------------------------------------------------------------- 1 | oh hi, this file has a Japanese name for testing this program against! 
-------------------------------------------------------------------------------- /testdir/unicode_filenames/Ελληνιά: -------------------------------------------------------------------------------- 1 | oh hi, this file has a Japanese name for testing this program against! -------------------------------------------------------------------------------- /testdir/unicode_filenames/до свидания: -------------------------------------------------------------------------------- 1 | oh hi, this file has a Japanese name for testing this program against! -------------------------------------------------------------------------------- /testdir/unicode_filenames/दसविदानिया: -------------------------------------------------------------------------------- 1 | oh hi, this file has a Japanese name for testing this program against! -------------------------------------------------------------------------------- /testdir/unicode_filenames/怖い: -------------------------------------------------------------------------------- 1 | oh hi, this file has a Japanese name for testing this program against! -------------------------------------------------------------------------------- /testdir/unicode_filenames/행운을 빈다: -------------------------------------------------------------------------------- 1 | oh hi, this file has a Japanese name for testing this program against! 
-------------------------------------------------------------------------------- /testdir/with spaces a: -------------------------------------------------------------------------------- 1 | with spaces 2 | -------------------------------------------------------------------------------- /testdir/with spaces b: -------------------------------------------------------------------------------- 1 | with spaces 2 | -------------------------------------------------------------------------------- /testdir/zero_a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/jdupes/a8b0812a7c54d21f910a15d8a90a9b5001a48e2c/testdir/zero_a -------------------------------------------------------------------------------- /testdir/zero_b: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/jdupes/a8b0812a7c54d21f910a15d8a90a9b5001a48e2c/testdir/zero_b -------------------------------------------------------------------------------- /travcheck.c: -------------------------------------------------------------------------------- 1 | /* jdupes double-traversal prevention tree 2 | * See jdupes.c for license information */ 3 | 4 | #ifndef NO_TRAVCHECK 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include "jdupes.h" 11 | #include "travcheck.h" 12 | 13 | /* Simple traversal balancing hash - scrambles inode number */ 14 | #define TRAVHASH(device,inode) (((inode << 55 | (inode >> 9)) + (device << 13))) 15 | 16 | static struct travcheck *travcheck_head = NULL; 17 | 18 | /* Create a new traversal check object and initialize its values */ 19 | static struct travcheck *travcheck_alloc(const dev_t device, const jdupes_ino_t inode, uintmax_t hash) 20 | { 21 | struct travcheck *trav; 22 | 23 | LOUD(fprintf(stderr, "travcheck_alloc(dev %" PRIdMAX ", ino %" PRIdMAX ", hash %" PRIuMAX ")\n", (intmax_t)device, (intmax_t)inode, hash);) 24 | 25 | trav = (struct travcheck 
*)malloc(sizeof(struct travcheck)); 26 | if (trav == NULL) { 27 | LOUD(fprintf(stderr, "travcheck_alloc: malloc failed\n");) 28 | return NULL; 29 | } 30 | trav->left = NULL; 31 | trav->right = NULL; 32 | trav->hash = hash; 33 | trav->device = device; 34 | trav->inode = inode; 35 | LOUD(fprintf(stderr, "travcheck_alloc returned %p\n", (void *)trav);) 36 | return trav; 37 | } 38 | 39 | 40 | /* De-allocate the travcheck tree */ 41 | void travcheck_free(struct travcheck *cur) 42 | { 43 | LOUD(fprintf(stderr, "travcheck_free(%p)\n", cur);) 44 | 45 | if (cur == NULL) { 46 | if (travcheck_head == NULL) return; 47 | cur = travcheck_head; 48 | travcheck_head = NULL; 49 | } 50 | if (cur->left == cur) goto error_travcheck_ptr; 51 | if (cur->right == cur) goto error_travcheck_ptr; 52 | if (cur->left != NULL) travcheck_free(cur->left); 53 | if (cur->right != NULL) travcheck_free(cur->right); 54 | if (cur != NULL) free(cur); 55 | return; 56 | error_travcheck_ptr: 57 | fprintf(stderr, "internal error: invalid pointer in travcheck_free(), report this\n"); 58 | exit(EXIT_FAILURE); 59 | } 60 | 61 | 62 | /* Check to see if device:inode pair has already been traversed */ 63 | int traverse_check(const dev_t device, const jdupes_ino_t inode) 64 | { 65 | struct travcheck *traverse = travcheck_head; 66 | uintmax_t travhash; 67 | 68 | LOUD(fprintf(stderr, "traverse_check(dev %" PRIuMAX ", ino %" PRIuMAX "\n", (uintmax_t)device, (uintmax_t)inode);) 69 | travhash = TRAVHASH(device, inode); 70 | if (travcheck_head == NULL) { 71 | travcheck_head = travcheck_alloc(device, inode, TRAVHASH(device, inode)); 72 | if (travcheck_head == NULL) return 2; 73 | } else { 74 | traverse = travcheck_head; 75 | while (1) { 76 | if (traverse == NULL) jc_nullptr("traverse_check()"); 77 | /* Don't re-traverse directories we've already seen */ 78 | if (inode == traverse->inode && device == traverse->device) { 79 | LOUD(fprintf(stderr, "traverse_check: already seen: %" PRIuMAX ":%" PRIuMAX "\n", (uintmax_t)device, 
(uintmax_t)inode);) 80 | return 1; 81 | } else { 82 | if (travhash > traverse->hash) { 83 | /* Traverse right */ 84 | if (traverse->right == NULL) { 85 | LOUD(fprintf(stderr, "traverse_check add right: %" PRIuMAX ", %" PRIuMAX"\n", (uintmax_t)device, (uintmax_t)inode);) 86 | traverse->right = travcheck_alloc(device, inode, travhash); 87 | if (traverse->right == NULL) return 2; 88 | break; 89 | } 90 | traverse = traverse->right; 91 | continue; 92 | } else { 93 | /* Traverse left */ 94 | if (traverse->left == NULL) { 95 | LOUD(fprintf(stderr, "traverse_check add left: %" PRIuMAX ", %" PRIuMAX "\n", (uintmax_t)device, (uintmax_t)inode);) 96 | traverse->left = travcheck_alloc(device, inode, travhash); 97 | if (traverse->left == NULL) return 2; 98 | break; 99 | } 100 | traverse = traverse->left; 101 | continue; 102 | } 103 | } 104 | } 105 | } 106 | return 0; 107 | } 108 | #endif /* NO_TRAVCHECK */ 109 | -------------------------------------------------------------------------------- /travcheck.h: -------------------------------------------------------------------------------- 1 | /* jdupes double-traversal prevention tree 2 | * See jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_TRAVCHECK_H 5 | #define JDUPES_TRAVCHECK_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifndef NO_TRAVCHECK 12 | 13 | /* Tree to track each directory traversed */ 14 | struct travcheck { 15 | struct travcheck *left; 16 | struct travcheck *right; 17 | uintmax_t hash; 18 | jdupes_ino_t inode; 19 | dev_t device; 20 | }; 21 | 22 | /* De-allocate the travcheck tree */ 23 | void travcheck_free(struct travcheck *cur); 24 | int traverse_check(const dev_t device, const jdupes_ino_t inode); 25 | 26 | #endif /* NO_TRAVCHECK */ 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | 32 | #endif /* JDUPES_TRAVCHECK_H */ 33 | -------------------------------------------------------------------------------- /tune_winres.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | WINRES="winres.rc" 4 | WINRES_XP="winres_xp.rc" 5 | WINRES_MAN="winres.manifest.xml" 6 | 7 | # Get version number components 8 | VER="$(grep '^#define VER "' version.h | cut -d\" -f2)" 9 | V1="$(echo "$VER" | cut -d. -f1)"; test -z "$V1" && V1=0 10 | V2="$(echo "$VER" | cut -d. -f2)"; test -z "$V2" && V2=0 11 | V3="$(echo "$VER" | cut -d. -f3)"; test -z "$V3" && V3=0 12 | V4="$(echo "$VER" | cut -d. -f4)"; test -z "$V4" && V4=0 13 | # Build VS_VERSION_INFO product version string with commas 14 | PRODVER="$V1,$V2,$V3,$V4" 15 | # Extend version to include four discrete numbers 16 | XVER="$V1.$V2.$V3.$V4" 17 | echo "$VER = $PRODVER ($XVER)" 18 | 19 | # Actually change the manifest version information 20 | sed -i 's/\([A-Z]*\)VERSION [0-9],.*/\1VERSION '"$PRODVER/"';s/"\([A-Za-z]*\)Version", "[0-9],.*"/"\1Version", '"\"$PRODVER\"/" "$WINRES" 21 | sed -i 's/\([A-Z]*\)VERSION [0-9],.*/\1VERSION '"$PRODVER/"';s/"\([A-Za-z]*\)Version", "[0-9],.*"/"\1Version", '"\"$PRODVER\"/" "$WINRES_XP" 22 | sed -i 's/assemblyIdentity type="win32" name="jdupes" version="[^"]*/assemblyIdentity type="win32" name="jdupes" version="'$XVER/ "$WINRES_MAN" 23 | -------------------------------------------------------------------------------- /version.h: -------------------------------------------------------------------------------- 1 | /* VERSION determines the program's version number 2 | * This file is part of jdupes; see jdupes.c for license information */ 3 | 4 | #ifndef JDUPES_VERSION_H 5 | #define JDUPES_VERSION_H 6 | 7 | #define VER "1.27.3" 8 | #define VERDATE "2023-08-26" 9 | 10 | #endif /* JDUPES_VERSION_H */ 11 | -------------------------------------------------------------------------------- /winres.manifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | true 7 | 8 | 9 | 10 | 
-------------------------------------------------------------------------------- /winres.rc: -------------------------------------------------------------------------------- 1 | #include "winver.h" 2 | 3 | 1 24 winres.manifest.xml 4 | 2 ICON icon/icon_jdupes_all.ico 5 | 6 | VS_VERSION_INFO VERSIONINFO 7 | FILEVERSION 1,27,3,0 8 | PRODUCTVERSION 1,27,3,0 9 | FILEFLAGSMASK 0x3fL 10 | FILEFLAGS 0x0L 11 | FILEOS 0x40004L 12 | FILETYPE 0x1L 13 | FILESUBTYPE 0x0L 14 | BEGIN 15 | BLOCK "StringFileInfo" 16 | BEGIN 17 | BLOCK "040904b0" 18 | BEGIN 19 | VALUE "Comments", "(C) 2015-2023 Jody Bruchon , published under The MIT License" 20 | VALUE "CompanyName", "Jody Bruchon" 21 | VALUE "FileDescription", "jdupes Duplicate File Finder Tool" 22 | VALUE "FileVersion", "1,27,3,0" 23 | VALUE "InternalName", "jdupes" 24 | VALUE "LegalCopyright", "(C) 2015-2023 Jody Bruchon " 25 | VALUE "OriginalFilename", "jdupes.exe" 26 | VALUE "ProductName", "jdupes" 27 | VALUE "ProductVersion", "1,27,3,0" 28 | END 29 | END 30 | BLOCK "VarFileInfo" 31 | BEGIN 32 | VALUE "Translation", 0x409, 1200 33 | END 34 | END 35 | -------------------------------------------------------------------------------- /winres_xp.rc: -------------------------------------------------------------------------------- 1 | #include "winver.h" 2 | 3 | 2 ICON icon/icon_jdupes_all.ico 4 | 5 | VS_VERSION_INFO VERSIONINFO 6 | FILEVERSION 1,27,3,0 7 | PRODUCTVERSION 1,27,3,0 8 | FILEFLAGSMASK 0x3fL 9 | FILEFLAGS 0x0L 10 | FILEOS 0x40004L 11 | FILETYPE 0x1L 12 | FILESUBTYPE 0x0L 13 | BEGIN 14 | BLOCK "StringFileInfo" 15 | BEGIN 16 | BLOCK "040904b0" 17 | BEGIN 18 | VALUE "Comments", "(C) 2015-2023 Jody Bruchon , published under The MIT License" 19 | VALUE "CompanyName", "Jody Bruchon" 20 | VALUE "FileDescription", "jdupes Duplicate File Finder Tool" 21 | VALUE "FileVersion", "1,27,3,0" 22 | VALUE "InternalName", "jdupes" 23 | VALUE "LegalCopyright", "(C) 2015-2023 Jody Bruchon " 24 | VALUE "OriginalFilename", "jdupes.exe" 25 | 
VALUE "ProductName", "jdupes" 26 | VALUE "ProductVersion", "1,27,3,0" 27 | END 28 | END 29 | BLOCK "VarFileInfo" 30 | BEGIN 31 | VALUE "Translation", 0x409, 1200 32 | END 33 | END 34 | -------------------------------------------------------------------------------- /xxhash.h: -------------------------------------------------------------------------------- 1 | /* 2 | xxHash - Extremely Fast Hash algorithm 3 | Header File 4 | Copyright (C) 2012-2016, Yann Collet. 5 | 6 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are 10 | met: 11 | 12 | * Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | * Redistributions in binary form must reproduce the above 15 | copyright notice, this list of conditions and the following disclaimer 16 | in the documentation and/or other materials provided with the 17 | distribution. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 31 | You can contact the author at : 32 | - xxHash source repository : https://github.com/Cyan4973/xxHash 33 | */ 34 | 35 | /* Notice extracted from xxHash homepage : 36 | 37 | xxHash is an extremely fast Hash algorithm, running at RAM speed limits. 38 | It also successfully passes all tests from the SMHasher suite. 39 | 40 | Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) 41 | 42 | Name Speed Q.Score Author 43 | xxHash 5.4 GB/s 10 44 | CrapWow 3.2 GB/s 2 Andrew 45 | MurmurHash 3a 2.7 GB/s 10 Austin Appleby 46 | SpookyHash 2.0 GB/s 10 Bob Jenkins 47 | SBox 1.4 GB/s 9 Bret Mulvey 48 | Lookup3 1.2 GB/s 9 Bob Jenkins 49 | SuperFastHash 1.2 GB/s 1 Paul Hsieh 50 | CityHash64 1.05 GB/s 10 Pike & Alakuijala 51 | FNV 0.55 GB/s 5 Fowler, Noll, Vo 52 | CRC32 0.43 GB/s 9 53 | MD5-32 0.33 GB/s 10 Ronald L. Rivest 54 | SHA1-32 0.28 GB/s 10 55 | 56 | Q.Score is a measure of quality of the hash function. 57 | It depends on successfully passing SMHasher test set. 58 | 10 is a perfect score. 59 | 60 | A 64-bits version, named XXH64, is available since r35. 61 | It offers much better speed, but for 64-bits applications only. 62 | Name Speed on 64 bits Speed on 32 bits 63 | XXH64 13.8 GB/s 1.9 GB/s 64 | XXH32 6.8 GB/s 6.0 GB/s 65 | */ 66 | 67 | #ifndef XXHASH_H_5627135585666179 68 | #define XXHASH_H_5627135585666179 1 69 | 70 | #if defined (__cplusplus) 71 | extern "C" { 72 | #endif 73 | 74 | 75 | /* **************************** 76 | * Definitions 77 | ******************************/ 78 | #include <stddef.h> /* size_t */ 79 | typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; 80 | 81 | 82 | /* **************************** 83 | * API modifier 84 | ******************************/ 85 | /** XXH_PRIVATE_API 86 | * This is useful to include xxhash functions in `static` mode 87 | * in order to inline them, and remove their symbol from the public list. 
88 | * Methodology : 89 | * #define XXH_PRIVATE_API 90 | * #include "xxhash.h" 91 | * `xxhash.c` is automatically included. 92 | * It's not useful to compile and link it as a separate module. 93 | */ 94 | #ifdef XXH_PRIVATE_API 95 | # ifndef XXH_STATIC_LINKING_ONLY 96 | # define XXH_STATIC_LINKING_ONLY 97 | # endif 98 | # if defined(__GNUC__) 99 | # define XXH_PUBLIC_API static __inline __attribute__((unused)) 100 | # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) 101 | # define XXH_PUBLIC_API static inline 102 | # elif defined(_MSC_VER) 103 | # define XXH_PUBLIC_API static __inline 104 | # else 105 | /* this version may generate warnings for unused static functions */ 106 | # define XXH_PUBLIC_API static 107 | # endif 108 | #else 109 | # define XXH_PUBLIC_API /* do nothing */ 110 | #endif /* XXH_PRIVATE_API */ 111 | 112 | /*!XXH_NAMESPACE, aka Namespace Emulation : 113 | 114 | If you want to include _and expose_ xxHash functions from within your own library, 115 | but also want to avoid symbol collisions with other libraries which may also include xxHash, 116 | 117 | you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library 118 | with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). 119 | 120 | Note that no change is required within the calling program as long as it includes `xxhash.h` : 121 | regular symbol name will be automatically translated by this header. 
122 | */ 123 | #ifdef XXH_NAMESPACE 124 | # define XXH_CAT(A,B) A##B 125 | # define XXH_NAME2(A,B) XXH_CAT(A,B) 126 | # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) 127 | # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) 128 | # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) 129 | # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) 130 | # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) 131 | # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) 132 | # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) 133 | # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) 134 | # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) 135 | #endif 136 | 137 | 138 | /* ************************************* 139 | * Version 140 | ***************************************/ 141 | #define XXH_VERSION_MAJOR 0 142 | #define XXH_VERSION_MINOR 6 143 | #define XXH_VERSION_RELEASE 3 144 | #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) 145 | XXH_PUBLIC_API unsigned XXH_versionNumber (void); 146 | 147 | 148 | #ifndef XXH_NO_LONG_LONG 149 | /*-********************************************************************** 150 | * 64-bits hash 151 | ************************************************************************/ 152 | typedef unsigned long long XXH64_hash_t; 153 | 154 | /*! XXH64() : 155 | Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". 156 | "seed" can be used to alter the result predictably. 157 | This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 
158 | */ 159 | XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); 160 | 161 | /*====== Streaming ======*/ 162 | typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ 163 | XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); 164 | XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); 165 | XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); 166 | 167 | XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); 168 | XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); 169 | XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); 170 | 171 | /*====== Canonical representation ======*/ 172 | typedef struct { unsigned char digest[8]; } XXH64_canonical_t; 173 | XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); 174 | XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); 175 | #endif /* XXH_NO_LONG_LONG */ 176 | 177 | 178 | #ifdef XXH_STATIC_LINKING_ONLY 179 | 180 | /* ================================================================================================ 181 | This section contains declarations which are not guaranteed to remain stable. 182 | They may change in future versions, becoming incompatible with a different version of the library. 183 | These declarations should only be used with static linking. 184 | Never use them in association with dynamic linking ! 185 | =================================================================================================== */ 186 | 187 | /* These definitions are only meant to make possible 188 | static allocation of XXH state, on stack or in a struct for example. 189 | Never use members directly. 
*/ 190 | 191 | #ifndef XXH_NO_LONG_LONG /* remove 64-bits support */ 192 | struct XXH64_state_s { 193 | unsigned long long total_len; 194 | unsigned long long v1; 195 | unsigned long long v2; 196 | unsigned long long v3; 197 | unsigned long long v4; 198 | unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ 199 | unsigned memsize; 200 | unsigned reserved[2]; /* never read nor write, will be removed in a future version */ 201 | }; /* typedef'd to XXH64_state_t */ 202 | #endif 203 | 204 | #ifdef XXH_PRIVATE_API 205 | # include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ 206 | #endif 207 | 208 | #endif /* XXH_STATIC_LINKING_ONLY */ 209 | 210 | 211 | #if defined (__cplusplus) 212 | } 213 | #endif 214 | 215 | #endif /* XXHASH_H_5627135585666179 */ 216 | --------------------------------------------------------------------------------