├── LICENSE ├── README.md └── clone_checker.c /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Dyorgio Nascimento 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # apfs-clone-checker 2 | A utility to check whether two files are clones on macOS APFS. 3 | 4 | 5 | ## How it works (and motivation) 6 | 7 | Some years ago (2017-09-26) I asked on [Stack Overflow](https://stackoverflow.com/questions/46417747/apple-file-system-apfs-check-if-file-is-a-clone-on-terminal-shell) and in the [Apple Developer Forum](https://developer.apple.com/forums/thread/81100) whether there is a way to identify if a file is a clone. 8 | 9 | 3 years later, there is still no response, and no macOS tool has been updated to provide this.
10 | 11 | My motivation, like that of others who also want an answer (I guess :smile:), is to create a tool that analyzes an entire disk and creates clones of files with the same content (using the APFS clone feature as much as possible). 12 | 13 | Many tools have been created in the meantime, but all of them work by hashing the file content (an expensive operation). 14 | 15 | This tool uses another approach: the physical location of the file blocks. 16 | 17 | All cloned files point to the same blocks; this utility gets this information from both files and compares it. 18 | 19 | It also performs some validations and fast checks: 20 | * Both files are on an APFS device. 21 | * Files are on the same device (clones are only supported within the same device). 22 | * Files are regular files. 23 | * Files are not the same file. 24 | * Files have the same size and block count. 25 | 26 | In initial tests it speeds up clone checking by at least 200% compared with full-file verification using shasum or md5. 27 | 28 | The gain is even larger for files that are not full clones: it can stop at the first differing block, which may be the very first one :nerd_face:. 29 | 30 | ## Usage 31 | ```.sh 32 | ./clone_checker [-fqv] pathOfFileA pathOfFileB 33 | ``` 34 | If the exit code is 0 (OK), it prints to stdout: 35 | * 1 = Clones 36 | * 0 = Not clones (possibly partial clones if the -q option is used) 37 | 38 | If the exit code is not 0 (NOK): 39 | * It prints the problem to stderr. 40 | 41 | ## Options 42 | 43 | * -f (forced mode): Ignore read/validation errors and print 0 (not clones). 44 | * -q (quick mode): Only verify the first and last blocks (fast, but not 100% accurate). 45 | * -v: Print version. 46 | * -?,h: Print usage. 47 | 48 | ## Compilation 49 | Have gcc installed (Xcode).
50 | Copy clone_checker.c (or content) to your computer.
51 | Run gcc: 52 | ```.sh 53 | gcc clone_checker.c -o clone_checker 54 | ``` 55 | Optional (mark binary executable): 56 | ```.sh 57 | chmod +x clone_checker 58 | ``` 59 | 60 | ## Work TODO 61 | * ~~Investigate directory cloning.~~ 2021-11-24 - Not supported: directories are always new inodes, but the files inside can all be clones. 62 | * ~~Investigate how to get j_inode_flags and check the INODE_WAS_CLONED flag (optimization) [Apple APFS Reference](https://developer.apple.com/support/downloads/Apple-File-System-Reference.pdf)~~ 2020-12-12 - Getting it would require reverse engineering and parsing the whole device file-system structure. 63 | * Support a clone-percentage mode (a clone can have only some blocks altered). 64 | -------------------------------------------------------------------------------- /clone_checker.c: -------------------------------------------------------------------------------- 1 | // 2 | // clone_checker.c 3 | // 4 | // To compile: 5 | // gcc clone_checker.c -o clone_checker 6 | // 7 | // Created by Dyorgio Nascimento on 2020-12-10.
8 | // 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | // declare methods 20 | void printUsage(char* executable); 21 | int compare_blocks(int block_size, char *filenameA, char *filenameB, int fdA, int fdB); 22 | int compare_boundary_blocks(char *filenameA, char *filenameB, int fdA, int fdB); 23 | void check_disk_fs(char *filename, bool is_forced_mode); 24 | struct stat check_file(char *filename, bool is_forced_mode); 25 | 26 | // entrypoint 27 | int main(int args_count, char **args) { 28 | 29 | bool is_forced_mode = false; 30 | bool is_quick_mode = false; 31 | int opt; 32 | while ( (opt = getopt(args_count, args, "fqv?h")) != -1) { 33 | switch ( opt ) { 34 | case 'f': is_forced_mode = true; break; 35 | case 'q': is_quick_mode = true; break; 36 | case 'v': fprintf(stderr, "APFS Clone Checker - Version: 1.0.0.0\n"); exit(EXIT_SUCCESS); break; 37 | case '?': 38 | case 'h': printUsage(args[0]); 39 | default: 40 | printUsage(args[0]); 41 | } 42 | } 43 | if ( args_count - optind < 2 ) { 44 | printUsage(args[0]); 45 | } 46 | 47 | char* filenameA = args[optind]; 48 | char* filenameB = args[optind + 1]; 49 | 50 | check_disk_fs(filenameA, is_forced_mode); 51 | check_disk_fs(filenameB, is_forced_mode); 52 | 53 | struct stat statA = check_file(filenameA, is_forced_mode); 54 | struct stat statB = check_file(filenameB, is_forced_mode); 55 | 56 | if (statA.st_dev != statB.st_dev || statA.st_size != statB.st_size || statA.st_size < 1 57 | || statA.st_blocks != statB.st_blocks || statA.st_ino == statB.st_ino) { 58 | // clones only are supported on same device and have same size em blocks count, a file cannot be a clone of itself 59 | fprintf(stdout,"0\n"); 60 | exit(EXIT_SUCCESS); 61 | } 62 | 63 | int fdA = open(filenameA, O_RDONLY); 64 | if (fdA < 0 ) { 65 | fprintf(stderr,"%s: Cannot open. 
%s\n", filenameA, strerror(errno)); 66 | if ( is_forced_mode ) { 67 | fprintf(stdout,"0\n"); 68 | exit(EXIT_SUCCESS); 69 | } else { 70 | exit(EXIT_FAILURE); 71 | } 72 | } 73 | 74 | int fdB = open(filenameB, O_RDONLY); 75 | if ( fdB < 0 ) { 76 | fprintf(stderr,"%s: Cannot open. %s\n", filenameB, strerror(errno)); 77 | close(fdA); 78 | if ( is_forced_mode ) { 79 | fprintf(stdout,"0\n"); 80 | exit(EXIT_SUCCESS); 81 | } else { 82 | exit(EXIT_FAILURE); 83 | } 84 | } 85 | 86 | int result; 87 | if ( is_quick_mode ) { 88 | result = compare_boundary_blocks(filenameA, filenameB, fdA, fdB); 89 | } else { 90 | result = compare_blocks(statA.st_blksize, filenameA, filenameB, fdA, fdB); 91 | } 92 | 93 | close(fdA); 94 | close(fdB); 95 | 96 | if ( result != -1 ) { 97 | fprintf(stdout,"%i\n", result); 98 | exit(EXIT_SUCCESS); 99 | } else { 100 | if ( is_forced_mode ) { 101 | fprintf(stdout,"0\n"); 102 | exit(EXIT_SUCCESS); 103 | } else { 104 | exit(EXIT_FAILURE); 105 | } 106 | } 107 | } 108 | 109 | void printUsage(char* executable){ 110 | fprintf(stderr, "Usage: %s [-fqv] fileA fileB\n", executable); 111 | exit(EXIT_FAILURE); 112 | } 113 | 114 | int compare_blocks(int block_size, char *filenameA, char *filenameB, int fdA, int fdB) { 115 | 116 | long sts = 0; 117 | struct log2phys physA; 118 | struct log2phys physB; 119 | 120 | for ( off_t offset = 0; sts >= 0; offset += block_size ) { 121 | physA.l2p_devoffset = offset; 122 | // get current blocks physical location 123 | sts = fcntl(fdA, F_LOG2PHYS_EXT, &physA); 124 | if ( sts < 0 && errno == ERANGE ) { 125 | physB.l2p_devoffset = offset; 126 | sts = fcntl(fdB, F_LOG2PHYS_EXT, &physB); 127 | if ( sts < 0 && errno == ERANGE ) { 128 | // both files seeked to the end with same offsets 129 | return true; 130 | } else if ( sts < 0 ) { 131 | fprintf(stderr,"%s: Cannot convert logical to physical offset. 
%i %s\n", filenameB, errno, strerror(errno)); 132 | return -1; 133 | } 134 | break; 135 | } else if ( sts < 0 ) { 136 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameA, errno, strerror(errno)); 137 | return -1; 138 | } 139 | 140 | physB.l2p_devoffset = offset; 141 | sts = fcntl(fdB, F_LOG2PHYS_EXT, &physB); 142 | if ( sts < 0 && errno == ERANGE ) { 143 | // insanity check, size of files already verified before 144 | break; 145 | } else if ( sts < 0 ) { 146 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameB, errno, strerror(errno)); 147 | return -1; 148 | } 149 | 150 | if ( physA.l2p_devoffset != physB.l2p_devoffset ) { 151 | // found a diff block 152 | break; 153 | } 154 | } 155 | 156 | // not a clone (check loop breaked) 157 | return false; 158 | } 159 | 160 | int compare_boundary_blocks(char *filenameA, char *filenameB, int fdA, int fdB) { 161 | 162 | long sts = 0; 163 | struct log2phys physA; 164 | struct log2phys physB; 165 | // get initial blocks physical location 166 | sts = fcntl(fdA, F_LOG2PHYS, &physA); 167 | if ( sts < 0 ) { 168 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameA, errno, strerror(errno)); 169 | return -1; 170 | } 171 | 172 | sts = fcntl(fdB, F_LOG2PHYS, &physB); 173 | if ( sts < 0 ) { 174 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameB, errno, strerror(errno)); 175 | return -1; 176 | } 177 | 178 | if ( physA.l2p_devoffset == physB.l2p_devoffset ) { 179 | // Move to end of files 180 | sts = lseek(fdA, -1, SEEK_END); 181 | if ( sts < 0 ) { 182 | fprintf(stderr,"%s: Cannot seek. %ld %s\n", filenameA, sts, strerror(errno)); 183 | return -1; 184 | } 185 | sts = lseek(fdB, -1, SEEK_END); 186 | if ( sts < 0 ) { 187 | fprintf(stderr,"%s: Cannot seek. 
%ld %s\n", filenameB, sts, strerror(errno)); 188 | return -1; 189 | } 190 | 191 | // get last blocks physical location 192 | sts = fcntl(fdA, F_LOG2PHYS, &physA); 193 | if ( sts < 0 ) { 194 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameA, errno, strerror(errno)); 195 | return -1; 196 | } 197 | 198 | sts = fcntl(fdB, F_LOG2PHYS, &physB); 199 | if ( sts < 0 ) { 200 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameB, errno, strerror(errno)); 201 | return -1; 202 | } 203 | 204 | return physA.l2p_devoffset == physB.l2p_devoffset; 205 | } 206 | 207 | return false; 208 | } 209 | 210 | void check_disk_fs(char *filename, bool is_forced_mode) { 211 | struct statfs fs; 212 | if( statfs(filename, &fs) == 0 ) { 213 | if( strcmp(fs.f_fstypename, "apfs") != 0) { 214 | fprintf(stderr, "%s: Only APFS is supported: %s\n", filename, fs.f_fstypename); 215 | if ( is_forced_mode ) { 216 | fprintf(stdout,"0\n"); 217 | exit(EXIT_SUCCESS); 218 | } else { 219 | exit(EXIT_FAILURE); 220 | } 221 | } 222 | } 223 | } 224 | 225 | struct stat check_file(char *filename, bool is_forced_mode) { 226 | struct stat st; 227 | if ( stat(filename, &st) < 0 ) { 228 | fprintf(stderr, "%s: No such file\n", filename); 229 | if ( is_forced_mode ) { 230 | fprintf(stdout,"0\n"); 231 | exit(EXIT_SUCCESS); 232 | } else { 233 | exit(EXIT_FAILURE); 234 | } 235 | } 236 | 237 | if ( (st.st_mode & S_IFMT) != S_IFREG ) { 238 | fprintf(stderr, "%s: Not a regular file\n", filename); 239 | if ( is_forced_mode ) { 240 | fprintf(stdout,"0\n"); 241 | exit(EXIT_SUCCESS); 242 | } else { 243 | exit(EXIT_FAILURE); 244 | } 245 | } 246 | return st; 247 | } 248 | --------------------------------------------------------------------------------