├── LICENSE
├── README.md
└── clone_checker.c
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Dyorgio Nascimento
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # apfs-clone-checker
2 | A utility to check whether two files are clones on macOS APFS.
3 |
4 |
5 | ## How it works (and motivation)
6 |
7 | Some years ago (2017-09-26) I asked in [stackoverflow](https://stackoverflow.com/questions/46417747/apple-file-system-apfs-check-if-file-is-a-clone-on-terminal-shell) and in [Apple Developer Forum](https://developer.apple.com/forums/thread/81100) if there is a way to identify if a file is a clone.
8 |
9 | Three years later, there is still no answer and no update to the macOS tools that provides this.
10 |
11 | My motivation, like that of others who also want an answer (I guess :smile:), is to create a tool that analyzes an entire disk and turns files with the same content into clones (using the APFS clone feature as much as possible).
12 |
13 | Many tools have been created in the meantime, but all of them work by hashing the file content (an expensive operation).
14 |
15 | This tool uses another approach: the physical location of the file's blocks.
16 |
17 | All cloned files point to the same blocks; this utility gets this information from both files and compares it.
18 |
19 | It also performs some validations and quick checks:
20 | * Both files are in an APFS device.
21 | * Files are in same device.(CLONE only supported in same device)
22 | * Files are normal files.
23 | * Files are not the same.
24 | * Files have same size and blocks count.
25 |
26 | In initial tests it speeds up clone checking by at least 200% compared with a full-file verification using shasum or md5.
27 |
28 | The gain is even greater for files that are not full clones: the check stops at the first differing block, which may be the very first one :nerd_face:.
29 |
30 | ## Usage
31 | ```.sh
32 | ./clone_checker [-fqv] pathOfFileA pathOfFileB
33 | ```
34 | If the exit code is 0 (OK), it prints to stdout:
35 | * 1 = Clones
36 | * 0 = Not clones (maybe partial if -q option is used)
37 |
38 | If the exit code is not 0 (NOK):
39 | * It prints a description of the problem to stderr.
40 |
41 | ## Options
42 |
43 | * -f (forced mode): Ignore read/validation errors and return 0 (not clones).
44 | * -q (quick mode): Just verify first and last blocks (fast, but not 100%).
45 | * -v: Print version.
46 | * -?,h: Print usage.
47 |
48 | ## Compilation
49 | Have gcc installed (XCode).
50 | Copy clone_checker.c (or content) to your computer.
51 | Run gcc:
52 | ```.sh
53 | gcc clone_checker.c -o clone_checker
54 | ```
55 | Optional (mark binary executable):
56 | ```.sh
57 | chmod +x clone_checker
58 | ```
59 |
60 | ## Work TODO
61 | * ~~Investigate about directory cloning.~~ 2021-11-24 - Not supported, directories are always new inodes, but files inside can be all clones.
62 | * ~~Investigate how to get j_inode_flags and check INODE_WAS_CLONED flag (optimization) [Apple APFS Reference](https://developer.apple.com/support/downloads/Apple-File-System-Reference.pdf)~~ 2020-12-12 - Getting it would require reverse engineering and parsing the whole on-device filesystem structure.
63 | * Support a percentage-of-clone mode (a clone can have only some blocks altered).
64 |
--------------------------------------------------------------------------------
/clone_checker.c:
--------------------------------------------------------------------------------
1 | //
2 | // clone_checker.c
3 | //
4 | // To compile:
5 | // gcc clone_checker.c -o clone_checker
6 | //
7 | // Created by Dyorgio Nascimento on 2020-12-10.
8 | //
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 |
19 | // declare methods
20 | void printUsage(char* executable);
21 | int compare_blocks(int block_size, char *filenameA, char *filenameB, int fdA, int fdB);
22 | int compare_boundary_blocks(char *filenameA, char *filenameB, int fdA, int fdB);
23 | void check_disk_fs(char *filename, bool is_forced_mode);
24 | struct stat check_file(char *filename, bool is_forced_mode);
25 |
26 | // entrypoint
27 | int main(int args_count, char **args) {
28 |
29 | bool is_forced_mode = false;
30 | bool is_quick_mode = false;
31 | int opt;
32 | while ( (opt = getopt(args_count, args, "fqv?h")) != -1) {
33 | switch ( opt ) {
34 | case 'f': is_forced_mode = true; break;
35 | case 'q': is_quick_mode = true; break;
36 | case 'v': fprintf(stderr, "APFS Clone Checker - Version: 1.0.0.0\n"); exit(EXIT_SUCCESS); break;
37 | case '?':
38 | case 'h': printUsage(args[0]);
39 | default:
40 | printUsage(args[0]);
41 | }
42 | }
43 | if ( args_count - optind < 2 ) {
44 | printUsage(args[0]);
45 | }
46 |
47 | char* filenameA = args[optind];
48 | char* filenameB = args[optind + 1];
49 |
50 | check_disk_fs(filenameA, is_forced_mode);
51 | check_disk_fs(filenameB, is_forced_mode);
52 |
53 | struct stat statA = check_file(filenameA, is_forced_mode);
54 | struct stat statB = check_file(filenameB, is_forced_mode);
55 |
56 | if (statA.st_dev != statB.st_dev || statA.st_size != statB.st_size || statA.st_size < 1
57 | || statA.st_blocks != statB.st_blocks || statA.st_ino == statB.st_ino) {
58 | // clones only are supported on same device and have same size em blocks count, a file cannot be a clone of itself
59 | fprintf(stdout,"0\n");
60 | exit(EXIT_SUCCESS);
61 | }
62 |
63 | int fdA = open(filenameA, O_RDONLY);
64 | if (fdA < 0 ) {
65 | fprintf(stderr,"%s: Cannot open. %s\n", filenameA, strerror(errno));
66 | if ( is_forced_mode ) {
67 | fprintf(stdout,"0\n");
68 | exit(EXIT_SUCCESS);
69 | } else {
70 | exit(EXIT_FAILURE);
71 | }
72 | }
73 |
74 | int fdB = open(filenameB, O_RDONLY);
75 | if ( fdB < 0 ) {
76 | fprintf(stderr,"%s: Cannot open. %s\n", filenameB, strerror(errno));
77 | close(fdA);
78 | if ( is_forced_mode ) {
79 | fprintf(stdout,"0\n");
80 | exit(EXIT_SUCCESS);
81 | } else {
82 | exit(EXIT_FAILURE);
83 | }
84 | }
85 |
86 | int result;
87 | if ( is_quick_mode ) {
88 | result = compare_boundary_blocks(filenameA, filenameB, fdA, fdB);
89 | } else {
90 | result = compare_blocks(statA.st_blksize, filenameA, filenameB, fdA, fdB);
91 | }
92 |
93 | close(fdA);
94 | close(fdB);
95 |
96 | if ( result != -1 ) {
97 | fprintf(stdout,"%i\n", result);
98 | exit(EXIT_SUCCESS);
99 | } else {
100 | if ( is_forced_mode ) {
101 | fprintf(stdout,"0\n");
102 | exit(EXIT_SUCCESS);
103 | } else {
104 | exit(EXIT_FAILURE);
105 | }
106 | }
107 | }
108 |
// Writes the one-line usage summary for `executable` to stderr and then
// terminates the process with EXIT_FAILURE; this function never returns.
void printUsage(char* executable)
{
    fprintf(stderr, "Usage: %s [-fqv] fileA fileB\n", executable);
    exit(EXIT_FAILURE);
}
113 |
114 | int compare_blocks(int block_size, char *filenameA, char *filenameB, int fdA, int fdB) {
115 |
116 | long sts = 0;
117 | struct log2phys physA;
118 | struct log2phys physB;
119 |
120 | for ( off_t offset = 0; sts >= 0; offset += block_size ) {
121 | physA.l2p_devoffset = offset;
122 | // get current blocks physical location
123 | sts = fcntl(fdA, F_LOG2PHYS_EXT, &physA);
124 | if ( sts < 0 && errno == ERANGE ) {
125 | physB.l2p_devoffset = offset;
126 | sts = fcntl(fdB, F_LOG2PHYS_EXT, &physB);
127 | if ( sts < 0 && errno == ERANGE ) {
128 | // both files seeked to the end with same offsets
129 | return true;
130 | } else if ( sts < 0 ) {
131 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameB, errno, strerror(errno));
132 | return -1;
133 | }
134 | break;
135 | } else if ( sts < 0 ) {
136 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameA, errno, strerror(errno));
137 | return -1;
138 | }
139 |
140 | physB.l2p_devoffset = offset;
141 | sts = fcntl(fdB, F_LOG2PHYS_EXT, &physB);
142 | if ( sts < 0 && errno == ERANGE ) {
143 | // insanity check, size of files already verified before
144 | break;
145 | } else if ( sts < 0 ) {
146 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameB, errno, strerror(errno));
147 | return -1;
148 | }
149 |
150 | if ( physA.l2p_devoffset != physB.l2p_devoffset ) {
151 | // found a diff block
152 | break;
153 | }
154 | }
155 |
156 | // not a clone (check loop breaked)
157 | return false;
158 | }
159 |
160 | int compare_boundary_blocks(char *filenameA, char *filenameB, int fdA, int fdB) {
161 |
162 | long sts = 0;
163 | struct log2phys physA;
164 | struct log2phys physB;
165 | // get initial blocks physical location
166 | sts = fcntl(fdA, F_LOG2PHYS, &physA);
167 | if ( sts < 0 ) {
168 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameA, errno, strerror(errno));
169 | return -1;
170 | }
171 |
172 | sts = fcntl(fdB, F_LOG2PHYS, &physB);
173 | if ( sts < 0 ) {
174 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameB, errno, strerror(errno));
175 | return -1;
176 | }
177 |
178 | if ( physA.l2p_devoffset == physB.l2p_devoffset ) {
179 | // Move to end of files
180 | sts = lseek(fdA, -1, SEEK_END);
181 | if ( sts < 0 ) {
182 | fprintf(stderr,"%s: Cannot seek. %ld %s\n", filenameA, sts, strerror(errno));
183 | return -1;
184 | }
185 | sts = lseek(fdB, -1, SEEK_END);
186 | if ( sts < 0 ) {
187 | fprintf(stderr,"%s: Cannot seek. %ld %s\n", filenameB, sts, strerror(errno));
188 | return -1;
189 | }
190 |
191 | // get last blocks physical location
192 | sts = fcntl(fdA, F_LOG2PHYS, &physA);
193 | if ( sts < 0 ) {
194 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameA, errno, strerror(errno));
195 | return -1;
196 | }
197 |
198 | sts = fcntl(fdB, F_LOG2PHYS, &physB);
199 | if ( sts < 0 ) {
200 | fprintf(stderr,"%s: Cannot convert logical to physical offset. %i %s\n", filenameB, errno, strerror(errno));
201 | return -1;
202 | }
203 |
204 | return physA.l2p_devoffset == physB.l2p_devoffset;
205 | }
206 |
207 | return false;
208 | }
209 |
210 | void check_disk_fs(char *filename, bool is_forced_mode) {
211 | struct statfs fs;
212 | if( statfs(filename, &fs) == 0 ) {
213 | if( strcmp(fs.f_fstypename, "apfs") != 0) {
214 | fprintf(stderr, "%s: Only APFS is supported: %s\n", filename, fs.f_fstypename);
215 | if ( is_forced_mode ) {
216 | fprintf(stdout,"0\n");
217 | exit(EXIT_SUCCESS);
218 | } else {
219 | exit(EXIT_FAILURE);
220 | }
221 | }
222 | }
223 | }
224 |
// Stats `filename` and makes sure it is a regular file.
//
// Returns the file's struct stat on success. When stat() fails or the path
// is not a regular file, a message is written to stderr and the process
// ends: with "0" on stdout and a success status in forced mode, with a
// failure status otherwise.
struct stat check_file(char *filename, bool is_forced_mode) {
    struct stat st;

    if ( stat(filename, &st) < 0 ) {
        // report the real cause: stat() also fails for reasons other than a
        // missing file (permissions, symlink loops, over-long names, ...)
        fprintf(stderr, "%s: Cannot stat. %s\n", filename, strerror(errno));
    } else if ( !S_ISREG(st.st_mode) ) {
        fprintf(stderr, "%s: Not a regular file\n", filename);
    } else {
        return st;
    }

    if ( is_forced_mode ) {
        fprintf(stdout,"0\n");
        exit(EXIT_SUCCESS);
    }
    exit(EXIT_FAILURE);
}
248 |
--------------------------------------------------------------------------------