└── analyzeEXT.pl


/analyzeEXT.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl
  2 | #
  3 | # AnalyzeEXT - parse data blocks for EXT directory data
  4 | #
  5 | # Detailed documentation on EXT4 can be found here:
  6 | # https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout
  7 | # https://digital-forensics.sans.org/blog/tags/ext4
  8 | #
  9 | # Hal Pomeranz (hal@deer-run.com), 2017-06-12
 10 | #
 11 | # No warranty expressed or implied.
 12 | # Distributed under the Creative Commons "Attribution" (CC BY) License
 13 | # See https://creativecommons.org/licenses/
 14 | 
 15 | use strict;
 16 | use vars qw($opt_b $opt_D $opt_H $opt_P);
 17 | use Getopt::Std;
 18 | $Getopt::Std::STANDARD_HELP_VERSION = 1;       # Terminate after --help
 19 | 
 20 | sub HELP_MESSAGE { 
 21 |     die <<"EoUseMsg";
 22 | Usage: cat image | $0 [-DP] [-H] [-b blocksize]
 23 | 
 24 | -D        Output details of all directory entries found
 25 | -P        Output full directory paths (best guess)
 26 | -H        Output header labels
 27 | 
 28 | -b size   Specify an alternate block size (default 4096)
 29 | EoUseMsg
 30 | }
 31 | 
 32 | getopts('b:DHP') || HELP_MESSAGE();
 33 | my $Block_Size = $opt_b || 4096;
 34 | my $Show_Details = $opt_D;
 35 | my $Show_Paths = $opt_P;
 36 | my $Show_Headers = $opt_H;
 37 | 
 38 | $Show_Paths = 1 unless ($Show_Details);      # Default is compute path info
 39 | 
 40 | 
 41 | 
 42 | my %File_Type_Char = ( '1' => 'f', '2' => 'd', '3' => 'c', '4' => 'b', 
 43 | 		       '5' => 'p', '6' => 's', '7' => 'l' );
 44 | 
 45 | my $buffer;
 46 | my $blocknum = -1;
 47 | my($ref, $inode, %Parent_Inode, %File_Name);
 48 | 
 49 | 
 50 | print join("\t", 'Block', 'Offset', 'Filename', 'Inode', 
 51 | 	         'Type', 'Size of Entry', 'Filename Len', 
 52 |                  'Allocated', 'Notes'), "\n" 
 53 |     if ($Show_Details && $Show_Headers);
 54 | 
 55 | while (sysread(STDIN, $buffer, $Block_Size)) {
 56 |     $blocknum += 1;
 57 |     my $file_list = parse_dir_block($buffer);    # returns null list if not parsable
 58 |     next unless (@{$file_list});
 59 | 
 60 |     my $dir_start = ($$file_list[0]{'filename'} eq '.' && $$file_list[1]{'filename'} eq '..');
 61 |     my $htree_blocks = 0;
 62 |     my($this_inode, $parent) = ();
 63 | 
 64 |     if ($dir_start) {
 65 | 	my $dot_ref = shift(@{$file_list});
 66 | 	$this_inode = $$dot_ref{'inode'};
 67 | 	my $dotdot_ref = shift(@{$file_list});
 68 | 	$parent =  $$dotdot_ref{'inode'};
 69 | 	$Parent_Inode{$this_inode}{$parent} += 1;
 70 | 
 71 | 	$htree_blocks = parse_htree_root($buffer);
 72 | 
 73 | 	if ($Show_Details) {
 74 | 	    my $extra = $htree_blocks ? "$htree_blocks htree leaf blocks" : 'directory is one block';
 75 | 	    output_entry_info($dot_ref, $blocknum, $this_inode, $parent, $extra);
 76 | 	    output_entry_info($dotdot_ref, $blocknum, $this_inode, $parent);
 77 | 	}
 78 |     }
 79 | 
 80 |     foreach $ref (@${file_list}) {
 81 | 	output_entry_info($ref, $blocknum, $this_inode, $parent) if ($Show_Details);
 82 | 	next unless ($Show_Paths);
 83 | 
 84 | 	# %File_Name tracks a heuristic score for how likely a given file name is
 85 |         # to be associated with a particular inode. This score is used to create
 86 |         # paths back to the root in make_best_path().
 87 | 	#
 88 | 	# Three criteria determine the value of a particular directory entry:
 89 |         # 1. Is in in an initial directory block (with the "." and ".." links)?
 90 |         # 2. Did we carve it from the slack after an htree dx_root record?
 91 | 	# 3. Is is a deleted (carved) record or not?
 92 | 	# The score values are based on limited testing, and may need tweaking.
 93 | 	#
 94 | 	my $key = "$$ref{'filename'}/$$ref{'type'}/$this_inode";
 95 | 	$File_Name{$$ref{'inode'}}{$key} += ($dir_start) ? 2 : 1;
 96 | 	$File_Name{$$ref{'inode'}}{$key} += 2 if ($htree_blocks);
 97 | 	$File_Name{$$ref{'inode'}}{$key} += 3 if (!$htree_blocks && !$$ref{'carved'});
 98 |     }
 99 | }
100 | 
101 | 
# Everything below is path reconstruction; stop here if it was not requested.
exit(0) unless ($Show_Paths);

if ($Show_Headers) {
    print "\n\n" if ($Show_Details);     # separate the detail listing from the paths
    print "Inode\tPath Info\n";
}


# %Max_Dir_Score is used as a secondary sort criteria in make_best_path().
# For each inode it holds the highest heuristic score among that inode's
# directory-type entries (undef when the inode has none).
my %Max_Dir_Score = ();
foreach my $ino (keys(%File_Name)) {
    my $scores = $File_Name{$ino};
    my $top_score;
    foreach my $dir_key (grep(m|/2/|, keys(%{$scores}))) {
        if (!defined($top_score) || $scores->{$dir_key} > $top_score) {
            $top_score = $scores->{$dir_key};
        }
    }
    $Max_Dir_Score{$ino} = $top_score;
}

# %Paths caches the path computed for each inode. Inode 2 is the root
# directory, whose path is the empty string.
my %Paths = ( 2 => '' );
foreach my $ino (sort {$a <=> $b} keys(%Parent_Inode)) {
    if (!defined($Paths{$ino})) {
        $Paths{$ino} = make_best_path($ino);
    }
    print join("\t", $ino, $Paths{$ino}), "\n";
}
121 | 
122 | ###########################################################################################
123 | ###
124 | ### Program ends. Subroutines below.
125 | ###
126 | ###########################################################################################
127 | 
128 | 
# make_best_path - build the most plausible path for a directory inode.
#
# Parameters:
#   $inode  inode number to resolve
#   $seen   (optional) hash ref of inodes already being resolved further up
#           the recursion; used internally to break parent/child cycles.
#           External callers normally omit it.
#
# Returns a string: a path starting with "/" when the chain of parents
# reaches the root, a path starting with "???" when it does not, or just
# "???" when nothing usable is known about the inode.
#
# Reads %File_Name, %Parent_Inode and %Max_Dir_Score; reads and fills the
# %Paths cache for parent inodes. A parent path computed while breaking a
# cycle is cached like any other.
#
sub make_best_path {
    my($inode, $seen) = @_;
    my($parent, $dirname, $type, $key);

    # If we didn't find a directory entry for this inode, give up
    return('???') if (!defined($File_Name{$inode}));

    # Inodes on the current recursion chain, including this one. Stale or
    # carved directory blocks can claim that A is the parent of B while
    # another block claims B is the parent of A; without this guard such
    # data makes the function recurse without end.
    my %active = (%{$seen || {}}, $inode => 1);

    # Only possible when we found a "." link for this inode (the test also
    # keeps the keys() below from autovivifying an entry).
    if (defined($Parent_Inode{$inode})) {

        # We may have found multiple directory entries that associate
        # different parent inodes with this inode. Pull out all of the
        # %File_Name entries for this inode which are directory entries
        # and which contain one of the possible parent inodes.
        my @file_keys = ();
        foreach $parent (keys(%{$Parent_Inode{$inode}})) {
            push(@file_keys, grep(m|/2/\Q$parent\E$|, keys(%{$File_Name{$inode}})));
        }

        # March through the candidates in descending order by heuristic
        # score, resolving each parent recursively. Return the first path
        # that reaches the root; otherwise remember the unrooted ones.
        my @paths = ();
        foreach $key (sort { $File_Name{$inode}{$b} <=> $File_Name{$inode}{$a} } @file_keys) {
            ($dirname, $type, $parent) = split('/', $key);

            next if ($active{$parent});      # would loop back onto ourselves

            $Paths{$parent} = make_best_path($parent, \%active) unless (defined($Paths{$parent}));

            # Test the combined path rather than the parent's path: the
            # root directory's path is the empty string, so a child of the
            # root is only recognisable as rooted once "/name" is appended.
            my $candidate = "$Paths{$parent}/$dirname";
            return($candidate) if ($candidate =~ m|^/|);
            push(@paths, $candidate);
        }
        return($paths[0]) if (scalar(@paths));
    }

    # If we get here then either we don't have a parent inode associated
    # with this inode, or we failed to get any usable entries associated
    # with the known parent info we found.

    # Grab the directory type entry with the highest heuristic score
    # for this inode. Bail out if there aren't any directory type entries.
    $dirname = (sort { $File_Name{$inode}{$b} <=> $File_Name{$inode}{$a} } grep(m|/2/|, keys(%{$File_Name{$inode}})))[0];
    return('???') unless (length($dirname));

    # Keep only the file name part of the "name/type/inode" key.
    # Bail out if we don't have any parent inode info for this inode.
    $dirname =~ s|/.*||;
    return("???/$dirname") if (!defined($Parent_Inode{$inode}));

    # Heuristically pick the best parent: most often seen first, then the
    # one with the strongest directory entry of its own. Skip parents that
    # would form a cycle, resolve the first remaining one recursively and
    # return whatever we get.
    foreach $parent (sort { $Parent_Inode{$inode}{$b} <=> $Parent_Inode{$inode}{$a} ||
                            $Max_Dir_Score{$b} <=> $Max_Dir_Score{$a} } keys(%{$Parent_Inode{$inode}})) {
        next if ($active{$parent});
        $Paths{$parent} = make_best_path($parent, \%active) unless (defined($Paths{$parent}));
        return("$Paths{$parent}/$dirname");
    }

    # Every known parent is already on the recursion chain.
    return("???/$dirname");
}
191 | 
192 | 
193 | 
# valid_dir_values - sanity-check the fields of a candidate directory entry.
#
# Parameters:
#   $buffer     the data the entry was read from (the entry starts at byte 0)
#   $inode      inode number field
#   $entry_len  entry (record) length field
#   $name_len   name length field
#   $file_type  file type field
#   $file_name  the name bytes extracted from the entry
#
# Returns 1 when every rule below holds, undef otherwise:
#   - The inode is 2 (the root directory) or greater than 10; all other
#     inodes <= 10 are reserved.
#   - The entry length is positive, a multiple of 4 bytes, and no larger
#     than the amount of data left in $buffer.
#   - The name length is positive and fits in the entry after its 8-byte
#     header.
#   - The file type is in the range 1-8 (8 is a Solaris Door).
#   - The name contains no slash and no null byte.
#
sub valid_dir_values {
    my($buffer, $inode, $entry_len, $name_len, $file_type, $file_name) = @_;

    # Reserved or zero inode number
    return(undef) if ($inode != 2 && $inode <= 10);

    # Entry length out of range or not 4-byte aligned
    return(undef) if ($entry_len > length($buffer) || $entry_len <= 0 || $entry_len % 4);

    # Name missing, or too long for the space after the 8-byte header
    return(undef) if ($name_len <= 0 || $name_len > ($entry_len - 8));

    # Unknown file type
    return(undef) if ($file_type <= 0 || $file_type >= 9);

    # '/' and null not allowed in file names
    return(undef) if ($file_name =~ /[\/\000]/);

    return(1);
}
211 | 
212 | 
# parse_dir_block - try to interpret one block as EXT directory data.
#
# Parameter:
#   $buffer  raw bytes of the block
#
# Returns a reference to a list of entry records (hashes with the keys
# 'filename', 'inode', 'type', 'entrysize', 'namesize', 'offset', plus
# 'carved' for deleted entries recovered from slack space), in block order.
# Returns a reference to an empty list when the block does not parse as a
# chain of valid directory entries or holds fewer than two entries.
#
sub parse_dir_block {
    my($buffer) = @_;
    my $namelist = [];

    # Offsets are reported relative to the start of this block. Measure the
    # block itself rather than relying on the configured block size, so a
    # short final block of an image still gets correct offsets.
    my $block_len = length($buffer);

    while (length($buffer)) {

        # Too short to hold even an entry header: not directory data.
        return([]) if (length($buffer) < 8);

        # Optimistically try to parse the next entry.
        # EXT stores its on-disk fields little-endian, so decode with the
        # explicit little-endian templates (V/v) instead of the host-order
        # ones (L/S), which give wrong values on big-endian machines.
        my($inode, $entry_len, $name_len, $file_type) = unpack("VvCC", $buffer);

        # ext4 file systems with metadata checksums end each directory leaf
        # block with a 12-byte tail that looks like an entry with inode 0,
        # length 12, name length 0 and file type 0xDE. It marks the end of
        # the real entries; stop here instead of rejecting the block.
        last if (length($buffer) == 12 && $inode == 0 && $entry_len == 12 &&
                 $name_len == 0 && $file_type == 0xDE);

        # Bail out if we get invalid data.
        my $file_name = substr($buffer, 8, $name_len);
        return([]) unless (valid_dir_values($buffer, $inode, $entry_len, $name_len, $file_type, $file_name));

        # Compute the offset of this entry and then carve the entry
        # out of $buffer, reducing the size of $buffer.
        my $offset = $block_len - length($buffer);
        my $this_entry = substr($buffer, 0, $entry_len, '');

        # Add a record to the list of entries we've found so far.
        push(@{$namelist}, { 'filename' => $file_name,
                             'inode' => $inode,
                             'type' => $file_type,
                             'entrysize' => $entry_len,
                             'namesize' => $name_len,
                             'offset' => $offset });

        # If there's enough slack space in this directory entry (at least
        # the 12 bytes of a minimal entry), see if it holds any deleted
        # directory entries. Only whole 4-byte words at the end of the
        # entry are searched.
        my $extra = $entry_len - $name_len - 8;
        if ($extra >= 12) {
            my $bytes_to_search = int($extra/4) * 4;
            my $carved = carve_deleted_entries(substr($this_entry, -$bytes_to_search),
                                               $offset + ($entry_len - $bytes_to_search));
            push(@{$namelist}, @{$carved}) if (scalar(@{$carved}));
        }
    }

    # A single entry is too weak a signal that this is directory data.
    return([]) unless (scalar(@{$namelist}) > 1);
    return($namelist);
}
253 | 
254 | 
# carve_deleted_entries - search slack space for remnants of deleted
# directory entries.
#
# Parameters:
#   $buffer  the slack bytes to search
#   $offset  offset within the block of the first byte of $buffer; used
#            only to label the records that are found
#
# Returns a reference to a (possibly empty) list of entry records with the
# keys 'filename', 'inode', 'type', 'entrysize', 'namesize', 'offset' and
# 'carved' (always 1).
#
sub carve_deleted_entries {
    my($buffer, $offset) = @_;
    my $namelist = [];

    # 12 bytes is the smallest possible entry (8-byte header plus a name
    # padded to 4 bytes).
    while (length($buffer) >= 12) {

        # Optimistically try to carve out a directory entry.
        # EXT stores its on-disk fields little-endian, so decode with the
        # explicit little-endian templates (V/v) instead of the host-order
        # ones (L/S), which give wrong values on big-endian machines.
        my($inode, $entry_len, $name_len, $file_type) = unpack("VvCC", $buffer);
        my $file_name = substr($buffer, 8, $name_len);

        # If the data we carved is not valid, advance 4 bytes
        # and try again (directory entries are 4 byte aligned).
        if (!valid_dir_values($buffer, $inode, $entry_len, $name_len, $file_type, $file_name)) {
            $buffer = substr($buffer, 4);
            $offset += 4;
            next;
        }

        # Good data? Make a record of what we found.
        push(@{$namelist}, { 'filename' => $file_name,
                             'inode' => $inode,
                             'type' => $file_type,
                             'entrysize' => $entry_len,
                             'namesize' => $name_len,
                             'offset' => $offset,
                             'carved' => 1});

        # Advance past the header and name, rounded up to the next 4 byte
        # aligned location, and start over again. The recorded entry length
        # is deliberately not used for the step, so whatever follows the
        # name inside this entry gets examined as well.
        my $min_len = 8 + $name_len;
        my $remainder = $min_len % 4;
        $min_len += (4 - $remainder) if ($remainder);
        $buffer = substr($buffer, $min_len);
        $offset += $min_len;
    }

    return($namelist);
}
294 | 
295 | 
# parse_htree_root - check whether a directory's first block carries an
# htree (hashed directory index) root record after its "." and ".." entries.
#
# Parameter:
#   $buffer  raw bytes of the directory's first block
#
# Returns the number of index entries in the root (reported by the caller
# as the number of htree leaf blocks) when the record looks valid, undef
# otherwise. Multi-level trees (depth != 0) are not supported yet and also
# yield undef. Reads the global $Block_Size.
#
sub parse_htree_root {
    my($buffer) = @_;
    my $i;

    $buffer = substr($buffer, 24);         # chop off "." and ".." entries

    # EXT stores its on-disk fields little-endian, so decode with the
    # explicit little-endian templates (V/v) instead of the host-order
    # ones (L/S), which give wrong values on big-endian machines.
    my($reserved, $hash_type, $info_len, $depth, $flags, $max_entries, $num_entries) = unpack("VCCCCvv", $buffer);

    # Reserved bytes must be zero.
    # Hash type must be <= 5.
    # Size of dx_entry records is always 8.
    # Unused flags field should be zero.
    # Can't have more than the stated max entries.
    return(undef) if ($reserved != 0 || $hash_type > 5 || $info_len != 8 || $flags != 0 || $num_entries > $max_entries);

    # The entry limit is fixed by the block size: everything after the
    # 32 bytes of "." / ".." / root info holds 8-byte index entries. File
    # systems with metadata checksums give up the last 8 bytes of the block
    # to a checksum tail, which lowers the limit by one.
    my $limit = ($Block_Size - 32)/8;
    return(undef) unless ($max_entries == $limit || $max_entries == $limit - 1);

    # Leftover debug marker for a multi-level tree; kept as-is.
    print STDERR "SHOOT!\n" if ($depth != 0);
    return(undef) if ($depth != 0);                                # TODO: deal with deep trees

    # Make sure that the block numbers in the htree hash array make sense.
    #
    # There are some shenanigans here. The block number for the "zero hash"
    # is at offset 36 from the front of the block. So we throw away the first
    # 32 bytes of the block (24 for the substr() above, and 8 more below).
    # This leaves the 4 bytes of $max_entries and $num_entries before the zero
    # hash block number, which stand in for a hash value and make the
    # unpack() in the loop below work correctly. It's a hack.
    #
    $buffer = substr($buffer, 8);
    for ($i = 0; $i < $num_entries; $i++) {
        my($hash, $block) = unpack("VV", $buffer);
        return(undef) unless ($block <= $num_entries);
        $buffer = substr($buffer, 8);
    }

    return($num_entries);
}
331 | 
332 | 
# output_entry_info - print one tab-separated detail line for a directory
# entry on the currently selected output handle.
#
# Parameters:
#   $ref           entry record (hash ref) as built by the block parsers
#   $blocknum      number of the block the entry was found in
#   $dot_inode     accepted for the caller's convenience; not used here
#   $dotdot_inode  accepted for the caller's convenience; not used here
#   $extra         optional text for the final "Notes" column
#
# Columns: block, offset, file name, inode, type letter, entry size,
# name length, allocated flag ('n' for carved entries, 'y' otherwise), notes.
#
sub output_entry_info {
    my($ref, $blocknum, $dot_inode, $dotdot_inode, $extra) = @_;

    my @columns = ($blocknum);
    push(@columns, @{$ref}{'offset', 'filename', 'inode'});
    push(@columns, $File_Type_Char{$ref->{'type'}});
    push(@columns, @{$ref}{'entrysize', 'namesize'});

    # Only records recovered from slack space carry the 'carved' key.
    if (defined($ref->{'carved'})) {
        push(@columns, 'n');
    }
    else {
        push(@columns, 'y');
    }
    push(@columns, $extra);

    print join("\t", @columns), "\n";
}
347 | 


--------------------------------------------------------------------------------