#!/usr/bin/perl
#
# AnalyzeEXT - parse data blocks for EXT directory data
#
# Detailed documentation on EXT4 can be found here:
# https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout
# https://digital-forensics.sans.org/blog/tags/ext4
#
# Hal Pomeranz (hal@deer-run.com), 2017-06-12
#
# No warranty expressed or implied.
# Distributed under the Creative Commons "Attribution" (CC BY) License
# See https://creativecommons.org/licenses/

use strict;
use warnings;
use Getopt::Std;
$Getopt::Std::STANDARD_HELP_VERSION = 1;    # Terminate after --help

# Package variables filled in by getopts() below.
our ($opt_b, $opt_D, $opt_H, $opt_P);

# Print the usage message on STDERR and terminate with a failure status.
# Getopt::Std also calls this for --help.
sub HELP_MESSAGE {
    die <<"EoUseMsg";
Usage: cat image | $0 [-DP] [-H] [-b blocksize]

-D       Output details of all directory entries found
-P       Output full directory paths (best guess)
-H       Output header labels

-b size  Specify an alternate block size (default 4096)
EoUseMsg
}

getopts('b:DHP') || HELP_MESSAGE();
my $Block_Size   = $opt_b || 4096;
my $Show_Details = $opt_D;
my $Show_Paths   = $opt_P;
my $Show_Headers = $opt_H;

# A non-numeric or non-positive block size cannot be read; show the usage.
HELP_MESSAGE() unless ($Block_Size =~ /\A[0-9]+\z/ && $Block_Size > 0);

$Show_Paths = 1 unless ($Show_Details);     # Default is compute path info

# Maps the on-disk file type code to the single letter shown in -D output.
# Type 8 is accepted by valid_dir_values() but has no letter here, so it
# prints as an empty field.
my %File_Type_Char = ( '1' => 'f', '2' => 'd', '3' => 'c', '4' => 'b',
                       '5' => 'p', '6' => 's', '7' => 'l' );

my $buffer;
my $blocknum = -1;

# %Parent_Inode{dir inode}{parent inode} counts how often a "." / ".." pair
#   linking the two was seen.
# %File_Name{inode}{"name/type/containing dir inode"} is a heuristic score.
my (%Parent_Inode, %File_Name);

print join("\t", 'Block', 'Offset', 'Filename', 'Inode',
                 'Type', 'Size of Entry', 'Filename Len',
                 'Allocated', 'Notes'), "\n"
    if ($Show_Details && $Show_Headers);

# Use buffered read() rather than sysread(): on a pipe sysread() may hand
# back fewer than $Block_Size bytes, which would silently misalign every
# subsequent block.  read() only returns short at end of input.
binmode(STDIN);
while (read(STDIN, $buffer, $Block_Size)) {
    $blocknum += 1;
    my $file_list = parse_dir_block($buffer);   # empty list if not parsable
    next unless (@{$file_list});

    # parse_dir_block() never returns fewer than two entries, so index 1
    # is always present here.
    my $dir_start = ($file_list->[0]{'filename'} eq '.'
                     && $file_list->[1]{'filename'} eq '..');
    my $htree_blocks = 0;
    my ($this_inode, $parent);

    if ($dir_start) {
        my $dot_ref = shift(@{$file_list});
        $this_inode = $dot_ref->{'inode'};
        my $dotdot_ref = shift(@{$file_list});
        $parent = $dotdot_ref->{'inode'};
        $Parent_Inode{$this_inode}{$parent} += 1;

        $htree_blocks = parse_htree_root($buffer);

        if ($Show_Details) {
            my $extra = $htree_blocks ? "$htree_blocks htree leaf blocks"
                                      : 'directory is one block';
            output_entry_info($dot_ref, $blocknum, $this_inode, $parent, $extra);
            output_entry_info($dotdot_ref, $blocknum, $this_inode, $parent);
        }
    }

    # Outside of an initial directory block the containing directory is
    # unknown; the key then simply ends in an empty inode field.
    my $dir_inode = defined($this_inode) ? $this_inode : '';

    foreach my $ref (@{$file_list}) {
        output_entry_info($ref, $blocknum, $this_inode, $parent) if ($Show_Details);
        next unless ($Show_Paths);

        # %File_Name tracks a heuristic score for how likely a given file name is
        # to be associated with a particular inode.  This score is used to create
        # paths back to the root in make_best_path().
        #
        # Three criteria determine the value of a particular directory entry:
        #    1. Is it in an initial directory block (with the "." and ".." links)?
        #    2. Did we carve it from the slack after an htree dx_root record?
        #    3. Is it a deleted (carved) record or not?
        # The score values are based on limited testing, and may need tweaking.
        #
        my $key = "$ref->{'filename'}/$ref->{'type'}/$dir_inode";
        $File_Name{$ref->{'inode'}}{$key} += ($dir_start) ? 2 : 1;
        $File_Name{$ref->{'inode'}}{$key} += 2 if ($htree_blocks);
        $File_Name{$ref->{'inode'}}{$key} += 3 if (!$htree_blocks && !$ref->{'carved'});
    }
}


exit(0) unless ($Show_Paths);
print "\n\n" if ($Show_Details && $Show_Headers);
print "Inode\tPath Info\n" if ($Show_Headers);


# %Max_Dir_Score is used as a secondary sort criterion in make_best_path().
# It's the highest heuristic score for a directory type entry for a given
# inode (0 when the inode has no directory type entries at all).
my %Max_Dir_Score = ();
foreach my $inode (keys(%File_Name)) {
    my $best = 0;
    foreach my $key (grep { m|/2/| } keys(%{$File_Name{$inode}})) {
        $best = $File_Name{$inode}{$key} if ($File_Name{$inode}{$key} > $best);
    }
    $Max_Dir_Score{$inode} = $best;
}

# %Paths caches the best path found per inode; inode 2 is the root.
# %Path_In_Progress marks inodes whose path is currently being computed so
# that parent cycles in stale/carved data cannot recurse forever.
my %Paths = ( 2 => '' );
my %Path_In_Progress = ();
foreach my $inode (sort { $a <=> $b } keys(%Parent_Inode)) {
    $Paths{$inode} = make_best_path($inode) unless (defined($Paths{$inode}));
    print "$inode\t$Paths{$inode}\n";
}

###########################################################################################
###
### Program ends.  Subroutines below.
###
###########################################################################################


# Return the path of a prospective parent directory inode.
# Uses the %Paths cache when possible.  If the parent is already being
# resolved further up the call chain (a cycle), return '???' WITHOUT caching
# it, so that the placeholder never masks the real answer later.
sub _parent_path {
    my ($parent) = @_;

    return($Paths{$parent}) if (defined($Paths{$parent}));
    return('???') if ($Path_In_Progress{$parent});

    $Paths{$parent} = make_best_path($parent);
    return($Paths{$parent});
}


# Build the most plausible path for a directory inode.
#   $inode - inode number of the directory
# Returns a string: a rooted path ("/a/b"), an unrooted best guess
# ("???/b"), or '???' when nothing at all is known.
sub make_best_path {
    my ($inode) = @_;
    no warnings 'recursion';    # deep directory trees are legitimate

    # If we didn't find a directory entry for this inode, give up
    return('???') if (!defined($File_Name{$inode}));

    # Flag this inode for the duration of this call (restored on any return).
    local $Path_In_Progress{$inode} = 1;

    my $names   = $File_Name{$inode};
    my $parents = $Parent_Inode{$inode};    # undef if no "." link was found

    if (defined($parents)) {
        # We may have found multiple directory entries that associate
        # different parent inodes with this inode.  Pull out all of the
        # %File_Name entries for this inode which are directory entries
        # and which contain one of the possible parent inodes.
        #
        my @file_keys = ();
        foreach my $parent (keys(%{$parents})) {
            push(@file_keys, grep { m|/2/$parent$| } keys(%{$names}));
        }

        # Now take all of the @file_keys we pulled out above and march
        # through them in descending order by heuristic score, resolving
        # the parent inode recursively.  If we get a path that goes to the
        # root, then return that.  Otherwise return the first unrooted
        # path we get.
        #
        # The rooted test is made on the candidate path itself: the root
        # directory's own path is the empty string, so testing the parent's
        # path would never recognise a child of the root as rooted.
        #
        my @paths = ();
        foreach my $key (sort { $names->{$b} <=> $names->{$a} } @file_keys) {
            my ($dirname, $type, $parent) = split('/', $key);
            my $candidate = _parent_path($parent) . "/$dirname";

            return($candidate) if ($candidate =~ m|^/|);
            push(@paths, $candidate);
        }
        return($paths[0]) if (scalar(@paths));
    }

    # If we get here then either we don't have a parent inode associated
    # with this inode, or we failed to get any @file_keys entries associated
    # with the known parent info we found.
    #
    # Grab the directory type entry with the highest heuristic score
    # for this inode.  Bail out if there aren't any directory type entries.
    my ($dirname) = sort { $names->{$b} <=> $names->{$a} }
                    grep { m|/2/| } keys(%{$names});
    return('???') unless (defined($dirname) && length($dirname));

    # Bail out if we don't have any parent inode info for this inode.
    $dirname =~ s|/.*||;
    return("???/$dirname") if (!defined($parents));

    # Heuristically try to pick the best parent entry: most often seen
    # first, then highest directory score.  Return whatever we get.
    #
    my ($parent) = sort { $parents->{$b} <=> $parents->{$a} ||
                          ($Max_Dir_Score{$b} || 0) <=> ($Max_Dir_Score{$a} || 0) }
                   keys(%{$parents});
    return(_parent_path($parent) . "/$dirname");
}


# Decide whether a set of unpacked fields looks like a real directory entry.
#   $buffer    - data from the start of the candidate entry to the end of
#                the area being examined
#   $inode, $entry_len, $name_len, $file_type, $file_name - unpacked fields
# Returns 1 if plausible, otherwise a false value.
#
# Inode 2 is the root directory.  All other inodes <= 10 are reserved.
# The entry length must be positive and <= the amount of data left.
# It must also be a multiple of 4 bytes.
# The name length must be positive and <= the remaining space in the entry.
# Valid file type values range from 1-8 (8 is a Solaris Door).
# Slashes and nulls are not allowed in file names.
#
sub valid_dir_values {
    my ($buffer, $inode, $entry_len, $name_len, $file_type, $file_name) = @_;

    # Fields come back undefined when the data is too short to unpack.
    foreach my $field ($inode, $entry_len, $name_len, $file_type, $file_name) {
        return unless (defined($field));
    }

    return unless ($inode == 2 || $inode > 10);
    return unless ($entry_len <= length($buffer) && $entry_len > 0 && !($entry_len % 4));
    return unless ($name_len > 0 && $name_len <= ($entry_len - 8));
    return unless ($file_type > 0 && $file_type < 9);
    return if ($file_name =~ /[\/\000]/);   # '/' and null not allowed in file names
    return(1);
}


# Parse one block of data as a chain of directory entries.
#   $buffer - raw block contents
# Returns a reference to a list of entry records (hash refs with the keys
# filename, inode, type, entrysize, namesize, offset and, for entries
# recovered from slack space, carved).  Returns a reference to an empty
# list if the block does not parse cleanly or holds fewer than two entries.
sub parse_dir_block {
    my ($buffer) = @_;
    my $namelist = [];

    # Offsets are relative to the data actually supplied, so they stay
    # correct even for a short final block.
    my $block_len = length($buffer);

    while (length($buffer)) {

        # Fewer than 8 bytes cannot hold the fixed part of an entry.
        return([]) if (length($buffer) < 8);

        # Optimistically try to parse the next entry.
        # Bail out if we get invalid data.
        # On-disk fields are little-endian: inode (32 bit), entry length
        # (16 bit), name length (8 bit), file type (8 bit).
        my ($inode, $entry_len, $name_len, $file_type) = unpack("VvCC", $buffer);
        my $file_name = substr($buffer, 8, $name_len);
        return([]) unless (valid_dir_values($buffer, $inode, $entry_len, $name_len, $file_type, $file_name));

        # Compute the offset of this entry and then carve the entry
        # out of $buffer, reducing the size of $buffer.
        my $offset = $block_len - length($buffer);
        my $this_entry = substr($buffer, 0, $entry_len, '');

        # Add a record to the list of entries we've found so far.
        push(@{$namelist}, { 'filename'  => $file_name,
                             'inode'     => $inode,
                             'type'      => $file_type,
                             'entrysize' => $entry_len,
                             'namesize'  => $name_len,
                             'offset'    => $offset });

        # If there's enough slack space in this directory entry,
        # try seeing if there are any deleted directory entries.
        # The search area is measured back from the end of the entry so
        # that it stays 4 byte aligned.
        my $extra = $entry_len - $name_len - 8;
        if ($extra >= 12) {
            my $bytes_to_search = int($extra / 4) * 4;
            my $carved = carve_deleted_entries(substr($this_entry, -$bytes_to_search),
                                               $offset + ($entry_len - $bytes_to_search));
            push(@{$namelist}, @{$carved}) if (scalar(@{$carved}));
        }
    }

    return([]) unless (scalar(@{$namelist}) > 1);
    return($namelist);
}


# Search slack space for the remains of deleted directory entries.
#   $buffer - slack bytes to search (4 byte aligned within the block)
#   $offset - offset of $buffer within the block, used for reporting
# Returns a reference to a (possibly empty) list of entry records, each
# marked with 'carved' => 1.
sub carve_deleted_entries {
    my ($buffer, $offset) = @_;
    my $namelist = [];

    while (length($buffer) >= 12) {

        # Optimistically try to carve out a directory entry
        # (little-endian fields, as in parse_dir_block()).
        my ($inode, $entry_len, $name_len, $file_type) = unpack("VvCC", $buffer);
        my $file_name = substr($buffer, 8, $name_len);

        # If the data we carved is not valid, advance 4 bytes
        # and try again (directory entries are 4 byte aligned).
        if (!valid_dir_values($buffer, $inode, $entry_len, $name_len, $file_type, $file_name)) {
            substr($buffer, 0, 4, '');
            $offset += 4;
            next;
        }

        # Good data?  Make a record of what we found.
        push(@{$namelist}, { 'filename'  => $file_name,
                             'inode'     => $inode,
                             'type'      => $file_type,
                             'entrysize' => $entry_len,
                             'namesize'  => $name_len,
                             'offset'    => $offset,
                             'carved'    => 1 });

        # Advance to the next 4 byte aligned location past the name (not
        # past the whole entry, whose slack may hide further entries) and
        # start over again.
        #
        my $min_len = 8 + $name_len;
        my $remainder = $min_len % 4;
        $min_len += (4 - $remainder) if ($remainder);
        substr($buffer, 0, $min_len, '');
        $offset += $min_len;
    }

    return($namelist);
}


# Check whether an initial directory block carries an htree dx_root record.
#   $buffer - raw contents of a block that starts with "." and ".."
# Returns the number of entries in the htree hash array (one per leaf
# block) if a plausible single-level dx_root is present, otherwise undef.
sub parse_htree_root {
    my ($buffer) = @_;

    # The dx_root info follows the 24 bytes taken by the "." and ".."
    # entries: 8 bytes of info, then the 16 bit limit and count fields.
    return unless (length($buffer) >= 36);
    my ($reserved, $hash_type, $info_len, $depth, $flags, $max_entries, $num_entries) =
        unpack("x24 V C C C C v v", $buffer);

    # Reserved bytes must be zero.
    # Hash type must be <= 5.
    # Length of the dx_root info is always 8.
    # Unused flags field should be zero.
    # Can't have more than the stated max entries.
    return if ($reserved != 0 || $hash_type > 5 || $info_len != 8 || $flags != 0 || $num_entries > $max_entries);
    return unless ($max_entries == ($Block_Size - 32) / 8);    # not fully tested

    if ($depth != 0) {      # TODO: deal with deep trees
        warn "$0: multi-level htree (depth $depth) not supported; ignoring htree data\n";
        return;
    }

    # Make sure that the block numbers in the htree hash array make sense.
    #
    # The hash array starts at offset 32 and consists of 8 byte records.
    # The first record holds the limit and count fields where the others
    # hold a hash value, so the block number for the "zero hash" is at
    # offset 36 and every later block number follows at 8 byte intervals.
    #
    return unless (length($buffer) >= 32 + 8 * $num_entries);
    foreach my $i (0 .. $num_entries - 1) {
        my $block = unpack("V", substr($buffer, 36 + 8 * $i, 4));
        return unless ($block <= $num_entries);
    }

    return($num_entries);
}


# Print one tab separated line of detail (-D output) for a directory entry.
#   $ref          - entry record as built by parse_dir_block()
#   $blocknum     - number of the block the entry was found in
#   $dot_inode    - inode of the containing directory (currently unused)
#   $dotdot_inode - inode of its parent (currently unused)
#   $extra        - optional text for the Notes column
sub output_entry_info {
    my ($ref, $blocknum, $dot_inode, $dotdot_inode, $extra) = @_;

    my $allocated = defined($ref->{'carved'}) ? 'n' : 'y';
    my $type_char = $File_Type_Char{$ref->{'type'}};
    print join("\t", $blocknum,
                     $ref->{'offset'},
                     $ref->{'filename'},
                     $ref->{'inode'},
                     defined($type_char) ? $type_char : '',
                     $ref->{'entrysize'},
                     $ref->{'namesize'},
                     $allocated,
                     defined($extra) ? $extra : ''), "\n";
}