├── LICENSE ├── README.md ├── setup.py └── src └── yaffshiv /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 devttys0 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | About 2 | ===== 3 | 4 | A simple YAFFS file system parser and extractor, written in Python. 
5 | 6 | Features 7 | ======== 8 | 9 | * List and/or extract regular files, folders, symlinks, hard links, and special device files 10 | * Automatic detection and/or brute force of YAFFS build parameters (page size, spare size, endianess, etc) 11 | * Support for both big and little endian YAFFS file systems 12 | * Compatible with both Python2 and Python3 13 | 14 | Installation 15 | ============ 16 | 17 | Use the included `setup.py` script to install: 18 | 19 | ```bash 20 | $ python setup.py install 21 | ``` 22 | 23 | There are no required pre-requisites, besides Python itself. 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | from distutils.core import setup, Command 4 | from distutils.dir_util import remove_tree 5 | 6 | SCRIPT_NAME = "yaffshiv" 7 | 8 | class CleanCommand(Command): 9 | description = "Clean Python build directories" 10 | user_options = [] 11 | 12 | def initialize_options(self): 13 | pass 14 | 15 | def finalize_options(self): 16 | pass 17 | 18 | def run(self): 19 | try: 20 | remove_tree("build") 21 | except KeyboardInterrupt as e: 22 | raise e 23 | except Exception: 24 | pass 25 | 26 | try: 27 | remove_tree("dist") 28 | except KeyboardInterrupt as e: 29 | raise e 30 | except Exception: 31 | pass 32 | 33 | setup(name = SCRIPT_NAME, 34 | version = "0.1", 35 | description = "YAFFS extraction tool", 36 | author = "Craig Heffner", 37 | url = "https://github.com/devttys0/%s" % SCRIPT_NAME, 38 | requires = [], 39 | scripts = [os.path.join("src", SCRIPT_NAME)], 40 | cmdclass = {'clean' : CleanCommand} 41 | ) 42 | 43 | -------------------------------------------------------------------------------- /src/yaffshiv: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | import struct 6 | import string 7 | 8 
def is_safe_path(basedir, path):
    '''
    Checks that path, once fully resolved, is located inside of basedir.
    Used to reject file names that would escape the extraction directory
    (e.g., via "../" path components).

    basedir - Directory that path must not escape. May be relative; may be
              a str or a bytes object, as long as path is of the same type.
    path    - Candidate path to check.

    Returns True if path resolves to basedir or to something beneath it, else False.
    '''
    # Resolve both sides the same way. Comparing an unresolved basedir against a
    # resolved path raises ValueError for relative directories and wrongly rejects
    # everything when basedir has a trailing separator or is reached via a symlink.
    basedir = os.path.realpath(basedir)
    matchpath = os.path.realpath(path)

    try:
        return basedir == os.path.commonpath((basedir, matchpath))
    except ValueError:
        # commonpath refuses paths that share nothing at all (e.g., different
        # drives); such a path cannot be inside of basedir.
        return False


class Compat(object):
    '''
    Python2/3 compatibility methods.
    '''

    @staticmethod
    def str2bytes(s):
        '''
        Returns s encoded as latin-1 if it is a str; anything else is returned untouched.
        '''
        if isinstance(s, str):
            return s.encode('latin-1')
        else:
            return s

    @staticmethod
    def iterator(d):
        '''
        Returns an iterable of the (key, value) pairs in the dictionary d.
        '''
        if sys.version_info[0] > 2:
            return d.items()
        else:
            return d.iteritems()

    @staticmethod
    def has_key(d, k):
        '''
        Returns True if the key k exists in the dictionary d.
        '''
        # The "in" operator behaves identically on Python2 and Python3 dictionaries.
        return k in d

class YAFFSException(Exception):
    '''
    Raised for all YAFFS parsing and auto-detection errors.
    '''
    pass

class YAFFSConfig(object):
    '''
    Container class for storing global configuration data.
    Also includes methods for automatic detection of the
    YAFFS configuration settings required for proper file
    system extraction.
    '''

    # These are signatures that identify the start of a spare data section,
    # and hence, the end of a page. If they can be identified, then we can
    # determine the page size. Take the following hexdump for example:
    #
    # 00000800 00 10 00 00 01 01 00 00 00 00 00 00 ff ff ff ff |................|
    # 00000810 03 00 00 00 01 01 00 00 ff ff 62 61 72 00 00 00 |..........bar...|
    # 00000820 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
    #
    # The page ends (and spare data begins) at offset 0x800; note that it starts
    # with the bytes 0x00100000. These represent the object's chunk ID and are reliable
    # for the first YAFFS object entry. These would, of course, be byte swapped on
    # a big endian target. Further, if ECC was not used, there would be two additional
    # bytes (0xFFFF) in front of the 0x00100000, so these signatures can also be
    # used to detect if ECC is used or not.
64 | # 65 | # Note that this should work for a typical Linux YAFFS rootfs, but not for all 66 | # possible YAFFS file system images. 67 | SPARE_START_BIG_ENDIAN_ECC = b"\x00\x00\x10\x00" 68 | SPARE_START_BIG_ENDIAN_NO_ECC = b"\xFF\xFF\x00\x00\x10\x00" 69 | SPARE_START_LITTLE_ENDIAN_ECC = b"\x00\x10\x00\x00" 70 | SPARE_START_LITTLE_ENDIAN_NO_ECC = b"\xFF\xFF\x00\x10\x00\x00" 71 | 72 | def __init__(self, **kwargs): 73 | self.endianess = YAFFS.LITTLE_ENDIAN 74 | self.page_size = YAFFS.DEFAULT_PAGE_SIZE 75 | self.spare_size = YAFFS.DEFAULT_SPARE_SIZE 76 | self.ecclayout = True 77 | self.preserve_mode = True 78 | self.preserve_owner = False 79 | self.debug = False 80 | self.auto = False 81 | self.sample_data = None 82 | 83 | for (k, v) in Compat.iterator(kwargs): 84 | if v is not None: 85 | setattr(self, k, v) 86 | 87 | if self.auto and self.sample_data: 88 | self._auto_detect_settings() 89 | 90 | def print_settings(self): 91 | if self.endianess == YAFFS.LITTLE_ENDIAN: 92 | endian_str = "Little" 93 | else: 94 | endian_str = "Big" 95 | 96 | sys.stdout.write("Page size: %d\n" % self.page_size) 97 | sys.stdout.write("Spare size: %d\n" % self.spare_size) 98 | sys.stdout.write("ECC layout: %s\n" % self.ecclayout) 99 | sys.stdout.write("Endianess: %s\n\n" % endian_str) 100 | 101 | def _auto_detect_settings(self): 102 | ''' 103 | This method attempts to identify the page size, spare size, and ECC configuration 104 | for the provided sample data. There are various methods of doing this, but here we 105 | rely on signature based detection. The other method I've seen used is to see if 106 | the file is an even multiple of the page size plus the spare size. This method 107 | usually assumes that the spare size is 1/32nd of the page size (it doesn't have 108 | to be), and also assumes that there is no trailing data on the end of the file 109 | system (there very well may be if it's been pulled from a firmware update or a 110 | live system). 
111 | 112 | The signature method works even if assumptions about the relationship between 113 | page size and spare size are violated (in practice they are), and also if we are 114 | fed a file that has trailing garbage data. It also allows us to detect the ECC 115 | configuration, which is important if you want your YAFFS parsing to actually work. 116 | ''' 117 | 118 | # Some tools assume that the spare size is 1/32nd of the page size. 119 | # For example, if your page size is 4096, then your spare size must be 128. 120 | # While this is the default for mkyaffs, you can mix and match, and in 121 | # practice, that is exactly what is seen in the wild. 122 | # 123 | # Thus, we keep a list of valid page sizes and spare sizes, but there 124 | # is no restriction on their pairing. 125 | valid_page_sizes = YAFFS.PAGE_SIZES + [-1] 126 | valid_spare_sizes = YAFFS.SPARE_SIZES 127 | 128 | # Spare data should start at the end of the page. Assuming that the page starts 129 | # at the beginning of the data blob we're working with (if it doesn't, nothing 130 | # is going to work correctly anyway), if we can identify where the spare data starts 131 | # then we know the page size. 132 | for page_size in valid_page_sizes: 133 | 134 | if page_size == -1: 135 | raise YAFFSException("Auto-detection failed: Could not locate start of spare data section.") 136 | 137 | # Matching the spare data signatures not only tells us the page size, but also 138 | # endianess and ECC layout as well! 
139 | if self.sample_data[page_size:].startswith(self.SPARE_START_LITTLE_ENDIAN_ECC): 140 | self.page_size = page_size 141 | self.ecclayout = True 142 | self.endianess = YAFFS.LITTLE_ENDIAN 143 | break 144 | elif self.sample_data[page_size:].startswith(self.SPARE_START_LITTLE_ENDIAN_NO_ECC): 145 | self.page_size = page_size 146 | self.ecclayout = False 147 | self.endianess = YAFFS.LITTLE_ENDIAN 148 | break 149 | elif self.sample_data[page_size:].startswith(self.SPARE_START_BIG_ENDIAN_ECC): 150 | self.page_size = page_size 151 | self.ecclayout = True 152 | self.endianess = YAFFS.BIG_ENDIAN 153 | break 154 | elif self.sample_data[page_size:].startswith(self.SPARE_START_BIG_ENDIAN_NO_ECC): 155 | self.page_size = page_size 156 | self.ecclayout = False 157 | self.endianess = YAFFS.BIG_ENDIAN 158 | break 159 | 160 | # Now to try to identify the spare data size... 161 | try: 162 | # If not using the ECC layout, there are 2 extra bytes at the beginning of the 163 | # spare data block. Ignore them. 164 | if not self.ecclayout: 165 | offset = 6 166 | else: 167 | offset = 4 168 | 169 | # The spare data signature is built dynamically, as there are repeating data patterns 170 | # that we can match on to find where the spare data ends. Take this hexdump for example: 171 | # 172 | # 00000800 00 10 00 00 01 01 00 00 00 00 00 00 ff ff ff ff |................| 173 | # 00000810 03 00 00 00 01 01 00 00 ff ff 62 61 72 00 00 00 |..........bar...| 174 | # 00000820 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| 175 | # 176 | # The spare data starts at offset 0x800 and is 16 bytes in size. The next page data then 177 | # starts at offset 0x810. Not that the four bytes at 0x804 (in the spare data section) and 178 | # the four bytes at 0x814 (in the next page data section) are identical. 
This is because 179 | # the four bytes at offset 0x804 represent the object ID of the previous object, and the four 180 | # bytes at offset 0x814 represent the parent object ID of the next object. Also, the 181 | # four bytes in the page data are always followed by 0xFFFF, as those are the unused name 182 | # checksum bytes. 183 | # 184 | # Thus, the signature for identifying the next page section (and hence, the end of the 185 | # spare data section) becomes: [the 4 bytes starting at offset 0x804] + 0xFFFF 186 | # 187 | # Note that this requires at least one non-empty subdirectory; in practice, any Linux 188 | # file system should meet this requirement, but one could create a file system that 189 | # does not meet this requirement. 190 | spare_sig = self.sample_data[self.page_size+offset:self.page_size+offset+4] + b"\xFF\xFF" 191 | 192 | # Spare section ends 4 bytes before the spare_sig signature 193 | self.spare_size = self.sample_data[self.page_size:].index(spare_sig) - 4 194 | except Exception as e: 195 | raise YAFFSException("Auto-detection failed: Could not locate end of spare data section.") 196 | 197 | # Sanity check the spare size, make sure it looks legit 198 | if self.spare_size not in valid_spare_sizes: 199 | raise YAFFSException("Auto-detection failed: Detected an unlikely spare size: %d" % self.spare_size) 200 | 201 | class YAFFS(object): 202 | ''' 203 | Main YAFFS class; all other YAFFS classes are subclassed from this. 204 | It contains some basic definitions and methods used throughout the subclasses. 
205 | ''' 206 | 207 | BIG_ENDIAN = ">" 208 | LITTLE_ENDIAN = "<" 209 | 210 | # Valid page and spare sizes 211 | PAGE_SIZES = [512, 1024, 2048, 4096, 8192, 16384] 212 | SPARE_SIZES = [16, 32, 64, 128, 256, 512] 213 | 214 | # These are the default values used by mkyaffs 215 | DEFAULT_PAGE_SIZE = 2048 216 | DEFAULT_SPARE_SIZE = 64 217 | 218 | # These assume non-unicode YAFFS name lengths 219 | # NOTE: In the YAFFS code YAFFS_MAX_NAME_LENGTH is #defined as 255. 220 | # Although it does not say so, from observation this length 221 | # must include the two (unused) name checksum bytes, and as 222 | # such, it is defined here as 253. 223 | YAFFS_MAX_NAME_LENGTH = 255 - 2 224 | YAFFS_MAX_ALIAS_LENGTH = 159 225 | 226 | # Object type IDs 227 | YAFFS_OBJECT_TYPE_UNKNOWN = 0 228 | YAFFS_OBJECT_TYPE_FILE = 1 229 | YAFFS_OBJECT_TYPE_SYMLINK = 2 230 | YAFFS_OBJECT_TYPE_DIRECTORY = 3 231 | YAFFS_OBJECT_TYPE_HARDLINK = 4 232 | YAFFS_OBJECT_TYPE_SPECIAL = 5 233 | 234 | # Special parent IDs 235 | YAFFS_OBJECTID_ROOT = 1 236 | YAFFS_OBJECTID_LOSTNFOUND = 2 237 | YAFFS_OBJECTID_UNLINKED = 3 238 | YAFFS_OBJECTID_DELETED = 4 239 | 240 | # These must be overidden with valid data by any subclass wishing 241 | # to use the read_long, read_short, read_next or read_block methods. 242 | # 243 | # data - The data that the subclass needs to be read/parsed. 244 | # offset - This is initialized to zero and auto-incremented by the read_next method. 245 | # Usually no need for subclasses to touch this unless they want to know how 246 | # far into the data they've read so far. 247 | # config - An instance of the YAFFSConfig class. 248 | data = b'' 249 | offset = 0 250 | config = None 251 | 252 | def dbg_write(self, msg): 253 | ''' 254 | Prints debug message if self.config.debug is True. 255 | ''' 256 | if self.config.debug: 257 | sys.stderr.write(msg) 258 | 259 | def read_long(self): 260 | ''' 261 | Reads 4 bytes from the current self.offset location inside of self.data. 
262 | Returns those 4 bytes as an integer. 263 | Endianess is determined by self.config.endianess. 264 | Does not increment self.offset. 265 | ''' 266 | return struct.unpack("%sL" % self.config.endianess, self.data[self.offset:self.offset+4])[0] 267 | 268 | def read_short(self): 269 | ''' 270 | Reads 2 bytes from the current self.offset location inside of self.data. 271 | Returns those 4 bytes as an integer. 272 | Endianess is determined by self.config.endianess. 273 | Does not increment self.offset. 274 | ''' 275 | return struct.unpack("%sH" % self.config.endianess, self.data[self.offset:self.offset+2])[0] 276 | 277 | def read_next(self, size, raw=False): 278 | ''' 279 | Reads the next size bytes from self.data and increments self.offset by size. 280 | If size is 2 or 4, by default self.read_long or self.read_short will be called respectively, 281 | unless raw is set to True. 282 | ''' 283 | if size == 4 and not raw: 284 | val = self.read_long() 285 | elif size == 2 and not raw: 286 | val = self.read_short() 287 | else: 288 | val = self.data[self.offset:self.offset+size] 289 | 290 | self.offset += size 291 | return val 292 | 293 | def read_block(self): 294 | ''' 295 | Reads the next page of data from self.data, including the spare OOB data. 296 | Returns a tuple of (page_data, spare_data). 297 | The page and spare data sizes are determined by self.config.page_size and 298 | self.config.spare_size. 299 | ''' 300 | self.dbg_write("Reading page data from 0x%X - 0x%X\n" % (self.offset, self.offset+self.config.page_size)) 301 | page_data = self.read_next(self.config.page_size) 302 | 303 | self.dbg_write("Reading spare data from 0x%X - 0x%X\n" % (self.offset, self.offset+self.config.spare_size)) 304 | spare_data = self.read_next(self.config.spare_size) 305 | 306 | return (page_data, spare_data) 307 | 308 | def null_terminate_string(self, string): 309 | ''' 310 | Searches a string for the first null byte and terminates the 311 | string there. 
Returns the truncated string. 312 | ''' 313 | try: 314 | i = string.index(b'\x00') 315 | except Exception as e: 316 | i = len(string) 317 | 318 | return string[0:i] 319 | 320 | class YAFFSObjType(YAFFS): 321 | ''' 322 | YAFFS object type container. The object type is just a 4 byte identifier. 323 | ''' 324 | 325 | # Just maps object ID values to printable names, used by self.__str__ 326 | TYPE2STR = { 327 | YAFFS.YAFFS_OBJECT_TYPE_UNKNOWN : "YAFFS_OBJECT_TYPE_UNKNOWN", 328 | YAFFS.YAFFS_OBJECT_TYPE_FILE : "YAFFS_OBJECT_TYPE_FILE", 329 | YAFFS.YAFFS_OBJECT_TYPE_SYMLINK : "YAFFS_OBJECT_TYPE_SYMLINK", 330 | YAFFS.YAFFS_OBJECT_TYPE_DIRECTORY : "YAFFS_OBJECT_TYPE_DIRECTORY", 331 | YAFFS.YAFFS_OBJECT_TYPE_HARDLINK : "YAFFS_OBJECT_TYPE_HARDLINK", 332 | YAFFS.YAFFS_OBJECT_TYPE_SPECIAL : "YAFFS_OBJECT_TYPE_SPECIAL", 333 | } 334 | 335 | def __init__(self, data, config): 336 | ''' 337 | data - Raw 4 byte object type identifier data. 338 | config - An instance of YAFFSConfig. 339 | ''' 340 | self.data = data 341 | self.config = config 342 | self._type = self.read_next(4) 343 | 344 | if self._type not in self.TYPE2STR.keys(): 345 | raise YAFFSException("Invalid object type identifier: 0x%X!" % self._type) 346 | 347 | def __str__(self): 348 | return self.TYPE2STR[self._type] 349 | 350 | def __int__(self): 351 | return self._type 352 | 353 | def __get__(self, instance, owner): 354 | return self._type 355 | 356 | class YAFFSSpare(YAFFS): 357 | ''' 358 | Parses and stores relevant data from YAFFS spare data sections. 359 | Primarily important for retrieving each file object's ID. 360 | ''' 361 | 362 | def __init__(self, data, config): 363 | ''' 364 | data - Raw bytes of the spare OOB data. 365 | config - An instance of YAFFSConfig. 
366 | ''' 367 | self.data = data 368 | self.config = config 369 | self.has_packed_data = None 370 | self.obj_id = None 371 | self.chunk_id = None 372 | self.parent_obj_id = None 373 | self.obj_type = None 374 | self.file_size = None 375 | 376 | # YAFFS images built without --yaffs-ecclayout have an extra two 377 | # bytes before the chunk ID. Possibly an unused CRC? 378 | if not self.config.ecclayout: 379 | junk = self.read_next(2) 380 | 381 | self.sequence_id = self.read_next(4) 382 | 383 | if self.sequence_id == 0xFFFFFFFF: 384 | raise YAFFSException("Bad spare data") 385 | 386 | self.obj_id = self.read_next(4) 387 | self.chunk_id = self.read_next(4) 388 | self.n_bytes = self.read_next(4) 389 | 390 | if self.chunk_id & 0x80000000: 391 | self.has_packed_data = True 392 | self.obj_type = struct.pack("<L", self.obj_id >> 28) 393 | self.obj_id = self.obj_id & ~(0x0f << 28) 394 | self.parent_obj_id = self.chunk_id & 0x0FFFFFFF 395 | self.chunk_id = 0 396 | self.file_size = self.n_bytes 397 | 398 | 399 | class YAFFSEntry(YAFFS): 400 | ''' 401 | Parses and stores information from each YAFFS object entry data structure. 402 | TODO: Implement as a ctypes Structure class? 403 | ''' 404 | 405 | def __init__(self, data, spare, config): 406 | ''' 407 | data - Page data, as returned by YAFFS.read_block. 408 | spare - Spare OOB data, as returned by YAFFS.read_block. 409 | config - An instance of YAFFSConfig. 410 | ''' 411 | self.data = data 412 | self.config = config 413 | # This is filled in later, by YAFFSParser.next_entry 414 | self.file_data = b'' 415 | 416 | # Pass the spare data to YAFFSSpare for processing. 417 | # Keep a copy of this object's ID, as parsed from the spare data, for convenience. 418 | try: 419 | self.spare = YAFFSSpare(spare, self.config) 420 | except YAFFSException as e: 421 | raise e 422 | self.yaffs_obj_id = self.spare.obj_id 423 | 424 | # Read in the first four bytes, which are the object type ID, 425 | # and pass them to YAFFSObjType for processing. 
426 | obj_type_raw = self.read_next(4, raw=True) 427 | if self.spare.has_packed_data: 428 | if self.spare.obj_type: 429 | obj_type_raw = self.spare.obj_type 430 | else: 431 | raise YAFFSException("No obj_type in spare. Erased block, skipping!") 432 | #else: 433 | # raise YAFFSException("No packet tags found! Erased block, skipping!") 434 | 435 | if self.spare.chunk_id: 436 | raise YAFFSException("DATA page, skipping!") 437 | 438 | self.yaffs_obj_type = YAFFSObjType(obj_type_raw, self.config) 439 | # The object ID of this object's parent (e.g., the ID of the directory 440 | # that a file resides in). 441 | self.parent_obj_id = self.read_next(4) 442 | if self.spare.has_packed_data and self.spare.parent_obj_id: 443 | self.parent_obj_id = self.spare.parent_obj_id 444 | if self.parent_obj_id in (0, self.YAFFS_OBJECTID_LOSTNFOUND, 445 | self.YAFFS_OBJECTID_DELETED, self.YAFFS_OBJECTID_UNLINKED): 446 | self.dbg_write("Found deleted object, skipping!\n") 447 | raise YAFFSException("Found deleted object, skipping!") 448 | 449 | 450 | # File name and checksum (checksum no longer used in YAFFS) 451 | self.sum_no_longer_used = self.read_next(2) 452 | self.name = self.null_terminate_string(self.read_next(self.YAFFS_MAX_NAME_LENGTH+1)) 453 | 454 | # Should be 0xFFFFFFFF 455 | junk = self.read_next(4) 456 | 457 | # File mode and ownership info 458 | self.yst_mode = 0o7777 & self.read_next(4) 459 | self.yst_uid = self.read_next(4) 460 | self.yst_gid = self.read_next(4) 461 | 462 | # File timestamp info 463 | self.yst_atime = self.read_next(4) 464 | self.yst_mtime = self.read_next(4) 465 | self.yst_ctime = self.read_next(4) 466 | 467 | # Low 32 bits of file size 468 | self.file_size_low = self.read_next(4) 469 | 470 | # Used for hard links, specifies the object ID of the file to be hardlinked to. 
471 | self.equiv_id = self.read_next(4) 472 | 473 | # Aliases are for symlinks only 474 | self.alias = self.null_terminate_string(self.read_next(self.YAFFS_MAX_ALIAS_LENGTH+1)) 475 | 476 | # Stuff for block and char devices (equivalent of stat.st_rdev in C) 477 | self.yst_rdev = self.read_next(4) 478 | 479 | # Appears to be for timestamp stuff for WinCE 480 | self.win_ctime_1 = self.read_next(4) 481 | self.win_ctime_2 = self.read_next(4) 482 | self.win_atime_1 = self.read_next(4) 483 | self.win_atime_2 = self.read_next(4) 484 | self.win_mtime_1 = self.read_next(4) 485 | self.win_mtime_2 = self.read_next(4) 486 | 487 | # The only thing this code uses from these entries is file_size_high (high 32 bits of 488 | # the file size). 489 | self.inband_shadowed_obj_id = self.read_next(4) 490 | self.inband_is_shrink = self.read_next(4) 491 | self.file_size_high = self.read_next(4) 492 | self.reserved = self.read_next(1) 493 | self.shadows_obj = self.read_next(4) 494 | self.is_shrink = self.read_next(4) 495 | 496 | # Calculate file size from file_size_low and file_size_high. 497 | # Both will be 0xFFFFFFFF if unused. 498 | if self.file_size_high != 0xFFFFFFFF: 499 | self.file_size = self.file_size_low | (self.file_size_high << 32) 500 | elif self.file_size_low != 0xFFFFFFFF: 501 | self.file_size = self.file_size_low 502 | else: 503 | self.file_size = 0 504 | 505 | if self.spare.has_packed_data and self.spare.file_size: 506 | self.file_size = self.spare.file_size 507 | 508 | class YAFFSParser(YAFFS): 509 | ''' 510 | Main YAFFS file system parser. Primary method is self.next_entry, which yields 511 | the next object entry in the file system. 
512 | ''' 513 | 514 | def __init__(self, data, config): 515 | self.data = data 516 | self.data_len = len(data) 517 | self.config = config 518 | self.printset = set(Compat.str2bytes(string.printable)) 519 | 520 | def __enter__(self): 521 | return self 522 | 523 | def __exit__(self, a, b, c): 524 | return None 525 | 526 | class scanned_data(): 527 | def __init__(self): 528 | self.entries = [] 529 | self.spares = {} 530 | def append(self, data): 531 | if type(data) is YAFFSEntry: 532 | for entry in self.entries: 533 | if data.yaffs_obj_id == entry.yaffs_obj_id and \ 534 | data.spare.sequence_id >= entry.spare.sequence_id: 535 | self.entries.remove(entry) 536 | self.entries.append(data) 537 | if type(data) is YAFFSSpare: 538 | current = self.spares.get(data.obj_id, []) 539 | current.append(data) 540 | self.spares[data.obj_id] = current 541 | 542 | def sort_entries(self): 543 | self.entries.sort( 544 | key=lambda x: x.parent_obj_id if int(x.yaffs_obj_type) == YAFFS.YAFFS_OBJECT_TYPE_DIRECTORY else x.yaffs_obj_id 545 | ) 546 | 547 | def scan_fs(self): 548 | self.scanned_data = self.scanned_data() 549 | while self.offset < self.data_len: 550 | spare = None 551 | obj_hdr = None 552 | current_offset = self.offset 553 | (obj_hdr_data, obj_hdr_spare) = self.read_block() 554 | try: 555 | obj_hdr = YAFFSEntry(obj_hdr_data, obj_hdr_spare, self.config) 556 | except YAFFSException as e: 557 | self.dbg_write("YAFFSException: {}\n".format(e)) 558 | if not obj_hdr: 559 | try: 560 | obj_hdr = YAFFSSpare(obj_hdr_spare, self.config) 561 | except YAFFSException as e: 562 | self.dbg_write("YAFFSException: {}\n".format(e)) 563 | continue 564 | self.dbg_write("GOT OBJECT: {}\n".format(obj_hdr)) 565 | obj_hdr.data_offset = current_offset 566 | self.scanned_data.append(obj_hdr) 567 | self.scanned_data.sort_entries() 568 | self.dbg_write("FOUND {} FS objects\n".format(len(self.scanned_data.entries))) 569 | self.offset = 0 570 | return self.scanned_data 571 | 572 | def next_entry(self): 573 | 
''' 574 | Yields the next object in the YAFFS file system (instance of YAFFSEntry) 575 | ''' 576 | for entry in self.scanned_data.entries: 577 | obj_hdr = entry 578 | self.dbg_write("GOT OBJECT: {}\n".format(obj_hdr)) 579 | self.offset = obj_hdr.data_offset 580 | 581 | # Sanity check the file name. This is done primarily for cases where there is trailing data 582 | # at the end of the YAFFS file system, so if we're processing bogus data then the file name 583 | # will likely be garbled. 584 | if obj_hdr.name: 585 | if not set(obj_hdr.name).issubset(self.printset): 586 | raise YAFFSException("Object ID #%d has a non-printable file name [%s]!\n" % (obj_hdr.yaffs_obj_id, obj_hdr.name)) 587 | 588 | # Read in the file data, one page at a time 589 | if obj_hdr.file_size > 0: 590 | 591 | # Sanity check the file size before reading it. Especially important if fed garbage data! 592 | if obj_hdr.file_size > (self.data_len - self.offset): 593 | raise YAFFSException("File size for file '%s' exceeds the end of the file system [0x%X]!\n" % (obj_hdr.name, 594 | obj_hdr.file_size)) 595 | 596 | bytes_remaining = obj_hdr.file_size 597 | 598 | # If a file ends in the middle of a page, which it most likely does, 599 | # then the page is padded out with 0xFF. Thus, it is safe to read data 600 | # one page at a time via self.read_block until all file data has been 601 | # read. 
602 | file_chunk_id = 1 603 | while bytes_remaining: 604 | file_current_chunk = None 605 | file_seq = 0 606 | for en in self.scanned_data.spares.get(entry.yaffs_obj_id): 607 | if en.chunk_id == file_chunk_id and en.sequence_id >= file_seq: 608 | file_current_chunk = en 609 | file_seq = en.sequence_id 610 | self.dbg_write("Found candidate: \n") 611 | self.dbg_write("sequence: {}, obj_id: {}, chunk_id: {}, n_bytes: {}, parent_obj_id: {}, obj_type: {}, file_size: {}\n".format( 612 | en.sequence_id, en.obj_id, en.chunk_id, en.n_bytes, en.parent_obj_id, 613 | en.obj_type, en.file_size)) 614 | if not file_current_chunk: 615 | self.dbg_write("DID NOT FUND CHUNK FOR {} Read: {}, remaining: {} !!!\n".format( 616 | entry.name, len(obj_hdr.file_data), bytes_remaining)) 617 | break 618 | 619 | file_chunk_id += 1 620 | self.offset = file_current_chunk.data_offset 621 | (data, spare) = self.read_block() 622 | if len(data) < bytes_remaining: 623 | obj_hdr.file_data += data 624 | bytes_remaining -= len(data) 625 | else: 626 | obj_hdr.file_data += data[0:bytes_remaining] 627 | bytes_remaining = 0 628 | 629 | if obj_hdr.file_size > 0 and bytes_remaining: 630 | print("Not all chunks found for {}, skipping".format(obj_hdr.name)) 631 | continue 632 | 633 | yield obj_hdr 634 | 635 | class YAFFSExtractor(YAFFS): 636 | ''' 637 | Class for extracting information and data from a YAFFS file system. 638 | ''' 639 | 640 | def __init__(self, data, config): 641 | ''' 642 | data - Raw string containing YAFFS file system data. 643 | Trailing data is usually OK, but the first byte 644 | in data must be the beginning of the file system. 645 | config - An instance of YAFFSConfig. 646 | ''' 647 | self.file_paths = {} 648 | self.file_entries = {} 649 | self.data = data 650 | self.config = config 651 | 652 | def parse(self): 653 | ''' 654 | Parses the YAFFS file system, builds directory structures and stores file info / data. 655 | Must be called before all other methods in this class. 
656 | ''' 657 | with YAFFSParser(self.data, self.config) as parser: 658 | parser.scan_fs() 659 | self.dbg_write("Scanned data:\n") 660 | for i in parser.scanned_data.entries: 661 | self.dbg_write("sequence: {}, obj_id: {}, chunk_id: {}, n_bytes: {}, parent_obj_id: {}, obj_type: {}, name: {}\n".format( 662 | i.spare.sequence_id, i.spare.obj_id, i.spare.chunk_id, i.file_size, i.parent_obj_id, 663 | i.spare.obj_type, i.name)) 664 | for dummy in parser.scanned_data.spares.values(): 665 | for i in dummy: 666 | self.dbg_write("sequence: {}, obj_id: {}, chunk_id: {}, n_bytes: {}, parent_obj_id: {}, obj_type: {}\n".format( 667 | i.sequence_id, i.obj_id, i.chunk_id, i.n_bytes, i.parent_obj_id, 668 | i.obj_type)) 669 | for entry in parser.next_entry(): 670 | 671 | # Figure out the full path of this file entry 672 | if Compat.has_key(self.file_paths, entry.parent_obj_id): 673 | path = os.path.join(self.file_paths[entry.parent_obj_id], entry.name) 674 | else: 675 | if entry.parent_obj_id != self.YAFFS_OBJECTID_ROOT: 676 | self.dbg_write("Warning: File %s is the child of an unknown parent object [%d]!\n" % (entry.name, 677 | entry.parent_obj_id)) 678 | path = os.path.join(b"lost+found", entry.name) 679 | else: 680 | path = entry.name 681 | 682 | # Store full file paths and entry data for later use 683 | self.file_paths[entry.yaffs_obj_id] = path 684 | self.file_entries[entry.yaffs_obj_id] = entry 685 | 686 | if self.config.debug: 687 | self._print_entry(entry) 688 | 689 | return len(self.file_entries) 690 | 691 | def _print_entry(self, entry): 692 | ''' 693 | Prints info about a specific file entry. 
694 | ''' 695 | sys.stdout.write("###################################################\n") 696 | sys.stdout.write("File type: %s\n" % str(entry.yaffs_obj_type)) 697 | sys.stdout.write("File ID: %d\n" % entry.yaffs_obj_id) 698 | sys.stdout.write("File parent ID: %d\n" % entry.parent_obj_id) 699 | sys.stdout.write("File name: %s" % self.file_paths[entry.yaffs_obj_id]) 700 | if int(entry.yaffs_obj_type) == self.YAFFS_OBJECT_TYPE_SYMLINK: 701 | sys.stdout.write(" -> %s\n" % entry.alias) 702 | elif int(entry.yaffs_obj_type) == self.YAFFS_OBJECT_TYPE_HARDLINK: 703 | sys.stdout.write("\nPoints to file ID: %d\n" % entry.equiv_id) 704 | else: 705 | sys.stdout.write("\n") 706 | sys.stdout.write("File size: 0x%X\n" % entry.file_size) 707 | sys.stdout.write("File mode: %d\n" % entry.yst_mode) 708 | sys.stdout.write("File UID: %d\n" % entry.yst_uid) 709 | sys.stdout.write("File GID: %d\n" % entry.yst_gid) 710 | #sys.stdout.write("First bytes: %s\n" % entry.file_data[0:16]) 711 | sys.stdout.write("###################################################\n\n") 712 | 713 | 714 | def ls(self): 715 | ''' 716 | List info for all files in self.file_entries. 717 | ''' 718 | sys.stdout.write("\n") 719 | for (entry_id, entry) in Compat.iterator(self.file_entries): 720 | self._print_entry(entry) 721 | 722 | def _set_mode_owner(self, file_path, entry): 723 | ''' 724 | Conveniece wrapper for setting ownership and file permissions. 725 | ''' 726 | if self.config.preserve_mode: 727 | os.chmod(file_path, entry.yst_mode) 728 | if self.config.preserve_owner: 729 | os.chown(file_path, entry.yst_uid, entry.yst_gid) 730 | 731 | def extract(self, outdir): 732 | ''' 733 | Creates the outdir directory and extracts all files there. 
734 | ''' 735 | dir_count = 0 736 | file_count = 0 737 | link_count = 0 738 | 739 | # Make it a bytes array for Python3 740 | outdir = Compat.str2bytes(outdir) 741 | 742 | # Create directories first, so that files can be written to them 743 | for (entry_id, file_path) in Compat.iterator(self.file_paths): 744 | entry = self.file_entries[entry_id] 745 | if file_path and int(entry.yaffs_obj_type) == self.YAFFS_OBJECT_TYPE_DIRECTORY: 746 | file_path = os.path.join(outdir, file_path) 747 | 748 | # Check the file name for possible path traversal attacks 749 | if not is_safe_path(outdir, file_path): 750 | sys.stderr.write("Warning: Refusing to create directory '%s': possible path traversal\n" % file_path) 751 | continue 752 | 753 | try: 754 | os.makedirs(file_path) 755 | self._set_mode_owner(file_path, entry) 756 | dir_count += 1 757 | except Exception as e: 758 | sys.stderr.write("WARNING: Failed to create directory '%s': %s\n" % (file_path, str(e))) 759 | 760 | # Create files, including special device files 761 | for (entry_id, file_path) in Compat.iterator(self.file_paths): 762 | if file_path: 763 | file_path = os.path.join(outdir, file_path) 764 | 765 | # Check the file name for possible path traversal attacks 766 | if not is_safe_path(outdir, file_path): 767 | sys.stderr.write("Warning: Refusing to create file '%s': possible path traversal\n" % file_path) 768 | continue 769 | 770 | entry = self.file_entries[entry_id] 771 | if int(entry.yaffs_obj_type) == self.YAFFS_OBJECT_TYPE_FILE: 772 | try: 773 | with open(file_path, 'wb') as fp: 774 | fp.write(self.file_entries[entry_id].file_data) 775 | self._set_mode_owner(file_path, entry) 776 | file_count += 1 777 | except Exception as e: 778 | sys.stderr.write("WARNING: Failed to create file '%s': %s\n" % (file_path, str(e))) 779 | elif int(entry.yaffs_obj_type) == self.YAFFS_OBJECT_TYPE_SPECIAL: 780 | try: 781 | os.mknod(file_path, entry.yst_mode, entry.yst_rdev) 782 | file_count += 1 783 | except Exception as e: 784 | 
sys.stderr.write("Failed to create special device file '%s': %s\n" % (file_path, str(e))) 785 | 786 | 787 | # Create hard/sym links 788 | for (entry_id, file_path) in Compat.iterator(self.file_paths): 789 | entry = self.file_entries[entry_id] 790 | 791 | if file_path: 792 | dst = os.path.join(outdir, file_path) 793 | # Check the file name for possible path traversal attacks 794 | if not is_safe_path(outdir, dst): 795 | sys.stderr.write("Warning: Refusing to create link file '%s': possible path traversal\n" % file_path) 796 | continue 797 | 798 | if int(entry.yaffs_obj_type) == self.YAFFS_OBJECT_TYPE_SYMLINK: 799 | src = entry.alias 800 | try: 801 | os.symlink(src, dst) 802 | link_count += 1 803 | except Exception as e: 804 | sys.stderr.write("WARNING: Failed to create symlink '%s' -> '%s': %s\n" % (dst, src, str(e))) 805 | elif int(entry.yaffs_obj_type) == self.YAFFS_OBJECT_TYPE_HARDLINK: 806 | src = os.path.join(outdir, self.file_paths[entry.equiv_id]) 807 | try: 808 | os.link(src, dst) 809 | link_count += 1 810 | except Exception as e: 811 | sys.stderr.write("WARNING: Failed to create hard link '%s' -> '%s': %s\n" % (dst, src, str(e))) 812 | 813 | return (dir_count, file_count, link_count) 814 | 815 | 816 | def parse_yaffs(fs): 817 | ''' 818 | Attempts to parse the file system via the provided YAFFSExtractor instance. 819 | Returns True if no errors were encountered during process, else returns False. 
820 | ''' 821 | success = True 822 | 823 | # If something is going to go wrong, it will most likely be during the parsing stage 824 | try: 825 | fs.dbg_write("Parsing YAFFS objects...\n") 826 | fs.parse() 827 | fs.dbg_write("Parsed %d objects\n" % len(fs.file_entries)) 828 | except Exception as e: 829 | fs.dbg_write("File system parsing failed: %s\n" % str(e)) 830 | success = False 831 | 832 | return success 833 | 834 | def main(): 835 | from getopt import GetoptError, getopt 836 | 837 | page_size = None 838 | spare_size = None 839 | endianess = None 840 | ecclayout = None 841 | preserve_mode = None 842 | preserve_owner = None 843 | debug = None 844 | auto_detect = None 845 | in_file = None 846 | out_dir = None 847 | list_files = False 848 | brute_force = False 849 | fs = None 850 | config = None 851 | 852 | try: 853 | (opts, args) = getopt(sys.argv[1:], "f:d:p:s:e:c:oaDlb", ["file=", 854 | "dir=", 855 | "page-size=", 856 | "spare-size=", 857 | "endianess=", 858 | "no-ecc", 859 | "ownership", 860 | "debug", 861 | "ls", 862 | "brute-force", 863 | "auto"]) 864 | except GetoptError as e: 865 | sys.stderr.write(str(e) + "\n") 866 | sys.stderr.write("\nUsage: %s [OPTIONS]\n\n" % sys.argv[0]) 867 | sys.stderr.write(" -f, --file= YAFFS input file *\n") 868 | sys.stderr.write(" -d, --dir= Extract YAFFS files to this directory **\n") 869 | sys.stderr.write(" -p, --page-size= YAFFS page size [default: 2048]\n") 870 | sys.stderr.write(" -s, --spare-size= YAFFS spare size [default: 64]\n") 871 | sys.stderr.write(" -e, --endianess= Set input file endianess [default: little]\n") 872 | sys.stderr.write(" -n, --no-ecc Don't use the YAFFS oob scheme [default: use the oob scheme]\n") 873 | sys.stderr.write(" -a, --auto Attempt to auto detect page size, spare size, ECC, and endianess settings [default: False]\n") 874 | sys.stderr.write(" -b, --brute-force Attempt all combinations of page size, spare size, ECC, and endianess [default: False]\n") 875 | sys.stderr.write(" -o, 
--ownership Preserve original ownership of extracted files [default: False]\n") 876 | sys.stderr.write(" -l, --ls List file system contents [default: False]\n") 877 | sys.stderr.write(" -D, --debug Enable verbose debug output [default: False]\n\n") 878 | sys.stderr.write("* = Required argument\n") 879 | sys.stderr.write("** = Required argument, unless --ls is specified\n\n") 880 | sys.exit(1) 881 | 882 | for (opt, arg) in opts: 883 | if opt in ["-f", "--file"]: 884 | in_file = arg 885 | elif opt in ["-d", "--dir"]: 886 | out_dir = arg 887 | elif opt in["-l", "--ls"]: 888 | list_files = True 889 | elif opt in ["-a", "--auto"]: 890 | auto_detect = True 891 | elif opt in ["-b", "--brute-force"]: 892 | brute_force = True 893 | elif opt in ["-n", "--no-ecc"]: 894 | ecclayout = False 895 | elif opt in ["-e", "--endianess"]: 896 | if arg.lower()[0] == 'b': 897 | endianess = YAFFS.BIG_ENDIAN 898 | else: 899 | endianess = YAFFS.LITTLE_ENDIAN 900 | elif opt in ["-s", "--spare-size"]: 901 | spare_size = int(arg) 902 | elif opt in ["-p", "--page-size"]: 903 | page_size = int(arg) 904 | elif opt in ["-o", "--ownership"]: 905 | preserve_owner = True 906 | elif opt in ["-D", "--debug"]: 907 | debug = True 908 | 909 | if not in_file or (not out_dir and not list_files): 910 | sys.stderr.write("Error: Missing required arguments! 
Try --help.\n") 911 | sys.exit(1) 912 | 913 | if out_dir: 914 | try: 915 | os.makedirs(out_dir, exist_ok=True) 916 | except Exception as e: 917 | sys.stderr.write("Failed to create output directory: %s\n" % str(e)) 918 | sys.exit(1) 919 | 920 | try: 921 | with open(in_file, 'rb') as fp: 922 | data = fp.read() 923 | except Exception as e: 924 | sys.stderr.write("Failed to open file '%s': %s\n" % (in_file, str(e))) 925 | sys.exit(1) 926 | 927 | if auto_detect: 928 | try: 929 | # First 10K of data should be more than enough to detect the YAFFS settings 930 | config = YAFFSConfig(auto=True, 931 | sample_data=data[0:10240], 932 | preserve_mode=preserve_mode, 933 | preserve_owner=preserve_owner, 934 | debug=debug) 935 | except YAFFSException as e: 936 | sys.stderr.write(str(e) + "\n") 937 | config = None 938 | 939 | if config is None: 940 | config = YAFFSConfig(page_size=page_size, 941 | spare_size=spare_size, 942 | endianess=endianess, 943 | ecclayout=ecclayout, 944 | preserve_mode=preserve_mode, 945 | preserve_owner=preserve_owner, 946 | debug=debug) 947 | 948 | # Try auto-detected / manual / default settings first. 949 | # If those work without errors, then assume they are correct. 950 | fs = YAFFSExtractor(data, config) 951 | # If there were errors in parse_yaffs, and brute forcing is enabled, loop 952 | # through all possible configuration combinations looking for the one 953 | # combination that produces the most successfully parsed object entries. 954 | if not parse_yaffs(fs) and brute_force: 955 | for endianess in [YAFFS.LITTLE_ENDIAN, YAFFS.BIG_ENDIAN]: 956 | for ecclayout in [True, False]: 957 | for page_size in YAFFS.PAGE_SIZES: 958 | for spare_size in YAFFS.SPARE_SIZES: 959 | 960 | # This wouldn't make sense... 
961 | if spare_size > page_size: 962 | continue 963 | 964 | config = YAFFSConfig(page_size=page_size, 965 | spare_size=spare_size, 966 | endianess=endianess, 967 | ecclayout=ecclayout, 968 | preserve_mode=preserve_mode, 969 | preserve_owner=preserve_owner, 970 | debug=debug) 971 | 972 | tmp_fs = YAFFSExtractor(data, config) 973 | parse_yaffs(tmp_fs) 974 | if len(tmp_fs.file_entries) > len(fs.file_entries): 975 | fs = tmp_fs 976 | 977 | if fs is None: 978 | sys.stdout.write("File system parsing failed, quitting...\n") 979 | return 1 980 | else: 981 | sys.stdout.write("Found %d file objects with the following YAFFS settings:\n" % len(fs.file_entries)) 982 | fs.config.print_settings() 983 | 984 | if list_files: 985 | fs.ls() 986 | 987 | if out_dir: 988 | sys.stdout.write("Extracting file objects...\n") 989 | (dc, fc, lc) = fs.extract(out_dir) 990 | sys.stdout.write("Created %d directories, %d files, and %d links.\n" % (dc, fc, lc)) 991 | 992 | return 0 993 | 994 | if __name__ == "__main__": 995 | sys.exit(main()) 996 | 997 | --------------------------------------------------------------------------------