├── .gitattributes
├── .gitignore
├── README.md
├── compressed_payload.py
├── docs
│   ├── fota.yml
│   └── rofs_partition.yml
├── fota_payload.py
├── main.py
├── metadata_plist.py
├── plist_unarchiver.py
├── rofs.py
├── super_binary.py
└── uarp_payload.py

/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binary files
2 | *.bin
3 | FOTA*
4 | */files/*
5 | *.plist
6 | *.uarp
7 | 
8 | # For testing purposes
9 | /firmware
10 | /extracted
11 | 
12 | # Python-related
13 | *.pyc
14 | .idea
15 | venv
16 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # superbinary-parser
2 | 
3 | Provides a few tools to manipulate a [SuperBinary](https://github.com/hack-different/apple-knowledge/blob/main/_docs/UARP_and_FOTA.md#uarp---universal-accessory-restore-protocol).
4 | 
5 | This tool permits you to extract the various UARP (Universal Accessory Restore Protocol) payload types within SuperBinaries.
6 | SuperBinaries are used in various Apple accessories, including the AirPods Pro (2nd generation), Beats devices with a MediaTek chip, and MagSafe accessories.
7 | On Beats devices with a MediaTek chip, it can also extract the [FOTA](https://github.com/hack-different/apple-knowledge/blob/main/_docs/UARP_and_FOTA.md#fota---firmware-over-the-air) (MediaTek OTA) payload
8 | contained within a SuperBinary.
9 | 
10 | > [!NOTE]
11 | > This tool aims to maintain compatibility with the latest version of SuperBinary available.
12 | >
13 | > For example, with AirPods Pro (2nd generation), the version of SuperBinary increased from 2 to 3
14 | > in version 6.0. If parsing fails with a version 3 SuperBinary in the future, please file an issue!
15 | 
16 | ## Installation
17 | This package has no external dependencies.
18 | 
19 | Previously, a separate package was required for LZMA decompression with FOTA payloads.
20 | This requirement has been removed, as the built-in Python `lzma` module now suffices.
21 | 
22 | If your version of Python encounters an error while decompressing,
23 | please file an issue with your operating system and precise Python version (such as 3.13.1).
24 | 
25 | ## Usage
26 | First, clone this repository. You can then run `python3 main.py`:
27 | ```
28 | > python3 main.py
29 | usage: main.py [-h] [--extract-payloads | --no-extract-payloads] [--decompress-fota | --no-decompress-fota] [--extract-rofs | --no-extract-rofs] [--decompress-payload-contents | --no-decompress-payload-contents] source output_dir
30 | main.py: error: the following arguments are required: source, output_dir
31 | ```
32 | 
33 | If you are parsing firmware for a device that does not have an FOTA, you can split the SuperBinary and its plist using this syntax:
34 | ```
35 | > python3 main.py --extract-payloads source output_dir
36 | ```
37 | 
38 | On Beats devices with a MediaTek chip and an FOTA container (like the Beats Studio Buds), it is also possible to extract the firmware sounds from the Read Only File System (ROFS) using this syntax:
39 | ```
40 | > python3 main.py --extract-payloads --decompress-fota --extract-rofs FirmwareUpdate.uarp output_dir
41 | ```
42 | 
43 | The script will then extract all assets, such as sounds, to the output directory.
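44 | 
45 | The parser can also be driven from Python within a clone of this repository. As a minimal sketch of programmatic usage (the exact attributes may shift between versions):
46 | ```
47 | from super_binary import SuperBinary
48 | 
49 | # Parse a SuperBinary, listing the payloads within it.
50 | with open("FirmwareUpdate.uarp", "rb") as firmware:
51 |     super_binary = SuperBinary(firmware)
52 | 
53 | for payload in super_binary.payloads:
54 |     print(payload.get_tag(), payload.plist_metadata.long_name)
55 | ```
56 | Note that payload decompression via `compressed_payload.py` currently relies on `libcompression.dylib`, and is thus only available on macOS.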
57 | 

--------------------------------------------------------------------------------
/compressed_payload.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 | from enum import Enum
3 | from io import BytesIO
4 | import ctypes
5 | import struct
6 | import sys
7 | 
8 | from uarp_payload import UarpPayload
9 | 
10 | # TODO(spotlightishere): Replace libcompression.dylib with a cross-platform implementation
11 | if sys.platform == "darwin":
12 |     libcompression = ctypes.CDLL("libcompression.dylib")
13 | else:
14 |     # For now, we cannot use it.
15 |     libcompression = None
16 | 
17 | # Our header is 10 bytes in length.
18 | COMPRESSED_HEADER_LENGTH = 10
19 | 
20 | 
21 | class CompressionTypes(Enum):
22 |     """Possible values for a payload's compression type."""
23 | 
24 |     # Simply copies data - no compression is applied.
25 |     PASSTHROUGH = 0
26 |     # Internally known as "LZBitmapFast2"
27 |     LZBITMAPFAST = 1
28 |     # Internally known as "LZBitmap2"
29 |     LZBITMAP = 2
30 |     LZ4 = 3
31 | 
32 |     def get_compression_algorithm(self) -> int:
33 |         """Returns a usable value for compression algorithms with libcompression."""
34 |         # TODO(spotlightishere): This should be removed once usage can be
35 |         # migrated to a platform-agnostic library.
36 |         if self == CompressionTypes.PASSTHROUGH:
37 |             # This is a special, uncompressed type.
38 |             #
39 |             # If this is erroneously handled as compressed data,
40 |             # libcompression should error elsewhere.
41 |             return 0
42 |         elif self == CompressionTypes.LZBITMAPFAST:
43 |             # Undocumented compression type.
44 |             # It's 0x100 less than its sibling, LZBITMAP.
45 |             return 0x602
46 |         elif self == CompressionTypes.LZBITMAP:
47 |             # https://developer.apple.com/documentation/compression/compression_algorithm/compression_lzbitmap?language=objc
48 |             return 0x702
49 |         elif self == CompressionTypes.LZ4:
50 |             # https://developer.apple.com/documentation/compression/compression_algorithm/compression_lz4?language=objc
51 |             return 0x100
52 |         else:
53 |             raise AssertionError("Unknown compression type!")
54 | 
55 | 
56 | @dataclass
57 | class CompressedChunk(object):
58 |     """Parses and decompresses a chunk of a compressed payload."""
59 | 
60 |     # Compression type - the only observed value is LZBitmapFast2.
61 |     # (This appears to match CoreUARP's handling.)
62 |     raw_compression_type: int
63 | 
64 |     # Offset of this chunk within the decompressed file.
65 |     # We don't make use of this, as chunks are handled sequentially.
66 |     decompressed_offset: int
67 | 
68 |     # Amount of compressed data within this chunk.
69 |     compressed_length: int
70 | 
71 |     # Size of uncompressed data this chunk produces.
72 |     #
73 |     # This should match the standard block size; if not,
74 |     # this is the final chunk to be decompressed.
75 |     decompressed_length: int
76 | 
77 |     # Our raw data to decompress.
78 |     compressed_data: bytes = field(repr=False)
79 | 
80 |     def __init__(self, raw_data: BytesIO):
81 |         # Parse the current chunk's metadata. This is seemingly always big endian.
82 |         (
83 |             self.raw_compression_type,
84 |             self.decompressed_offset,
85 |             self.compressed_length,
86 |             self.decompressed_length,
87 |         ) = struct.unpack_from(">HIHH", raw_data.read(COMPRESSED_HEADER_LENGTH))
88 |         self.compression_type = CompressionTypes(self.raw_compression_type)
89 | 
90 |         # Passthrough chunks must have the same length for compressed and decompressed data.
91 |         if self.compression_type == CompressionTypes.PASSTHROUGH:
92 |             assert (
93 |                 self.compressed_length == self.decompressed_length
94 |             ), "Invalid passthrough chunk lengths!"
95 | 
96 |         # Our raw data is immediately beyond our header.
97 |         self.compressed_data = raw_data.read(self.compressed_length)
98 | 
99 |     def decompress(self) -> bytes:
100 |         """Leverages libcompression from macOS to decompress contents."""
101 |         # If this is passthrough, we simply return our "compressed" data as-is.
102 |         if self.compression_type == CompressionTypes.PASSTHROUGH:
103 |             return self.compressed_data
104 | 
105 |         # TODO(spotlightishere): Replace libcompression.dylib with a cross-platform implementation
106 |         compression_algorithm = self.compression_type.get_compression_algorithm()
107 | 
108 |         # Otherwise, decompress!
109 |         decompressed_buf = ctypes.create_string_buffer(self.decompressed_length)
110 |         buffer_size = libcompression.compression_decode_buffer(
111 |             decompressed_buf,
112 |             self.decompressed_length,
113 |             self.compressed_data,
114 |             self.compressed_length,
115 |             None,
116 |             compression_algorithm,
117 |         )
118 |         return decompressed_buf[0:buffer_size]
119 | 
120 | 
121 | def decompress_payload_chunks(payload: UarpPayload) -> bytes:
122 |     """Decompresses a compressed payload within a SuperBinary."""
123 | 
124 |     data = BytesIO(payload.contents)
125 |     decompressed_data = BytesIO()
126 | 
127 |     # We're not presented with the resulting size of this content.
128 |     # As such, we'll need to iterate through this entire file, handling chunks as we go.
129 |     while True:
130 |         # Read and decompress the current chunk.
131 |         current_chunk = CompressedChunk(data)
132 |         decompressed = current_chunk.decompress()
133 | 
134 |         # Ensure we've fully decompressed this data as expected.
135 |         expected_length = current_chunk.decompressed_length
136 |         actual_length = len(decompressed)
137 | 
138 |         if expected_length != actual_length:
139 |             # We need to step back by this current chunk's size in order to get its starting offset.
140 |             chunk_offset = (
141 |                 data.tell() - current_chunk.compressed_length - COMPRESSED_HEADER_LENGTH
142 |             )
143 |             raise AssertionError(
144 |                 "Data did not fully decompress! "
145 |                 f"(chunk offset {chunk_offset}; expected {expected_length}, but only read {actual_length})"
146 |             )
147 | 
148 |         decompressed_data.write(decompressed)
149 | 
150 |         # If we have a block size that decompresses to less than the chunk size
151 |         # as specified in metadata, then we've reached the end of our chunks.
152 |         if len(decompressed) != payload.plist_metadata.compressed_chunk_size:
153 |             break
154 | 
155 |     return decompressed_data.getvalue()
156 | 

--------------------------------------------------------------------------------
/docs/fota.yml:
--------------------------------------------------------------------------------
1 | meta:
2 |   id: fota
3 |   endian: le
4 |   encoding: ascii
5 | seq:
6 |   - id: header
7 |     type: fota_header
8 |     size: 4096
9 |   # Rather, everything until EOF!
10 |   - id: compressed_binary
11 |     size: _io.size - _io.pos
12 | types:
13 |   fota_header:
14 |     seq:
15 |       - id: rsa_signature_maybe
16 |         size: 256
17 |       - id: header_metadata
18 |         type: tlv_entry
19 |         repeat: until
20 |         repeat-until: _.data_length == 0xFFFF
21 | 
22 |   #############
23 |   # TLV Types #
24 |   #############
25 |   format_metadata:
26 |     seq:
27 |       # Observed values have been 0x0102.
28 |       # This may mean there's a u1 with value 2,
29 |       # indicating two payloads,
30 |       # or perhaps something entirely different.
31 |       #
32 |       # For ease, let's assume they're a u2,
33 |       # and that there are only two payload segments.
34 |       - id: file_offset
35 |         type: u2
36 |       - id: metadata_size
37 |         type: u4
38 |       - id: payload_size
39 |         type: u4
40 | 
41 |   segment_metadata:
42 |     seq:
43 |       - id: segment_count
44 |         type: u4
45 |       - id: segment_data
46 |         type: segment_info
47 |         repeat: expr
48 |         repeat-expr: segment_count
49 | 
50 |   segment_info:
51 |     seq:
52 |       - id: payload_offset
53 |         doc: |
54 |           Seemingly, 0x1000 must be subtracted from this - the contents
55 |           are presumably decompressed within memory.
56 |         type: u4
57 |       - id: payload_length
58 |         type: u4
59 |       - id: unknown
60 |         type: u4
61 | 
62 |   hashes:
63 |     seq:
64 |       - id: hash_count
65 |         type: u4
66 |       - id: hash
67 |         size: 32
68 |         repeat: expr
69 |         repeat-expr: hash_count
70 | 
71 |   # Every TLV entry appears to only be valid
72 |   # if its length is not 0xFFFF - there seems
73 |   # to be no count of metadata entries present.
74 |   tlv_entry:
75 |     seq:
76 |       - id: data_type
77 |         type: u2
78 |         enum: tlv_types
79 |       - id: data_length
80 |         type: u2
81 |       - id: data
82 |         size: data_length
83 |         if: data_type != tlv_types::invalid_data
84 |         type:
85 |           switch-on: data_type
86 |           cases:
87 |             'tlv_types::format_metadata': format_metadata
88 |             'tlv_types::segment_metadata': segment_metadata
89 |             'tlv_types::firmware_version': strz
90 |             'tlv_types::hashes': hashes
91 |             'tlv_types::chipset_name': str
92 |             'tlv_types::design_name': str
93 |             'tlv_types::unknown': u4
94 | enums:
95 |   tlv_types:
96 |     0x11: format_metadata
97 |     0x12: segment_metadata
98 |     0x13: firmware_version
99 |     0x14: hashes
100 |     0x20: chipset_name
101 |     0x21: design_name
102 |     # Perhaps a production flag?
103 |     0xf0: unknown
104 |     # Kaitai won't let us drop as soon as we match 0xFFFF.
105 |     # This is not a real type.
106 |     0xFFFF: invalid_data

--------------------------------------------------------------------------------
/docs/rofs_partition.yml:
--------------------------------------------------------------------------------
1 | meta:
2 |   id: rofs
3 |   file-extension: bin
4 |   endian: le
5 | 
6 | seq:
7 |   - id: magic
8 |     contents: "ROFS"
9 |   - id: file_size
10 |     type: u4
11 |   - id: file_size_again
12 |     type: u4
13 |   - id: amount_of_files
14 |     type: u4
15 |   - id: files
16 |     type: file_entry
17 |     repeat: expr
18 |     repeat-expr: amount_of_files
19 | types:
20 |   file_entry:
21 |     seq:
22 |       - id: file_index
23 |         type: u1
24 |       - id: metadata_length
25 |         type: u2
26 |       - id: padding
27 |         type: u1
28 |         doc: Consistent.
29 |       - id: file_name
30 |         type: strz
31 |         encoding: ASCII
32 |         size: 32
33 |       - id: first_unknown_marker
34 |         type: u4
35 |         doc: Likely used to mark where file metadata begins.
36 |       - id: file_offset
37 |         type: u4
38 |       - id: file_length
39 |         type: u4
40 |       - id: first_padding
41 |         type: u8
42 |       - id: second_unknown_marker
43 |         type: u4
44 |       - id: end_padding
45 |         size: 12
46 |     instances:
47 |       content:
48 |         pos: file_offset
49 |         size: file_length

--------------------------------------------------------------------------------
/fota_payload.py:
--------------------------------------------------------------------------------
1 | import io
2 | import lzma
3 | import struct
4 | from dataclasses import dataclass, field
5 | from enum import IntEnum
6 | 
7 | 
8 | class FotaMetadataType(IntEnum):
9 |     """Known metadata types within a FOTA payload. This is not exhaustive."""
10 | 
11 |     FORMAT_METADATA = 0x11
12 |     SEGMENT_METADATA = 0x12
13 |     FIRMWARE_VERSION = 0x13
14 |     PARTITION_HASHES = 0x14
15 |     CHIPSET_NAME = 0x20
16 |     DESIGN_NAME = 0x21
17 |     FLAG_UNKNOWN = 0xF0
18 | 
19 | 
20 | @dataclass
21 | class FotaUnknown:
22 |     """An unknown TLV type."""
23 | 
24 |     contents: bytes
25 | 
26 | 
27 | @dataclass
28 | class FotaString:
29 |     """A C string with a fixed length."""
30 | 
31 |     contents: str
32 | 
33 |     def __init__(self, binary_contents: bytes):
34 |         self.contents = binary_contents.decode("utf-8")
35 | 
36 |     def __repr__(self):
37 |         return self.contents
38 | 
39 | 
40 | @dataclass
41 | class FotaFormatMetadata(object):
42 |     # Observed values have been 0x0102.
43 |     # This may mean there's a u1 with value 2,
44 |     # indicating two payloads,
45 |     # or perhaps something entirely different.
46 |     #
47 |     # For ease, let's assume they're a u2,
48 |     # and that there are only two payload segments.
49 |     unknown: int = 0
50 | 
51 |     # The size of this metadata segment.
52 |     # Observed values have been 0x1000 (4096 bytes).
53 |     metadata_size: int = 0
54 | 
55 |     # The size of the compressed firmware segment.
56 |     payload_size: int = 0
57 | 
58 |     def __init__(self, contents: io.BytesIO):
59 |         (self.unknown, self.metadata_size, self.payload_size) = struct.unpack(
60 |             "<HII", contents.read(10)
61 |         )

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
78 | def get_payload_filename(payload) -> str:
79 |     """Determines the name to save this UarpPayload with."""
80 | 
81 |     if args.use_tag_name:
82 |         payload_filename = f"{tag_name}.bin"
83 |     else:
84 |         # We want to leverage the payload's given filepath.
85 |         # Ensure its parent directories exist.
86 |         payload_filename = payload.plist_metadata.filepath
87 | 
88 |     return payload_filename
89 | 
90 | 
91 | # Write out payloads if desired.
92 | if args.extract_payloads:
93 |     # Used to avoid conflicts in both tag names and full paths.
94 |     seen_filenames: dict[str, int] = {}
95 | 
96 |     for payload in super_binary.payloads:
97 |         tag_name = payload.get_tag()
98 |         payload_name = payload.plist_metadata.long_name or "no payload description"
99 |         payload_filename = get_payload_filename(payload)
100 | 
101 |         # Sometimes, tags have multiple payloads, and filepaths conflict.
102 |         # Let's append a number for every occurrence.
103 |         seen_count = seen_filenames.get(payload_filename)
104 |         if seen_count is not None:
105 |             # We have a tag! Increment its seen count.
106 |             seen_filenames[payload_filename] += 1
107 | 
108 |             # Append the count at the end of the file.
109 |             payload_filename = f"{payload_filename}.{seen_count}"
110 |         else:
111 |             seen_filenames[payload_filename] = 1
112 | 
113 |         print(f"Found {tag_name} ({payload_name})")
114 |         print(f"Saving to {payload_filename}...")
115 | 
116 |         # Sometimes, this may be an absolute path.
117 |         # For example, some filepaths start with `/Library` or `/tmp`.
118 |         # Tags should (hopefully) never run into this.
119 |         #
120 |         # Let's append `./` to the start to ensure relative resolution.
121 |         payload_filename = f"./{payload_filename}"
122 |         write_payload(payload_filename, payload.contents)
123 | 
124 |     # Lastly, write the SuperBinary plist.
125 |     write_payload("SuperBinary.plist", super_binary.raw_plist_data)
126 | 
127 | if args.decompress_fota:
128 |     # Ensure we have a payload of this type.
129 |     fota_payload = super_binary.get_tag(b"FOTA")
130 |     if not fota_payload:
131 |         print("Missing FOTA payload!")
132 |         exit(1)
133 | 
134 |     fota = FotaPayload(fota_payload.contents)
135 |     write_payload("FOTA.bin.lzma", fota.compressed)
136 | 
137 |     # Decompress payload.
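138 |     # (The result is presumably the raw firmware image stored
139 |     # LZMA-compressed past the 4096-byte header - see docs/fota.yml.)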
140 |     write_payload("FOTA", fota.decompressed)
141 | 
142 |     # Separate segments within.
143 |     os.makedirs(payload_dir / "segments", exist_ok=True)
144 |     for i, segment_contents in enumerate(fota.segments):
145 |         # Each segment offset is 0x1000 ahead,
146 |         # as the decompressed portion likely
147 |         # overwrites the compressed portion in memory.
148 |         write_payload(f"segments/{i}.bin", segment_contents)
149 | 
150 |     print("Extracted FOTA payload!")
151 | 
152 | if args.extract_rofs:
153 |     rofs_partition = find_rofs(fota.segments)
154 |     os.makedirs(payload_dir / "files", exist_ok=True)
155 |     for file in rofs_partition.files:
156 |         write_payload(f"files/{file.file_name}", file.contents)
157 | 
158 | 
159 | if args.decompress_payload_contents:
160 |     for payload in super_binary.payloads:
161 |         # The metadata plist present at the end of the SuperBinary
162 |         # defines what segments are compressed.
163 |         # For our purposes, any compressed segment has a `compressed_chunk_size` that is not None.
164 |         chunk_size = payload.plist_metadata.compressed_chunk_size
165 |         if not chunk_size:
166 |             continue
167 | 
168 |         # TODO(spotlightishere): This should function on platforms beyond macOS.
169 |         assert (
170 |             sys.platform == "darwin"
171 |         ), "Decompression is not yet supported on this platform."
172 | 
173 |         print(f"Decompressing {payload.get_tag()}...")
174 |         contents = decompress_payload_chunks(payload)
175 |         write_payload(f"{payload.get_tag()}.decompressed.bin", contents)
176 | 

--------------------------------------------------------------------------------
/metadata_plist.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | 
3 | from plist_unarchiver import SomewhatKeyedUnarchiver
4 | from dataclasses import dataclass
5 | 
6 | 
7 | @dataclass
8 | class UarpMetadata:
9 |     # The raw dictionary of metadata available at a top level.
10 |     # There are many keys we do not care about,
11 |     # such as "Payload Certificate", "Payload Signature", etc.
12 |     all_metadata: dict
13 | 
14 |     # The given "Payload Filepath" for this payload, typically
15 |     # either a direct filename, or the build path.
16 |     # Three prefixes have been observed:
17 |     #  - usr/local/standalone/firmware/[...]
18 |     #  - /Library/Caches/com.apple.xbs/[...]
19 |     #  - /tmp/[...]
20 |     filepath: str
21 | 
22 |     # The specified "Payload Long Name" for this payload.
23 |     # This may not be present in all payloads.
24 |     long_name: Optional[str]
25 | 
26 |     # A dictionary of other metadata on this payload under "Payload MetaData".
27 |     # Consider the following examples of possible keys:
28 |     #  - Payload Compression Algorithm, a string (e.g. LZBitmapFast2)
29 |     #  - Compose Measured Payloads, an array of personalization options
30 |     #  - Urgent Update, a boolean value
31 |     #
32 |     # These correspond to the dictionary of options present
33 |     # within the top-level key "MetaData Values".
34 |     payload_metadata: Optional[dict]
35 | 
36 |     # If this payload is compressed, this is the
37 |     # raw chunk size of its compressed contents.
38 |     # If the payload is not compressed, this is None.
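39 |     # (This mirrors the "Payload Compression ChunkSize" key read from "Payload MetaData" below.)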
40 |     compressed_chunk_size: Optional[int] = None
41 | 
42 |     def __init__(self, metadata: dict):
43 |         self.all_metadata = metadata
44 |         self.filepath = metadata.get("Payload Filepath")
45 |         self.long_name = metadata.get("Payload Long Name")
46 |         self.payload_metadata = metadata.get("Payload MetaData")
47 | 
48 |         if self.payload_metadata:
49 |             chunk_size = self.payload_metadata.get("Payload Compression ChunkSize")
50 |             self.compressed_chunk_size = chunk_size
51 | 
52 | 
53 | @dataclass
54 | class MetadataPlist(object):
55 |     """A simple parser of the metadata plist embedded at the end of a SuperBinary.
56 | 
57 |     For now, its implementation simply obtains compressed payloads.
58 |     It may be desirable to extend this in the future."""
59 | 
60 |     # The raw, unarchived metadata.
61 |     all_metadata: dict
62 | 
63 |     # Metadata for all payloads.
64 |     payload_tags: list[tuple[bytes, UarpMetadata]]
65 | 
66 |     def __init__(self, plist_data: bytes):
67 |         unarchiver = SomewhatKeyedUnarchiver(plist_data)
68 |         self.all_metadata = unarchiver.unarchive_root_object()
69 |         self.payload_tags = []
70 | 
71 |         # Extract metadata for all payloads.
72 |         # Some compressed payloads have the special key
73 |         # "Payload Compression ChunkSize" within "Payload MetaData".
74 |         payloads = self.all_metadata["SuperBinary Payloads"]
75 |         for current_payload in payloads:
76 |             payload_tag = bytes(current_payload["Payload 4CC"], "ascii")
77 |             payload_metadata = UarpMetadata(current_payload)
78 | 
79 |             self.payload_tags.append((payload_tag, payload_metadata))
80 | 

--------------------------------------------------------------------------------
/plist_unarchiver.py:
--------------------------------------------------------------------------------
1 | import plistlib
2 | from typing import Any, Union
3 | 
4 | 
5 | class SomewhatKeyedUnarchiver(object):
6 |     """A loose implementation of a NSKeyedUnarchiver.
7 | 
8 |     This is very loosely put together:
9 |     please do not consider this a reference implementation!
10 |     This project's primary focus is on SuperBinary parsing, not NSKeyedUnarchiver :)"""
11 | 
12 |     plist: dict
13 | 
14 |     def __init__(self, plist_data: bytes):
15 |         self.plist = plistlib.loads(plist_data, fmt=plistlib.FMT_BINARY)
16 | 
17 |         # Sanity checks for our assumptions:
18 |         assert self.plist["$archiver"] == "NSKeyedArchiver", "Unknown archive type!"
19 |         assert self.plist["$version"] == 100000, "Unknown version!"
20 | 
21 |     def get_object(self, uid: plistlib.UID) -> Any:
22 |         """Returns the object at the given index within "$objects". This effectively resolves a UID."""
23 |         return self.plist["$objects"][uid]
24 | 
25 |     def get_class_name(self, current_object: dict) -> str:
26 |         """Returns the class name for the given object."""
27 |         # This class's UID is present under the special "$class" key.
28 |         # We can then look it up within the root "$objects" dictionary.
29 |         class_uid = current_object["$class"]
30 |         class_info = self.plist["$objects"][class_uid]
31 | 
32 |         # For our purposes, we only need to care about
33 |         # "$classname" within this class's info.
34 |         return class_info["$classname"]
35 | 
36 |     def unarchive_root_object(self) -> dict:
37 |         """Begins iterating through the root object, unarchiving accordingly."""
38 | 
39 |         # As a special case: here, we begin recursing via the special key "$top",
40 |         # in which we assume that it only has one object.
41 |         # This _should_ be UID 1, but you never know.
42 | 
43 |         root_class_uid = self.plist["$top"]["root"]
44 |         root_object = self.get_object(root_class_uid)
45 |         return self.unarchive_object(root_object)
46 | 
47 |     def unarchive_object(self, current_object: Union[dict, list]) -> Any:
48 |         """Unarchives an object."""
49 |         object_class = self.get_class_name(current_object)
50 | 
51 |         # Ensure this is a class type we're familiar with.
52 |         if object_class == "NSMutableDictionary" or object_class == "NSDictionary":
53 |             return self.unarchive_dict(current_object)
54 |         elif object_class == "NSMutableArray" or object_class == "NSArray":
55 |             return self.unarchive_array(current_object)
56 |         elif object_class == "NSMutableString" or object_class == "NSString":
57 |             return self.unarchive_string(current_object)
58 |         else:
59 |             raise AssertionError(f'Unknown archived class type "{object_class}"!')
60 | 
61 |     def unarchive_dict(self, current_object: dict) -> dict:
62 |         """Unarchives a NS(Mutable)Dictionary."""
63 |         # For a dictionary, we have "NS.keys" and "NS.objects".
64 |         keys = current_object["NS.keys"]
65 |         values = current_object["NS.objects"]
66 | 
67 |         assert len(keys) == len(values), "Invalid dictionary length!"
68 | 
69 |         # Let's transform our results to {key UID => object UID}.
70 |         uid_mapping = dict(zip(keys, values))
71 | 
72 |         # First, we'll resolve key names. They should all be strings.
73 |         result = {}
74 |         for key_uid, value_uid in uid_mapping.items():
75 |             # Obtaining the key's name is as simple as looking up its UID.
76 |             key_name = self.get_object(key_uid)
77 |             resolved_object = self.get_object(value_uid)
78 | 
79 |             # If an object's value is a dictionary/array, we assume they
80 |             # are another object, and we unarchive them accordingly.
81 |             # Otherwise, preserve as-is.
82 |             if isinstance(resolved_object, dict) or isinstance(resolved_object, list):
83 |                 value_contents = self.unarchive_object(resolved_object)
84 |             else:
85 |                 value_contents = resolved_object
86 | 
87 |             result[key_name] = value_contents
88 | 
89 |         return result
90 | 
91 |     def unarchive_array(self, array: dict) -> list:
92 |         """Resolves a NS(Mutable)Array."""
93 |         result = []
94 | 
95 |         # NSArrays simply contain an array of "NS.objects".
96 |         # We can iterate and resolve.
97 |         array_objects = array["NS.objects"]
98 |         for value_uid in array_objects:
99 |             resolved_object = self.get_object(value_uid)
100 | 
101 |             # If an object's value is a dictionary/array, we assume they
102 |             # are another object, and we unarchive them accordingly.
103 |             # Otherwise, preserve as-is.
104 |             if isinstance(resolved_object, dict) or isinstance(resolved_object, list):
105 |                 value_contents = self.unarchive_object(resolved_object)
106 |             else:
107 |                 value_contents = resolved_object
108 | 
109 |             result.append(value_contents)
110 |         return result
111 | 
112 |     def unarchive_string(self, string_object: dict) -> str:
113 |         """Resolves a NS(Mutable)String."""
114 | 
115 |         # NSStrings simply contain their string value under "NS.string".
116 |         return string_object["NS.string"]
117 | 

--------------------------------------------------------------------------------
/rofs.py:
--------------------------------------------------------------------------------
1 | import struct
2 | from dataclasses import dataclass, field
3 | from io import BytesIO
4 | 
5 | 
6 | @dataclass
7 | class ROFSFile(object):
8 |     """A file within a ROFS partition."""
9 | 
10 |     file_name: str
11 |     contents: bytes = field(repr=False)
12 | 
13 | 
14 | class ROFS(object):
15 |     """Simple class to parse contents within a ROFS partition."""
16 | 
17 |     files: list[ROFSFile]
18 | 
19 |     def __init__(self, passed_data: bytes):
20 |         # Create a buffer we can read against.
21 |         data = BytesIO(passed_data)
22 |         # Initialize per instance to avoid sharing a mutable class attribute.
23 |         self.files = []
24 | 
25 |         # Ensure the header is correct.
26 |         magic, length_one, length_two, file_count = struct.unpack_from(
27 |             "<4sIII", data.read(16)
28 |         )
29 |         assert magic == b"ROFS", "Invalid ROFS magic!"
30 |         assert length_one == length_two, "Invalid ROFS length!"
31 |         assert file_count != 0, "Zero file partition detected!"
32 | 
33 |         # Begin parsing.
34 |         for index in range(file_count):
35 |             file_name, file_offset, file_length = struct.unpack_from(
36 |                 "<4x32s4xII24x", data.read(72)
37 |             )
38 |             # Determine filename based on null terminator.
39 |             file_name = file_name.split(b"\x00")[0]
40 |             file_name = file_name.decode("utf-8")
41 | 
42 |             contents = passed_data[file_offset : file_offset + file_length]
43 |             file = ROFSFile(file_name, contents)
44 |             self.files.append(file)
45 | 
46 | 
47 | def find_rofs(segments: list[bytes]) -> ROFS:
48 |     """Attempts to find the ROFS contents within the given segments.
49 | 
50 |     Raises an AssertionError if no segment parses as a ROFS partition."""
51 | 
52 |     # Our ROFS segment should have its magic as its first four bytes.
53 |     for current_segment in segments:
54 |         try:
55 |             return ROFS(current_segment)
56 |         except AssertionError:
57 |             # Hmm... we'll have to keep trying.
58 |             continue
59 | 
60 |     raise AssertionError("Unable to find ROFS partition!")
61 | 

--------------------------------------------------------------------------------
/super_binary.py:
--------------------------------------------------------------------------------
1 | import io
2 | import struct
3 | from dataclasses import dataclass, field
4 | from typing import Optional
5 | 
6 | from metadata_plist import MetadataPlist, UarpMetadata
7 | from uarp_payload import UarpPayload
8 | 
9 | 
10 | @dataclass
11 | class SuperBinary(object):
12 |     """Simple wrapper to assist in parsing SuperBinary contents."""
13 | 
14 |     # Known versions are 2 and 3.
15 |     header_version: int
16 |     # The length of the SuperBinary header (not including the payload rows that follow).
17 |     header_length: int
18 |     # The size this SuperBinary payload spans.
19 |     # Note that data may trail (i.e. the SuperBinary plist).
20 |     binary_size: int
21 |     # i.e. 100 in '100.7916.1052884864.1'.
22 |     major_version: int
23 |     # i.e. 7916 in '100.7916.1052884864.1'.
24 |     minor_version: int
25 |     # i.e. 1052884864 in '100.7916.1052884864.1'.
26 |     release_version: int
27 |     # i.e. 1 in '100.7916.1052884864.1'.
28 |     build_version: int
29 |     # Metadata size.
30 |     metadata_length: int
31 |     # Observed to be zero.
32 |     metadata_offset: int
33 |     # Payloads length
34 |     row_length: int
35 |     # Payloads offset
36 |     row_offset: int
37 | 
38 |     # Payloads available within this binary.
39 |     payloads: list[UarpPayload]
40 |     # Trailing data past payload (i.e. SuperBinary plist).
41 |     raw_plist_data: bytes = field(repr=False)
42 |     # The unarchived, top-level SuperBinary plist.
43 |     metadata: MetadataPlist
44 | 
45 |     def __init__(self, data: io.BufferedReader):
46 |         self.payloads = []
47 | 
48 |         # Read the version and header length first to ensure this file is valid.
49 |         self.header_version, self.header_length = struct.unpack_from(
50 |             ">II", data.read(8)
51 |         )
52 |         assert self.header_version in [2, 3], "Unknown version of SuperBinary!"
53 |         assert self.header_length == 0x2C, "Invalid header length for version!"
54 | 
55 |         # Load the remainder of the header.
56 |         (
57 |             self.binary_size,
58 |             self.major_version,
59 |             self.minor_version,
60 |             self.release_version,
61 |             self.build_version,
62 |         ) = struct.unpack_from(">IIIII", data.read(20))
63 | 
64 |         # Next, load offsets and length information.
65 |         (
66 |             self.metadata_offset,
67 |             self.metadata_length,
68 |             self.row_offset,
69 |             self.row_length,
70 |         ) = struct.unpack_from(">IIII", data.read(16))
71 | 
72 |         # At this point, we have gone past the SuperBinary header (0x2c).
73 |         # Our binary plist is at the end of our payload (`binary_size`).
74 |         # Let's read it, and then jump back.
75 |         data.seek(self.binary_size)
76 |         self.raw_plist_data = data.read()
77 | 
78 |         # Unarchive the SuperBinary plist.
79 |         self.metadata = MetadataPlist(self.raw_plist_data)
80 | 
81 |         # Jump back to where we left off, and finally extract actual payload metadata.
82 |         data.seek(0x2C)
83 |         # The observed tag size is 0x28, so we will assume that.
84 |         # Please make an issue (or a PR) to change this logic in the future!
85 |         queried_data = struct.unpack_from(">I", data.peek(4))
86 |         metadata_tag_size = queried_data[0]
87 |         assert metadata_tag_size == 0x28, "Unknown metadata tag size!"
88 |         row_count = self.row_length // metadata_tag_size
89 | 
90 |         # TODO(spotlightishere): Is there any condition
91 |         # in which the length of the payload metadata array
92 |         # will not match the count of actual payloads?
93 |         assert row_count == len(
94 |             self.metadata.payload_tags
95 |         ), "Mismatched payload count between binary and metadata!"
96 | 
97 |         # Obtain the metadata for all possible payloads.
98 |         for payload_num in range(row_count):
99 |             # Determine the metadata offset for this payload.
100 |             offset = self.header_length + (payload_num * metadata_tag_size)
101 |             data.seek(offset)
102 |             payload_metadata = data.read(metadata_tag_size)
103 | 
104 |             # This is a tuple of (tag, UarpMetadata).
105 |             plist_tuple = self.metadata.payload_tags[payload_num]
106 |             payload = UarpPayload(payload_metadata, plist_tuple, data)
107 |             self.payloads.append(payload)
108 | 
109 |     def get_tag(self, tag: bytes) -> Optional[UarpPayload]:
110 |         """Returns the payload for the given tag. Returns None if not present."""
111 |         assert len(tag) == 4, "Invalid 4CC/magic passed!"
112 |         for payload in self.payloads:
113 |             if payload.tag == tag:
114 |                 return payload
115 |         return None
116 | 

--------------------------------------------------------------------------------
/uarp_payload.py:
--------------------------------------------------------------------------------
1 | import io
2 | import struct
3 | from dataclasses import dataclass, field
4 | from metadata_plist import UarpMetadata
5 | 
6 | 
7 | @dataclass
8 | class UarpPayload(object):
9 |     # The tag representing this payload, i.e. 'FOTA'.
10 |     tag: bytes
11 |     # i.e. 100 in '100.7916.1052884864.1'.
12 |     major_version: int
13 |     # i.e. 7916 in '100.7916.1052884864.1'.
14 |     minor_version: int
15 |     # i.e. 1052884864 in '100.7916.1052884864.1'.
16 |     release_version: int
17 |     # i.e. 1 in '100.7916.1052884864.1'.
18 |     build_version: int
19 |     # Metadata size.
20 |     metadata_length: int
21 |     # Observed to be zero.
22 |     metadata_offset: int
23 |     # Payloads length
24 |     payloads_length: int
25 |     # Payloads offset
26 |     payloads_offset: int
27 |     # Binary metadata held by the current payload.
28 |     # Note that, in some firmware, it may be empty.
29 |     metadata: bytes = field(repr=False)
30 |     # Metadata specified for this payload within the SuperBinary plist.
31 |     plist_metadata: UarpMetadata
32 |     # The data represented by this payload.
33 |     contents: bytes = field(repr=False)
34 | 
35 |     def __init__(
36 |         self,
37 |         header: bytes,
38 |         plist_tuple: tuple[bytes, UarpMetadata],
39 |         data: io.BufferedReader,
40 |     ):
41 |         # Parse the metadata within header.
42 |         (
43 |             metadata_tag_size,
44 |             self.tag,
45 |             self.major_version,
46 |             self.minor_version,
47 |             self.release_version,
48 |             self.build_version,
49 |             self.metadata_offset,
50 |             self.metadata_length,
51 |             self.payloads_offset,
52 |             self.payloads_length,
53 |         ) = struct.unpack_from(">I4sIIIIIIII", header)
54 | 
55 |         # Verify that our SuperBinary plist's tag
56 |         # matches the obtained one above.
57 |         (plist_tag, plist_metadata) = plist_tuple
58 |         assert plist_tag == self.tag, "Mismatched tag between payload and metadata!"
59 |         self.plist_metadata = plist_metadata
60 | 
61 |         # Obtain our metadata and payload.
62 |         data.seek(self.metadata_offset)
63 |         self.metadata = data.read(self.metadata_length)
64 | 
65 |         data.seek(self.payloads_offset)
66 |         self.contents = data.read(self.payloads_length)
67 | 
68 |     def get_tag(self) -> str:
69 |         """Returns a string with the given tag name."""
70 |         return self.tag.decode("utf-8")
71 | 
--------------------------------------------------------------------------------