├── .github └── FUNDING.yml ├── .gitignore ├── LICENSE.md ├── README.md ├── dsstore.py ├── main.py └── samples └── .DS_Store.ctf /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: gehaxelt 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | .pytest_cache/ 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule.* 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # Environments 83 | .env 84 | .venv 85 | env/ 86 | venv/ 87 | ENV/ 88 | env.bak/ 89 | venv.bak/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | 105 | # End of https://www.gitignore.io/api/python 106 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Sebastian Neef 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
import struct


class ParsingError(Exception):
    """Raised when the given bytes cannot be parsed as .DS_Store structures."""
    pass


class DataBlock(object):
    """
    Class for a basic DataBlock inside of the DS_Store format.

    Wraps a byte string and provides cursor-based reads plus the
    file-name record parser used by the B-tree traversal.
    """

    def __init__(self, data, debug=False):
        super(DataBlock, self).__init__()
        self.data = data    # raw bytes backing this block
        self.pos = 0        # read cursor into self.data
        self.debug = debug  # enables _log() output

    def offset_read(self, length, offset=None):
        """
        Returns a byte array of length from data at the given offset or pos.
        If no offset is given, pos will be increased by length.
        Throws ParsingError if offset+length > len(self.data)
        """
        # Compare against None explicitly: offset=0 is a legal absolute
        # position and must not fall back to the cursor (0 is falsy, so the
        # old `if not offset:` treated it as "no offset given").
        if offset is None:
            offset_position = self.pos
        else:
            offset_position = offset

        if len(self.data) < offset_position + length:
            raise ParsingError("Offset+Length > len(self.data)")

        # Only cursor-based reads advance the cursor.
        if offset is None:
            self.pos += length

        value = self.data[offset_position:offset_position + length]
        self._log("Reading: {}-{} => {}".format(hex(offset_position), hex(offset_position + length), value))
        return value

    def skip(self, length):
        """
        Increases pos by length without reading data!
        """
        self.pos += length

    def read_filename(self):
        """
        Extracts a file name from the current position.

        A record is: a 4-byte big-endian name length (in UTF-16 code units),
        the UTF-16BE encoded name, a 4-byte structure id, a 4-byte structure
        type tag, then a type-dependent payload which is skipped.
        """
        # The length of the file name in UTF-16 characters (two bytes each).
        length, = struct.unpack_from(">I", self.offset_read(4))
        filename = self.offset_read(2 * length).decode("utf-16be")
        # A structure ID that I haven't found any use of.
        structure_id, = struct.unpack_from(">I", self.offset_read(4))
        # Now read the structure type as a string of four characters and decode it to ascii.
        structure_type, = struct.unpack_from(">4s", self.offset_read(4))
        structure_type = structure_type.decode()
        self._log("Structure type ", structure_type)

        # If we don't find a match, skip stays < 0 and we will do some magic
        # to find the right skip due to somehow broken .DS_Store files..
        skip = -1
        # Source: http://search.cpan.org/~wiml/Mac-Finder-DSStore/DSStoreFormat.pod
        while skip < 0:
            if structure_type == "bool":
                skip = 1
            elif structure_type in ("type", "long", "shor", "fwsw", "fwvh", "icvt", "lsvt", "vSrn", "vstl"):
                skip = 4
            elif structure_type in ("comp", "dutc", "icgo", "icsp", "logS", "lg1S", "lssp", "modD", "moDD", "phyS", "ph1S"):
                skip = 8
            elif structure_type == "blob":
                # Variable-length payload, prefixed with its byte length.
                blen, = struct.unpack_from(">I", self.offset_read(4))
                skip = blen
            elif structure_type in ("ustr", "cmmt", "extn", "GRP0"):
                # UTF-16 payload: the prefix counts characters, not bytes.
                blen, = struct.unpack_from(">I", self.offset_read(4))
                skip = 2 * blen
            elif structure_type == "BKGD":
                skip = 12
            elif structure_type in ("ICVO", "LSVO", "dscl"):
                skip = 1
            elif structure_type in ("Iloc", "fwi0"):
                skip = 16
            elif structure_type == "dilc":
                skip = 32
            elif structure_type == "lsvo":
                skip = 76
            # "icvo", "info" and unknown tags deliberately leave skip < 0
            # and fall into the recovery path below.

            if skip <= 0:
                # We somehow didn't find a matching type. Maybe this file
                # name's length value is broken. Try to fix it!
                # This is a bit voodoo and probably not the nicest way.
                self._log("Re-reading!")
                # Rewind 8 bytes, so that we can re-read structure_id and
                # structure_type after extending the name by one character.
                self.skip(-1 * 2 * 0x4)
                filename += self.offset_read(0x2).decode("utf-16be")
                # re-read structure_id and structure_type
                structure_id, = struct.unpack_from(">I", self.offset_read(4))
                structure_type, = struct.unpack_from(">4s", self.offset_read(4))
                structure_type = structure_type.decode()
                # Look-ahead and check if we have structure_type==Iloc followed
                # by blob; if so we're interested in blob, not Iloc.  The old
                # code forgot to unpack the tuple and compared it against a
                # str, so the look-ahead never matched; unpack it and compare
                # raw bytes (decoding could fail on garbage input).
                future_structure_type, = struct.unpack_from(">4s", self.offset_read(4, offset=self.pos))
                self._log("Re-read structure_id {} / structure_type {}".format(structure_id, structure_type))
                if structure_type != "blob" and future_structure_type != b"blob":
                    structure_type = ""
                    self._log("Forcing another round!")

        # Skip bytes until the next (file name) block
        self.skip(skip)
        self._log("Filename {}".format(filename))
        return filename

    def _log(self, *args):
        """Print all arguments when debug is on.  (The previous version used
        "[DEBUG] {}".format(*args), which silently dropped every argument
        after the first.)"""
        if self.debug:
            print("[DEBUG]", *args)
class DS_Store(DataBlock, object):
    """
    Represents the .DS_Store file from the given binary data.

    Parsing happens eagerly in __init__: header, offset table, table of
    contents and free list are read immediately; file names are extracted
    lazily via traverse_root().
    """

    def __init__(self, data, debug=False):
        # DataBlock.__init__ already stores data, pos and debug for us.
        super(DS_Store, self).__init__(data, debug)
        self.data = data
        self.root = self.__read_header()
        self.offsets = self.__read_offsets()
        self.toc = self.__read_TOC()
        self.freeList = self.__read_freelist()

    def __read_header(self):
        """
        Checks if self.data is actually a .DS_Store file by checking the magic bytes.
        It returns the file's root block.
        """
        # We read at least 32+4 bytes for the header!
        if len(self.data) < 36:
            raise ParsingError("Length of data is too short!")

        # Check the magic bytes for .DS_Store.  BOTH values must match; the
        # old check combined the two negations with `and`, so a file with
        # only one valid magic value slipped through.
        magic1, magic2 = struct.unpack_from(">II", self.offset_read(2 * 4))
        if magic1 != 0x1 or magic2 != 0x42756431:
            raise ParsingError("Magic bytes do not match!")

        # After the magic bytes, the offset follows two times with block's size in between.
        # Both offsets have to match and are the starting point of the root block.
        offset, size, offset2 = struct.unpack_from(">III", self.offset_read(3 * 4))
        self._log("Offset 1: {}".format(offset))
        self._log("Size: {}".format(size))
        self._log("Offset 2: {}".format(offset2))
        if not offset == offset2:
            raise ParsingError("Offsets do not match!")
        # Skip 16 bytes of unknown data...
        self.skip(4 * 4)

        # All block addresses are relative to position 4 in the file.
        return DataBlock(self.offset_read(size, offset + 4), debug=self.debug)

    def __read_offsets(self):
        """
        Reads the offsets which follow the header.
        """
        start_pos = self.root.pos
        # First get the number of offsets in this file.
        count, = struct.unpack_from(">I", self.root.offset_read(4))
        self._log("Offset count: {}".format(count))
        # Always appears to be zero!
        self.root.skip(4)

        # Iterate over the offsets and get the offset addresses.
        offsets = []
        for i in range(count):
            # Address of the offset.
            address, = struct.unpack_from(">I", self.root.offset_read(4))
            self._log("Offset {} is {}".format(i, address))
            if address == 0:
                # We're only interested in non-zero values
                continue
            offsets.append(address)

        # Calculate the end of the address space (filled with zeroes) instead
        # of dumbly reading zero values...
        # NOTE(review): kept byte-identical to the original arithmetic; it
        # appears to assume 256-entry buckets of 4 bytes each -- confirm
        # against the format description before changing.
        section_end = start_pos + (count // 256 + 1) * 256 * 4 - count * 4

        # Skip to the end of the section (log the real before/after
        # positions; the old message reported pos+section_end as "from").
        old_pos = self.root.pos
        self.root.skip(section_end)
        self._log("Skipped {} to {}".format(hex(old_pos), hex(self.root.pos)))
        self._log("Offsets: {}".format(offsets))
        return offsets

    def __read_TOC(self):
        """
        Reads the table of contents (TOCs) from the file.
        """
        self._log("POS {}".format(hex(self.root.pos)))
        # First get the number of ToC entries.
        count, = struct.unpack_from(">I", self.root.offset_read(4))
        self._log("Toc count: {}".format(count))
        toc = {}
        # Iterate over all ToCs
        for i in range(count):
            # Length of the ToC's name.  Read unsigned (">B"); the previous
            # signed ">b" would go negative for names longer than 127 bytes.
            toc_len, = struct.unpack_from(">B", self.root.offset_read(1))
            # Read the ToC's name
            toc_name, = struct.unpack_from(">{}s".format(toc_len), self.root.offset_read(toc_len))
            # Read the address (block id) in the data section
            block_id, = struct.unpack_from(">I", self.root.offset_read(4))
            # Add all values to the dictionary
            toc[toc_name.decode()] = block_id

        self._log("Toc {}".format(toc))
        return toc

    def __read_freelist(self):
        """
        Read the free list from the header.
        The free list has n=0..31 buckets with the index 2^n
        """
        freelist = {}
        for i in range(32):
            freelist[2 ** i] = []
            # Read the amount of blocks in the specific free list.
            blkcount, = struct.unpack_from(">I", self.root.offset_read(4))
            for j in range(blkcount):
                # Read blkcount block offsets.
                free_offset, = struct.unpack_from(">I", self.root.offset_read(4))
                freelist[2 ** i].append(free_offset)

        self._log("Freelist: {}".format(freelist))
        return freelist

    def __block_by_id(self, block_id):
        """
        Create a DataBlock from a given block ID (e.g. from the ToC)
        """
        # Valid indices are 0..len(self.offsets)-1.  The old check
        # `len(self.offsets) < block_id` let block_id == len through and
        # raised IndexError instead of ParsingError.
        if block_id >= len(self.offsets):
            raise ParsingError("BlockID out of range!")

        # Get the address of the block
        addr = self.offsets[block_id]

        # Do some necessary bit operations to extract the offset and the size of the block.
        # The address without the last 5 bits is the offset in the file
        offset = (int(addr) >> 0x5 << 0x5)
        # The address' last five bits are the block's size.
        size = 1 << (int(addr) & 0x1f)
        self._log("New block: addr {} offset {} size {}".format(addr, offset + 0x4, size))
        # Return the new block
        return DataBlock(self.offset_read(size, offset + 0x4), debug=self.debug)

    def traverse_root(self):
        """
        Traverse from the root block and extract all file names.
        """
        # Get the root block from the ToC 'DSDB'
        root = self.__block_by_id(self.toc['DSDB'])
        # Read the following root block's ID, so that we can traverse it.
        root_id, = struct.unpack(">I", root.offset_read(4))
        self._log("Root-ID ", root_id)

        # Read other values that might be useful, but we're not interested in... (at least right now)
        internal_block_count, = struct.unpack(">I", root.offset_read(4))
        record_count, = struct.unpack(">I", root.offset_read(4))
        block_count, = struct.unpack(">I", root.offset_read(4))
        unknown, = struct.unpack(">I", root.offset_read(4))

        # traverse from the extracted root block id.
        return self.traverse(root_id)

    def traverse(self, block_id):
        """
        Traverses a block identified by the given block_id and extracts the file names.
        """
        # Get the responsible block by its ID
        node = self.__block_by_id(block_id)
        # Extract the pointer to the next block
        next_pointer, = struct.unpack(">I", node.offset_read(4))
        # Get the number of next blocks or records
        count, = struct.unpack(">I", node.offset_read(4))
        self._log("Next Ptr {} with {} ".format(hex(next_pointer), hex(count)))

        filenames = []
        # If a next_pointer exists (>0), iterate through the next blocks recursively.
        # If not, we extract all file names from the current block.
        if next_pointer > 0:
            for i in range(0, count, 1):
                # Get the block_id for the next block
                next_id, = struct.unpack(">I", node.offset_read(4))
                self._log("Child: {}".format(next_id))
                # Traverse it recursively
                files = self.traverse(next_id)
                filenames += files
                # Also get the filename for the current block.
                filename = node.read_filename()
                self._log("Filename: ", filename)
                filenames.append(filename)
            # Now that we traversed all childs of the next_pointer, traverse the pointer itself.
            # TODO: Check if that is really necessary as the last child should be the current node... (or so?)
            files = self.traverse(next_pointer)
            filenames += files
        else:
            # We're probably in a leaf node, so extract the file names.
            for i in range(0, count, 1):
                f = node.read_filename()
                filenames.append(f)

        return filenames
import dsstore
import os
import sys

if __name__ == "__main__":
    # Expect exactly one argument: the path to a .DS_Store file.
    args = sys.argv
    if len(args) < 2:
        sys.exit("Usage: python main.py ")
    store_path = args[1]
    if not os.path.exists(store_path):
        sys.exit("File not found: Usage main.py ")

    # Read the raw bytes, then hand them to the parser.
    with open(store_path, "rb") as handle:
        raw = handle.read()

    parsed = dsstore.DS_Store(raw, debug=False)
    names = parsed.traverse_root()

    # Report how many entries were found, one file name per line.
    print("Count: ", len(names))
    for name in names:
        print(name)