├── .github
    └── FUNDING.yml
├── .gitignore
├── LICENSE.md
├── README.md
├── dsstore.py
├── main.py
└── samples
    └── .DS_Store.ctf


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: gehaxelt
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/python
  3 | 
  4 | ### Python ###
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | .pytest_cache/
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | .hypothesis/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule.*
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # Environments
 83 | .env
 84 | .venv
 85 | env/
 86 | venv/
 87 | ENV/
 88 | env.bak/
 89 | venv.bak/
 90 | 
 91 | # Spyder project settings
 92 | .spyderproject
 93 | .spyproject
 94 | 
 95 | # Rope project settings
 96 | .ropeproject
 97 | 
 98 | # mkdocs documentation
 99 | /site
100 | 
101 | # mypy
102 | .mypy_cache/
103 | 
104 | 
105 | # End of https://www.gitignore.io/api/python
106 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2018 Sebastian Neef
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python .DS_Store parser
 2 | 
 3 | This repository contains a parser for Apple's `.DS_Store` file format. 
 4 | 
 5 | A sample file form a CTF is included in the `./samples/` directory and the you can try the parser using `python3 main.py ./samples/.DS_Store.ctf`. 
 6 | 
 7 | Here's my blogpost that tries to explain the structure and format in detail:  https://0day.work/parsing-the-ds_store-file-format/
 8 | 
 9 | # Usage
10 | 
11 | ```
12 | $ python main.py samples/.DS_Store.ctf 
13 | Count:  6
14 | favicon.ico
15 | flag
16 | static
17 | templates
18 | vulnerable.py
19 | vulnerable.wsgi
20 | ```
21 | 
22 | # Useful ressources
23 | 
24 | I found the following links to be quite helpful while developing the parser:
25 | 
26 | - https://wiki.mozilla.org/DS_Store_File_Format
27 | - http://search.cpan.org/~wiml/Mac-Finder-DSStore/DSStoreFormat.pod
28 | - https://digi.ninja/projects/fdb.php
29 | 
30 | # License
31 | 
32 | MIT - See License.md
33 | 


--------------------------------------------------------------------------------
/dsstore.py:
--------------------------------------------------------------------------------
  1 | import struct
  2 | 
  3 | class ParsingError(Exception): pass
  4 | 
  5 | class DataBlock(object):
  6 |     """
  7 |     Class for a basic DataBlock inside of the DS_Store format.
  8 |     """
  9 |     def __init__(self, data, debug=False):
 10 |         super(DataBlock, self).__init__()
 11 |         self.data = data
 12 |         self.pos = 0
 13 |         self.debug = debug
 14 | 
 15 |     def offset_read(self, length, offset=None):
 16 |         """
 17 |         Returns an byte array of length from data at the given offset or pos.
 18 |         If no offset is given, pos will be increased by length.
 19 |         Throws ParsingError if offset+length > len(self.data)
 20 |         """
 21 |         if not offset:
 22 |             offset_position = self.pos
 23 |         else:
 24 |             offset_position = offset
 25 | 
 26 |         if len(self.data) < offset_position+length:
 27 |             raise ParsingError("Offset+Length > len(self.data)")
 28 |         
 29 |         if not offset:
 30 |             self.pos += length
 31 | 
 32 |         value = self.data[offset_position:offset_position+length]
 33 |         self._log("Reading: {}-{} => {}".format(hex(offset_position), hex(offset_position+length), value))
 34 |         return value
 35 | 
 36 |     def skip(self, length):
 37 |         """
 38 |         Increases pos by length without reading data!
 39 |         """
 40 |         self.pos += length
 41 | 
 42 |     def read_filename(self):
 43 |         """
 44 |         Extracts a file name from the current position.
 45 |         """
 46 |         # The length of the file name in bytes.
 47 |         length, = struct.unpack_from(">I", self.offset_read(4))
 48 |         # The file name in UTF-16, which is two bytes per character.
 49 |         filename = self.offset_read(2 * length).decode("utf-16be")
 50 |         # A structure ID that I haven't found any use of.
 51 |         structure_id, = struct.unpack_from(">I", self.offset_read(4))
 52 |         # Now read the structure type as a string of four characters and decode it to ascii.
 53 |         structure_type, = struct.unpack_from(">4s", self.offset_read(4))
 54 | 
 55 |         structure_type = structure_type.decode()
 56 |         self._log("Structure type ", structure_type)
 57 |         # If we don't find a match, skip stays < 0 and we will do some magic to find the right skip due to somehow broken .DS_Store files..
 58 |         skip = -1
 59 |         # Source: http://search.cpan.org/~wiml/Mac-Finder-DSStore/DSStoreFormat.pod
 60 |         while skip < 0:
 61 |             if structure_type == "bool":
 62 |                 skip = 1
 63 |             elif structure_type == "type" or structure_type == "long" or structure_type == "shor" or structure_type == "fwsw" or structure_type == "fwvh" or structure_type == "icvt" or structure_type == "lsvt" or structure_type == "vSrn" or structure_type == "vstl":
 64 |                 skip = 4
 65 |             elif  structure_type == "comp" or structure_type == "dutc" or structure_type == "icgo" or structure_type == "icsp" or structure_type == "logS" or structure_type == "lg1S" or structure_type == "lssp" or structure_type == "modD" or structure_type == "moDD" or structure_type == "phyS" or structure_type == "ph1S":
 66 |                 skip = 8
 67 |             elif structure_type == "blob":
 68 |                 blen, = struct.unpack_from(">I", self.offset_read(4))
 69 |                 skip = blen
 70 |             elif structure_type == "ustr" or structure_type == "cmmt" or structure_type == "extn" or structure_type == "GRP0":
 71 |                 blen, = struct.unpack_from(">I", self.offset_read(4))
 72 |                 skip = 2* blen
 73 |             elif structure_type == "BKGD":
 74 |                 skip = 12
 75 |             elif structure_type == "ICVO" or structure_type == "LSVO" or structure_type == "dscl":
 76 |                 skip = 1
 77 |             elif structure_type == "Iloc" or structure_type == "fwi0":
 78 |                 skip = 16
 79 |             elif structure_type == "dilc":
 80 |                 skip = 32
 81 |             elif structure_type == "lsvo":
 82 |                 skip = 76
 83 |             elif structure_type == "icvo":
 84 |                 pass
 85 |             elif structure_type == "info":
 86 |                 pass
 87 |             else:
 88 |                 pass
 89 | 
 90 |             if skip <= 0:
 91 |                 # We somehow didn't find a matching type. Maybe this file name's length value is broken. Try to fix it!
 92 |                 # This is a bit voodoo and probably not the nicest way. Beware, there by dragons!
 93 |                 self._log("Re-reading!")
 94 |                 # Rewind 8 bytes, so that we can re-read structure_id and structure_type
 95 |                 self.skip(-1 * 2 * 0x4)
 96 |                 filename += self.offset_read(0x2).decode("utf-16be")
 97 |                 # re-read structure_id and structure_type
 98 |                 structure_id, = struct.unpack_from(">I", self.offset_read(4))
 99 |                 structure_type, = struct.unpack_from(">4s", self.offset_read(4))
100 |                 structure_type = structure_type.decode()
101 |                 # Look-ahead and check if we have  structure_type==Iloc followed by blob.
102 |                 # If so, we're interested in blob, not Iloc. Otherwise continue!
103 |                 future_structure_type = struct.unpack_from(">4s", self.offset_read(4, offset=self.pos))
104 |                 self._log("Re-read structure_id {} / structure_type {}".format(structure_id, structure_type))
105 |                 if structure_type != "blob" and future_structure_type != "blob":
106 |                     structure_type = ""
107 |                     self._log("Forcing another round!")
108 | 
109 | 
110 |         # Skip bytes until the next (file name) block
111 |         self.skip(skip)
112 |         self._log("Filename {}".format(filename))
113 |         return filename
114 | 
115 |     def _log(self, *args):
116 |         if self.debug:
117 |             print("[DEBUG] {}".format(*args))
118 | 
119 | class DS_Store(DataBlock, object):
120 |     """
121 |     Represents the .DS_Store file from the given binary data. 
122 |     """
123 |     def __init__(self, data, debug=False):
124 |         super(DS_Store, self).__init__(data, debug)
125 |         self.data = data
126 |         self.root = self.__read_header()
127 |         self.offsets = self.__read_offsets()
128 |         self.toc = self.__read_TOC()
129 |         self.freeList = self.__read_freelist()
130 |         self.debug = debug
131 | 
132 |     def __read_header(self):
133 |         """
134 |         Checks if self.data is actually a .DS_Store file by checking the magic bytes.
135 |         It returns the file's root block.
136 |         """
137 |         # We read at least 32+4 bytes for the header!
138 |         if len(self.data) < 36:
139 |             raise ParsingError("Length of data is too short!")
140 | 
141 |         # Check the magic bytes for .DS_Store
142 |         magic1, magic2 = struct.unpack_from(">II", self.offset_read(2*4))
143 |         if not magic1 == 0x1 and not magic2 == 0x42756431:
144 |             raise ParsingError("Magic byte 1 does not match!")
145 | 
146 |         # After the magic bytes, the offset follows two times with block's size in between.
147 |         # Both offsets have to match and are the starting point of the root block
148 |         offset, size, offset2 = struct.unpack_from(">III", self.offset_read(3*4))
149 |         self._log("Offset 1: {}".format(offset))
150 |         self._log("Size: {}".format(size))
151 |         self._log("Offset 2: {}".format(offset2))
152 |         if not offset == offset2:
153 |             raise ParsingError("Offsets do not match!")
154 |         # Skip 16 bytes of unknown data...
155 |         self.skip(4*4)
156 | 
157 |         return DataBlock(self.offset_read(size, offset+4), debug=self.debug)
158 | 
159 |     def __read_offsets(self):
160 |         """
161 |         Reads the offsets which follow the header.
162 |         """
163 |         start_pos = self.root.pos
164 |         # First get the number of offsets in this file.
165 |         count, = struct.unpack_from(">I", self.root.offset_read(4))
166 |         self._log("Offset count: {}".format(count))
167 |         # Always appears to be zero!
168 |         self.root.skip(4)
169 | 
170 |         # Iterate over the offsets and get the offset addresses.
171 |         offsets = []
172 |         for i in range(count):
173 |             # Address of the offset.
174 |             address, = struct.unpack_from(">I", self.root.offset_read(4))
175 |             self._log("Offset {} is {}".format(i, address))
176 |             if address == 0:
177 |                 # We're only interested in non-zero values
178 |                 continue
179 |             offsets.append(address)
180 | 
181 |         # Calculate the end of the address space (filled with zeroes) instead of dumbly reading zero values...
182 |         section_end = start_pos + (count // 256 + 1) * 256 * 4 - count*4
183 | 
184 |         # Skip to the end of the section
185 |         self.root.skip(section_end)
186 |         self._log("Skipped {} to {}".format(hex(self.root.pos + section_end), hex(self.root.pos)))
187 |         self._log("Offsets: {}".format(offsets))
188 |         return offsets
189 | 
190 |     def __read_TOC(self):
191 |         """
192 |         Reads the table of contents (TOCs) from the file.
193 |         """
194 |         self._log("POS {}".format(hex(self.root.pos)))
195 |         # First get the number of ToC entries.
196 |         count, = struct.unpack_from(">I", self.root.offset_read(4))
197 |         self._log("Toc count: {}".format(count))
198 |         toc = {}
199 |         # Iterate over all ToCs
200 |         for i in range(count):
201 |             # Get the length of a ToC's name
202 |             toc_len, = struct.unpack_from(">b", self.root.offset_read(1))
203 |             # Read the ToC's name
204 |             toc_name, = struct.unpack_from(">{}s".format(toc_len), self.root.offset_read(toc_len))
205 |             # Read the address (block id) in the data section
206 |             block_id, = struct.unpack_from(">I", self.root.offset_read(4))
207 |             # Add all values to the dictionary
208 |             toc[toc_name.decode()]= block_id
209 | 
210 |         self._log("Toc {}".format(toc))
211 |         return toc
212 | 
213 |     def __read_freelist(self):
214 |         """
215 |         Read the free list from the header.
216 |         The free list has n=0..31 buckets with the index 2^n
217 |         """
218 |         freelist = {}
219 |         for i in range(32):
220 |             freelist[2**i] = []
221 |             # Read the amount of blocks in the specific free list.
222 |             blkcount, = struct.unpack_from(">I", self.root.offset_read(4))
223 |             for j in range(blkcount):
224 |                 # Read blkcount block offsets.
225 |                 free_offset, = struct.unpack_from(">I", self.root.offset_read(4))
226 |                 freelist[2**i].append(free_offset)
227 |         
228 |         self._log("Freelist: {}".format(freelist))
229 |         return freelist
230 | 
231 |     def __block_by_id(self, block_id):
232 |         """
233 |         Create a DataBlock from a given block ID (e.g. from the ToC)
234 |         """
235 |         # First check if the block_id is within the offsets range
236 |         if len(self.offsets) < block_id:
237 |             raise ParsingError("BlockID out of range!")
238 | 
239 |         # Get the address of the block
240 |         addr = self.offsets[block_id]
241 | 
242 |         # Do some necessary bit operations to extract the offset and the size of the block.
243 |         # The address without the last 5 bits is the offset in the file
244 |         offset = (int(addr) >> 0x5 << 0x5)
245 |         # The address' last five bits are the block's size.
246 |         size = 1 << (int(addr) & 0x1f)
247 |         self._log("New block: addr {} offset {} size {}".format( addr, offset + 0x4, size))
248 |         # Return the new block
249 |         return DataBlock(self.offset_read(size, offset + 0x4), debug=self.debug)
250 | 
251 |     def traverse_root(self):
252 |         """
253 |         Traverse from the root block and extract all file names.
254 |         """
255 |         # Get the root block from the ToC 'DSDB'
256 |         root = self.__block_by_id(self.toc['DSDB'])
257 |         # Read the following root block's ID, so that we can traverse it.
258 |         root_id, = struct.unpack(">I", root.offset_read(4))
259 |         self._log("Root-ID ", root_id)
260 | 
261 |         # Read other values that we might be useful, but we're not interested in... (at least right now)
262 |         internal_block_count, = struct.unpack(">I", root.offset_read(4))
263 |         record_count, = struct.unpack(">I", root.offset_read(4))
264 |         block_count, = struct.unpack(">I", root.offset_read(4))
265 |         unknown, = struct.unpack(">I", root.offset_read(4))
266 | 
267 |         # traverse from the extracted root block id.
268 |         return self.traverse(root_id)
269 | 
270 |     def traverse(self, block_id):
271 |         """
272 |         Traverses a block identified by the given block_id and extracts the file names.
273 |         """
274 |         # Get the responsible block by it's ID
275 |         node = self.__block_by_id(block_id)
276 |         # Extract the pointer to the next block
277 |         next_pointer, =  struct.unpack(">I", node.offset_read(4))
278 |         # Get the number of next blocks or records
279 |         count, =  struct.unpack(">I", node.offset_read(4))
280 |         self._log("Next Ptr {} with {} ".format(hex(next_pointer), hex(count)))
281 | 
282 |         filenames = []
283 |         # If a next_pointer exists (>0), iterate through the next blocks recursively
284 |         # If not, we extract all file names from the current block
285 |         if next_pointer > 0:
286 |             for i in range(0, count, 1):
287 |                 # Get the block_id for the next block
288 |                 next_id, = struct.unpack(">I", node.offset_read(4))
289 |                 self._log("Child: {}".format(next_id))
290 |                 # Traverse it recursively
291 |                 files = self.traverse(next_id)
292 |                 filenames += files
293 |                 # Also get the filename for the current block.
294 |                 filename = node.read_filename()
295 |                 self._log("Filename: ", filename)
296 |                 filenames.append(filename)
297 |             # Now that we traversed all childs of the next_pointer, traverse the pointer itself.
298 |             # TODO: Check if that is really necessary as the last child should be the current node... (or so?)
299 |             files = self.traverse(next_pointer)
300 |             filenames += files
301 |         else:
302 |             # We're probably in a leaf node, so extract the file names.
303 |             for i in range(0, count, 1):
304 |                 f = node.read_filename()
305 |                 filenames.append(f)
306 | 
307 |         return filenames
308 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import dsstore
 2 | import os
 3 | import sys
 4 | 
 5 | if __name__ == "__main__":
 6 |     if len(sys.argv) < 2:
 7 |         sys.exit("Usage: python main.py <DS_STORE FILE>")
 8 |     if not os.path.exists(sys.argv[1]):
 9 |         sys.exit("File not found: Usage main.py <file>")
10 |     with open(sys.argv[1], "rb") as f:
11 |         d = dsstore.DS_Store(f.read(), debug=False)
12 |         files = d.traverse_root()
13 |         print("Count: ", len(files))
14 |         for f in files:
15 |             print(f)
16 | 
17 | 


--------------------------------------------------------------------------------
/samples/.DS_Store.ctf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gehaxelt/Python-dsstore/859781b834244774cb509e96ccc29ee646f72739/samples/.DS_Store.ctf


--------------------------------------------------------------------------------