├── .gitignore ├── DRAG_DROP_QUICKLAUNCH.bat ├── LICENSE ├── NotionalSQLite.py ├── README.md ├── SQLitezer.py └── _resources ├── File Format For SQLite Databases.pdf ├── Forensics from the sausage factory_ An analysis of the record structure within SQLite databases.pdf ├── Forensics from the sausage factory_ Carving SQLite databases from unallocated clusters.pdf ├── Forensics from the sausage factory_ SQLite Pointer Maps pages.pdf ├── Forensics from the sausage factory_ SQLite overflow pages and other loose ends...pdf ├── Sqlite_carving_extractAndroidData.pdf └── research.txt /.gitignore: -------------------------------------------------------------------------------- 1 | _output 2 | _test_data 3 | 4 | ################# 5 | ## Eclipse 6 | ################# 7 | 8 | *.pydevproject 9 | .project 10 | .metadata 11 | bin/ 12 | tmp/ 13 | *.tmp 14 | *.bak 15 | *.swp 16 | *~.nib 17 | local.properties 18 | .classpath 19 | .settings/ 20 | .loadpath 21 | 22 | # External tool builders 23 | .externalToolBuilders/ 24 | 25 | # Locally stored "Eclipse launch configurations" 26 | *.launch 27 | 28 | # CDT-specific 29 | .cproject 30 | 31 | # PDT-specific 32 | .buildpath 33 | 34 | 35 | ################# 36 | ## Visual Studio 37 | ################# 38 | 39 | ## Ignore Visual Studio temporary files, build results, and 40 | ## files generated by popular Visual Studio add-ons. 41 | 42 | # User-specific files 43 | *.suo 44 | *.user 45 | *.sln.docstates 46 | 47 | # Build results 48 | 49 | [Dd]ebug/ 50 | [Rr]elease/ 51 | x64/ 52 | build/ 53 | [Bb]in/ 54 | [Oo]bj/ 55 | 56 | # MSTest test Results 57 | [Tt]est[Rr]esult*/ 58 | [Bb]uild[Ll]og.* 59 | 60 | *_i.c 61 | *_p.c 62 | *.ilk 63 | *.meta 64 | *.obj 65 | *.pch 66 | *.pdb 67 | *.pgc 68 | *.pgd 69 | *.rsp 70 | *.sbr 71 | *.tlb 72 | *.tli 73 | *.tlh 74 | *.tmp 75 | *.tmp_proj 76 | *.log 77 | *.vspscc 78 | *.vssscc 79 | .builds 80 | *.pidb 81 | *.log 82 | *.scc 83 | 84 | # Visual C++ cache files 85 | ipch/ 86 | *.aps 87 | *.ncb 88 | *.opensdf 89 | *.sdf 90 | *.cachefile 91 | 92 | # Visual Studio profiler 93 | *.psess 94 | *.vsp 95 | *.vspx 96 | 97 | # Guidance Automation Toolkit 98 | *.gpState 99 | 100 | # ReSharper is a .NET coding add-in 101 | _ReSharper*/ 102 | *.[Rr]e[Ss]harper 103 | 104 | # TeamCity is a build add-in 105 | _TeamCity* 106 | 107 | # DotCover is a Code Coverage Tool 108 | *.dotCover 109 | 110 | # NCrunch 111 | *.ncrunch* 112 | .*crunch*.local.xml 113 | 114 | # Installshield output folder 115 | [Ee]xpress/ 116 | 117 | # DocProject is a documentation generator add-in 118 | DocProject/buildhelp/ 119 | DocProject/Help/*.HxT 120 | DocProject/Help/*.HxC 121 | DocProject/Help/*.hhc 122 | DocProject/Help/*.hhk 123 | DocProject/Help/*.hhp 124 | DocProject/Help/Html2 125 | DocProject/Help/html 126 | 127 | # Click-Once directory 128 | publish/ 129 | 130 | # Publish Web Output 131 | *.Publish.xml 132 | *.pubxml 133 | 134 | # NuGet Packages Directory 135 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 136 | #packages/ 137 | 138 | # Windows Azure Build Output 139 | csx 140 | *.build.csdef 141 | 142 | # Windows Store app package directory 143 | AppPackages/ 144 | 145 | # Others 146 | sql/ 147 | *.Cache 148 | ClientBin/ 149 | [Ss]tyle[Cc]op.* 150 | ~$* 151 | *~ 152 | *.dbmdl 153 | *.[Pp]ublish.xml 154 | *.pfx 155 | *.publishsettings 156 | 157 | # RIA/Silverlight projects 158 | Generated_Code/ 159 | 160 | # Backup & report files from converting an old project file to a newer 161 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 162 | _UpgradeReport_Files/ 163 | Backup*/ 164 | UpgradeLog*.XML 165 | UpgradeLog*.htm 166 | 167 | # SQL Server files 168 | App_Data/*.mdf 169 | App_Data/*.ldf 170 | 171 | ############# 172 | ## Windows detritus 173 | ############# 174 | 175 | # Windows image file caches 176 | Thumbs.db 177 | ehthumbs.db 178 | 179 | # Folder config file 180 | Desktop.ini 181 | 182 | # Recycle Bin used on file shares 183 | $RECYCLE.BIN/ 184 | 185 | # Mac crap 186 | .DS_Store 187 | 188 | 189 | ############# 190 | ## Python 191 | ############# 192 | 193 | *.py[co] 194 | 195 | # Packages 196 | *.egg 197 | *.egg-info 198 | dist/ 199 | build/ 200 | eggs/ 201 | parts/ 202 | var/ 203 | sdist/ 204 | develop-eggs/ 205 | .installed.cfg 206 | 207 | # Installer logs 208 | pip-log.txt 209 | 210 | # Unit test / coverage reports 211 | .coverage 212 | .tox 213 | 214 | #Translations 215 | *.mo 216 | 217 | #Mr Developer 218 | .mr.developer.cfg 219 | -------------------------------------------------------------------------------- /DRAG_DROP_QUICKLAUNCH.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | cd /d "%~dp0" 3 | SQLitezer.py -i %1 -o %1_report -a -u -c 4 | PAUSE -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and 10 | distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 13 | owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities 16 | that control, are controlled by, or are under common control with that entity. 17 | For the purposes of this definition, "control" means (i) the power, direct or 18 | indirect, to cause the direction or management of such entity, whether by 19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 20 | outstanding shares, or (iii) beneficial ownership of such entity. 21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising 23 | permissions granted by this License. 24 | 25 | "Source" form shall mean the preferred form for making modifications, including 26 | but not limited to software source code, documentation source, and configuration 27 | files. 28 | 29 | "Object" form shall mean any form resulting from mechanical transformation or 30 | translation of a Source form, including but not limited to compiled object code, 31 | generated documentation, and conversions to other media types. 32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made 34 | available under the License, as indicated by a copyright notice that is included 35 | in or attached to the work (an example is provided in the Appendix below). 36 | 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that 38 | is based on (or derived from) the Work and for which the editorial revisions, 39 | annotations, elaborations, or other modifications represent, as a whole, an 40 | original work of authorship. 
For the purposes of this License, Derivative Works 41 | shall not include works that remain separable from, or merely link (or bind by 42 | name) to the interfaces of, the Work and Derivative Works thereof. 43 | 44 | "Contribution" shall mean any work of authorship, including the original version 45 | of the Work and any modifications or additions to that Work or Derivative Works 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 47 | by the copyright owner or by an individual or Legal Entity authorized to submit 48 | on behalf of the copyright owner. For the purposes of this definition, 49 | "submitted" means any form of electronic, verbal, or written communication sent 50 | to the Licensor or its representatives, including but not limited to 51 | communication on electronic mailing lists, source code control systems, and 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for 53 | the purpose of discussing and improving the Work, but excluding communication 54 | that is conspicuously marked or otherwise designated in writing by the copyright 55 | owner as "Not a Contribution." 56 | 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 58 | of whom a Contribution has been received by Licensor and subsequently 59 | incorporated within the Work. 60 | 61 | 2. Grant of Copyright License. 62 | 63 | Subject to the terms and conditions of this License, each Contributor hereby 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 65 | irrevocable copyright license to reproduce, prepare Derivative Works of, 66 | publicly display, publicly perform, sublicense, and distribute the Work and such 67 | Derivative Works in Source or Object form. 68 | 69 | 3. Grant of Patent License. 70 | 71 | Subject to the terms and conditions of this License, each Contributor hereby 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 73 | irrevocable (except as stated in this section) patent license to make, have 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 75 | such license applies only to those patent claims licensable by such Contributor 76 | that are necessarily infringed by their Contribution(s) alone or by combination 77 | of their Contribution(s) with the Work to which such Contribution(s) was 78 | submitted. If You institute patent litigation against any entity (including a 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 80 | Contribution incorporated within the Work constitutes direct or contributory 81 | patent infringement, then any patent licenses granted to You under this License 82 | for that Work shall terminate as of the date such litigation is filed. 83 | 84 | 4. Redistribution. 
85 | 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof 87 | in any medium, with or without modifications, and in Source or Object form, 88 | provided that You meet the following conditions: 89 | 90 | You must give any other recipients of the Work or Derivative Works a copy of 91 | this License; and 92 | You must cause any modified files to carry prominent notices stating that You 93 | changed the files; and 94 | You must retain, in the Source form of any Derivative Works that You distribute, 95 | all copyright, patent, trademark, and attribution notices from the Source form 96 | of the Work, excluding those notices that do not pertain to any part of the 97 | Derivative Works; and 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any 99 | Derivative Works that You distribute must include a readable copy of the 100 | attribution notices contained within such NOTICE file, excluding those notices 101 | that do not pertain to any part of the Derivative Works, in at least one of the 102 | following places: within a NOTICE text file distributed as part of the 103 | Derivative Works; within the Source form or documentation, if provided along 104 | with the Derivative Works; or, within a display generated by the Derivative 105 | Works, if and wherever such third-party notices normally appear. The contents of 106 | the NOTICE file are for informational purposes only and do not modify the 107 | License. You may add Your own attribution notices within Derivative Works that 108 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 109 | provided that such additional attribution notices cannot be construed as 110 | modifying the License. 111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, or 113 | distribution of Your modifications, or for any such Derivative Works as a whole, 114 | provided Your use, reproduction, and distribution of the Work otherwise complies 115 | with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. 118 | 119 | Unless You explicitly state otherwise, any Contribution intentionally submitted 120 | for inclusion in the Work by You to the Licensor shall be under the terms and 121 | conditions of this License, without any additional terms or conditions. 122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 123 | any separate license agreement you may have executed with Licensor regarding 124 | such Contributions. 125 | 126 | 6. Trademarks. 127 | 128 | This License does not grant permission to use the trade names, trademarks, 129 | service marks, or product names of the Licensor, except as required for 130 | reasonable and customary use in describing the origin of the Work and 131 | reproducing the content of the NOTICE file. 132 | 133 | 7. Disclaimer of Warranty. 134 | 135 | Unless required by applicable law or agreed to in writing, Licensor provides the 136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, 137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 138 | including, without limitation, any warranties or conditions of TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are
140 | solely responsible for determining the appropriateness of using or
141 | redistributing the Work and assume any risks associated with Your exercise of
142 | permissions under this License.
143 | 
144 | 8. Limitation of Liability.
145 | 
146 | In no event and under no legal theory, whether in tort (including negligence),
147 | contract, or otherwise, unless required by applicable law (such as deliberate
148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be
149 | liable to You for damages, including any direct, indirect, special, incidental,
150 | or consequential damages of any character arising as a result of this License or
151 | out of the use or inability to use the Work (including but not limited to
152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or
153 | any and all other commercial damages or losses), even if such Contributor has
154 | been advised of the possibility of such damages.
155 | 
156 | 9. Accepting Warranty or Additional Liability.
157 | 
158 | While redistributing the Work or Derivative Works thereof, You may choose to
159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or
160 | other liability obligations and/or rights consistent with this License. However,
161 | in accepting such obligations, You may act only on Your own behalf and on Your
162 | sole responsibility, not on behalf of any other Contributor, and only if You
163 | agree to indemnify, defend, and hold each Contributor harmless for any liability
164 | incurred by, or claims asserted against, such Contributor by reason of your
165 | accepting any such warranty or additional liability.
166 | 
167 | END OF TERMS AND CONDITIONS
168 | 
169 | APPENDIX: How to apply the Apache License to your work
170 | 
171 | To apply the Apache License to your work, attach the following boilerplate
172 | notice, with the fields enclosed by brackets "[]" replaced with your own
173 | identifying information. (Don't include the brackets!) The text should be
174 | enclosed in the appropriate comment syntax for the file format. We also
175 | recommend that a file or class name and description of purpose be included on
176 | the same "printed page" as the copyright notice for easier identification within
177 | third-party archives.
178 | 
179 | Copyright [yyyy] [name of copyright owner]
180 | 
181 | Licensed under the Apache License, Version 2.0 (the "License");
182 | you may not use this file except in compliance with the License.
183 | You may obtain a copy of the License at
184 | 
185 | http://www.apache.org/licenses/LICENSE-2.0
186 | 
187 | Unless required by applicable law or agreed to in writing, software
188 | distributed under the License is distributed on an "AS IS" BASIS,
189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
190 | See the License for the specific language governing permissions and
191 | limitations under the License.
192 | 
-------------------------------------------------------------------------------- /NotionalSQLite.py: --------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Name: Notional SQLite module
3 | # Purpose: Provides an object class for decoding and interpreting SQLite
4 | # DB data from a forensic perspective. Note: this module does not
5 | # provide querying capabilities - it is for low-level analysis only.
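  | #
  | #              A minimal usage sketch (the database path is hypothetical;
  | #              the class and methods are the ones defined in this module):
  | #
  | #                  db = NotionalSQLite("/evidence/case.db", debug=False)
  | #                  if db.checkSignature():
  | #                      print db.translateHeader()["textencode"]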
6 | #
7 | # Author: Notional-Labs.com
8 | #
9 | # Created: 30/08/2013
10 | # Licence: Apache V.2
11 | #-------------------------------------------------------------------------------
12 | import logging
13 | import struct
14 | import os
15 | import re
16 | import unicodedata
17 | 
18 | class NotionalSQLite:
19 |     """
20 |     NotionalSQLite is used to store file structure information and provide
21 |     convenience functions for parsing the contents.
22 |     """
23 |     _dbheaderfmt = ">16sHbbbbbbiiiiiiiiiii24sii"
24 |     _dictkeys = ["sig","pagesize","writever","readver","resspace","maxpayload",
25 |                  "minpayload","leafpayload","changecount","dbsize","freepagelist",
26 |                  "totalfreepage","schemacookie","schemanum","defpagecache",
27 |                  "bigroottree","textencode","userver","incvac","expansion",
28 |                  "validfor","sqlver"]
29 |     _btreetblleafheaderfmt = ">bsssbi"
30 |     all_chars = (unichr(i) for i in xrange(0x110000))
31 |     control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
32 | 
33 |     statuscode = 1
34 |     headerdict = dict()
35 |     headertransdict = dict()
36 |     isDirty = bool()
37 |     dbfile = None
38 |     isWAL = False
39 |     debug = False
40 | 
41 |     def __init__(self, filepath, debug):
42 | 
43 |         self.debug = debug
44 | 
45 |         for key in self._dictkeys:
46 |             self.headertransdict[key] = "ERROR - call translateHeader() first."
47 |             self.headerdict[key] = "ERROR - Could not read value."
48 |         try:
49 |             self.dbfile = open(filepath,"rb")
50 |         except IOError:
51 |             logging.error("ERROR: Could not open database file")
52 |             return
53 | 
54 |         self._parseDBHeader()
55 | 
56 |         if self.debug:
57 |             pass
58 | 
59 |         self.statuscode = 0
60 | 
61 |     def _strip_nonprintable(self,s):
62 |         control_char_re = re.compile('[%s]' % re.escape(self.control_chars))
63 |         return control_char_re.sub('', s)
64 | 
65 |     def _parseTableLeafPageHeader(self,offset,pagesize):
66 |         """
67 |         Parse a b-tree Table Leaf page header given its starting (physical) offset.
68 |         Pass the physical offset to the start of the page (whose first byte
69 |         should be 0x0D) and the page size from the DB header. Cell pointers,
70 |         freeblock offsets, and the content-area offset are page-relative.
71 |         Returns a dict of header field metadata, a list of (active) cell-pointers,
72 |         a list of freeblocks, and the starting offset of the content area.
73 |         """
74 |         pageheader = dict()
75 |         celllist = list()
76 |         freeblklist = list()
77 | 
78 |         # Parse Page Header
79 |         self.dbfile.seek(offset)
80 |         pageheader['pagetype'] = ord(self.dbfile.read(1))
81 |         pageheader['freeblockofs'] = struct.unpack(">h",self.dbfile.read(2))[0]
82 |         pageheader['pagecellcount'] = struct.unpack(">h",self.dbfile.read(2))[0]
83 |         pageheader['contentareaofs'] = struct.unpack(">h",self.dbfile.read(2))[0]
84 |         pageheader['freebytefrags'] = ord(self.dbfile.read(1))
85 | 
86 |         # Parse Cell Pointer Array and note the start of cell content area
87 |         for ptr in range(0,pageheader['pagecellcount']):
88 |             celllist.append(struct.unpack(">h",self.dbfile.read(2))[0])
89 |         cellptrendofs = self.dbfile.tell() - offset
90 | 
91 |         # Get Freeblock offsets
92 |         self.dbfile.seek(offset+pageheader['freeblockofs'])
93 |         freeblkptr = pageheader['freeblockofs']
94 |         while freeblkptr != 0:
95 |             freeblklist.append(freeblkptr)
96 |             freeblkptr = struct.unpack(">h",self.dbfile.read(2))[0]
97 |             self.dbfile.seek(offset+freeblkptr)
98 | 
99 |         return pageheader, celllist, freeblklist, cellptrendofs
100 | 
101 |     def _parseDBHeader(self):
102 |         """
103 |         Parse the SQLite 3 database header metadata and control information.
104 |         Sets headerdict.
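  | 
  |         For example (field offsets per http://www.sqlite.org/fileformat.html),
  |         the single struct.unpack() below is equivalent to reading each field
  |         individually from the 100-byte header, e.g.:
  | 
  |             struct.unpack(">H", rawheader[16:18])[0]  # page size
  |             struct.unpack(">i", rawheader[56:60])[0]  # text encoding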
105 | """ 106 | rawheader = self.dbfile.read(100) 107 | unpackedheader = struct.unpack(self._dbheaderfmt,rawheader) 108 | self.headerdict = dict(zip(self._dictkeys,list(unpackedheader))) 109 | if (self.headerdict["readver"] == 2) or (self.headerdict["writever"] == 2): 110 | self.isWAL = True 111 | 112 | def _parseCell(self,offset): 113 | """ 114 | Parse a B-Tree Leaf Page Cell, given it's starting absolute byte offset. 115 | Pass absolute starting byte offset for the cell header. 116 | Returns the parsed cell as a list in the form: 117 | 118 | """ 119 | celldatalist = list() 120 | cellheader,dataoffset,payloadlen,recordnum = self._parseCellHeader(offset) 121 | 122 | for field in cellheader: 123 | if field[0] == "NULL": 124 | celldatalist.append(recordnum) 125 | elif field[0] == "ST_INT8": 126 | self.dbfile.seek(dataoffset) 127 | celldatalist.append(ord(struct.unpack(">c",self.dbfile.read(1))[0])) 128 | dataoffset+=field[1] 129 | elif field[0] == "ST_INT16": 130 | self.dbfile.seek(dataoffset) 131 | celldatalist.append(struct.unpack(">h",self.dbfile.read(2))[0]) 132 | dataoffset+=field[1] 133 | elif field[0] == "ST_INT24": 134 | self.dbfile.seek(dataoffset) 135 | celldatalist.append("ST_INT24 - NOT IMPLEMENTED!") # NOT IMPLEMENTED YET! 136 | dataoffset+=field[1] 137 | elif field[0] == "ST_INT32": 138 | self.dbfile.seek(dataoffset) 139 | celldatalist.append(struct.unpack(">i",self.dbfile.read(4))[0]) 140 | dataoffset+=field[1] 141 | elif field[0] == "ST_INT48": 142 | self.dbfile.seek(dataoffset) 143 | celldatalist.append("ST_INT48 - NOT IMPLEMENTED!") # NOT IMPLEMENTED YET! 144 | dataoffset+=field[1] 145 | elif field[0] == "ST_INT64": 146 | self.dbfile.seek(dataoffset) 147 | celldatalist.append(struct.unpack(">q",self.dbfile.read(8))[0]) 148 | dataoffset+=8 149 | elif field[0] == "ST_FLOAT": 150 | self.dbfile.seek(dataoffset) 151 | celldatalist.append(struct.unpack(">d",self.dbfile.read(8))[0]) 152 | dataoffset+=8 153 | elif field[0] == "ST_C0": 154 | celldatalist.append("ST_C0 - NOT IMPLEMENTED!") # NOT IMPLEMENTED YET! 155 | elif field[0] == "ST_C1": 156 | celldatalist.append("ST_C0 - NOT IMPLEMENTED!") # NOT IMPLEMENTED YET! 157 | elif field[0] == "ST_BLOB": 158 | self.dbfile.seek(dataoffset) 159 | celldatalist.append(self.dbfile.read(field[1])) 160 | dataoffset+=field[1] 161 | elif field[0] == "ST_TEXT": 162 | self.dbfile.seek(dataoffset) 163 | celldatalist.append(struct.unpack("%ss" % str(field[1]),self.dbfile.read(field[1]))[0]) 164 | dataoffset+=field[1] 165 | else: 166 | print field[0] 167 | 168 | return celldatalist 169 | 170 | def _parseCellHeader(self,offset): 171 | """ 172 | Parse a B-Tree Leaf Page Cell Header, given it's starting absolute byte 173 | offset. 174 | Pass absolute starting byte offset for the cell header to be decoded. 175 | Returns tuple containing a list of tuples in the form 176 | [(String type,int length),...], and the starting offset of the payload 177 | fields. 
178 | """ 179 | headerlist = list() 180 | 181 | # Payload length 182 | payloadlen,length = self._getVarIntOfs(offset) 183 | offset+=length 184 | # Record Number 185 | recordnum,length = self._getVarIntOfs(offset) 186 | offset+=length 187 | # Payload Header Length 188 | payloadheaderlen,length = self._getVarIntOfs(offset) 189 | payloadheaderlenofs = offset + payloadheaderlen 190 | offset+=length 191 | # Payload Fields 192 | while offset < (payloadheaderlenofs): 193 | fieldtype,length = self._getVarIntOfs(offset) 194 | # Determine Serial Type 195 | if fieldtype == 0: 196 | headerlist.append(("NULL",0)) 197 | elif fieldtype == 1: 198 | headerlist.append(("ST_INT8",1)) 199 | elif fieldtype == 2: 200 | headerlist.append(("ST_INT16",2)) 201 | elif fieldtype == 3: 202 | headerlist.append(("ST_INT24",3)) 203 | elif fieldtype == 4: 204 | headerlist.append(("ST_INT32",4)) 205 | elif fieldtype == 5: 206 | headerlist.append(("ST_INT48",6)) 207 | elif fieldtype == 6: 208 | headerlist.append(("ST_INT64",8)) 209 | elif fieldtype == 7: 210 | headerlist.append(("ST_FLOAT",8)) 211 | elif fieldtype == 8: 212 | headerlist.append(("ST_C0",0)) 213 | elif fieldtype == 9: 214 | headerlist.append(("ST_C1",0)) 215 | elif fieldtype > 11: 216 | if (fieldtype%2) == 0: 217 | headerlist.append(("ST_BLOB",(fieldtype-12)/2)) 218 | else: 219 | headerlist.append(("ST_TEXT",(fieldtype-13)/2)) 220 | else: 221 | headerlist.append(("Reserved: %s" % str(fieldtype),0)) 222 | offset+=length 223 | 224 | return headerlist, offset, payloadlen, recordnum 225 | 226 | def _getVarIntOfs(self,offset): 227 | """ 228 | Decode Huffman-coded two's compliment integers used for storing 64-bit 229 | variable-length integers. Implements Mike Harrington's example technique 230 | for decoding SQLite VarInts (https://mobileforensics.wordpress.com/2011/ 231 | 09/17/huffman-coding-in-sqlite-a-primer-for-mobile-forensics/). SQLite 232 | spec allows for between 1-9 byte runs per VarInt - this method should 233 | scale to that size, despite such huge values being rare in practice. 234 | 235 | Pass starting byte offset to decode. 236 | Returns tuple(VarInt value and the VarInt length). 237 | """ 238 | self.dbfile.seek(offset) 239 | varintlen = varintval = 0 240 | 241 | while True: 242 | if((ord(self.dbfile.read(1))&(1<<7))!=0): 243 | varintlen+=1 244 | else: 245 | varintlen+=1 246 | break 247 | self.dbfile.seek(offset) 248 | for i in reversed(range(0,varintlen)): 249 | if (i == 0): 250 | byteval = ord(self.dbfile.read(1)) 251 | varintval+=byteval 252 | else: 253 | byteval = ord(self.dbfile.read(1)) 254 | varintval+=(byteval - 128)*(2**(i*7)) 255 | 256 | return varintval,varintlen 257 | 258 | def _getVarInt(self,bytestring): 259 | """ 260 | As with _getVarIntOfs, but with an already-known length byte string. 261 | Example: result = _getVarInt(file.read(3)) 262 | Warning: This methid will attempt to decode the bytestring regardless 263 | of whether it's a valid VarInt. 264 | Pass byte string to decode. 265 | Returns VarInt value. 
266 | """ 267 | varintlen = len(bytestring) 268 | varintval = bytestringpos = 0 269 | 270 | for i in reversed(range(0,varintlen)): 271 | if (i == 0): 272 | byteval = ord(bytestring[bytestringpos]) 273 | varintval+=byteval 274 | else: 275 | byteval = ord(bytestring[bytestringpos]) 276 | varintval+=(byteval - 128)*(2**(i*7)) 277 | bytestringpos+=1 278 | 279 | return varintval,varintlen 280 | 281 | def getPageTypeDict(self,pagesize): 282 | """ 283 | Return a dict containing seperate lists of all Page type absolute 284 | starting offsets. 285 | """ 286 | pagedict = dict() 287 | pagedict['intindex'] = list() 288 | pagedict['inttable'] = list() 289 | pagedict['leafindex'] = list() 290 | pagedict['leaftable'] = list() 291 | pagedict['overflow'] = list() 292 | offset = 0 293 | filesize = os.path.getsize(self.dbfile.name) 294 | 295 | while (offset < filesize): 296 | self.dbfile.seek(offset) 297 | flag = ord(self.dbfile.read(1)) 298 | if (flag == 2): 299 | pagedict['intindex'].append(offset) 300 | elif (flag == 5): 301 | pagedict['inttable'].append(offset) 302 | elif (flag == 10): 303 | pagedict['leafindex'].append(offset) 304 | elif (flag == 13): 305 | pagedict['leaftable'].append(offset) 306 | elif (flag == 83): 307 | pass 308 | elif (flag == 0): 309 | pagedict['overflow'].append(offset) 310 | else: 311 | print "Invalid Page Type: %s (%s)" % (str(flag), str(offset)) 312 | offset+=pagesize 313 | return pagedict 314 | 315 | def getActiveRowContent(self, offset, pagesize): 316 | """ 317 | Return a list of lists containing the content of all active cells in the 318 | page. 319 | """ 320 | cellcontentlist = list() 321 | a,celllist,c,d = self._parseTableLeafPageHeader(offset,pagesize) 322 | for cell in celllist: 323 | cellcontentlist.append(self._parseCell(offset+cell)) 324 | return cellcontentlist 325 | 326 | def getUnallocContent(self, offset, pagesize): 327 | """ 328 | Return a list of lists containing the content of all unallocated areas 329 | in the page. All non-printable chars are stripped. 330 | """ 331 | unalloclist = list() 332 | pageheader, celllist, freeblklist, cellptrendofs = self._parseTableLeafPageHeader(offset,pagesize) 333 | self.dbfile.seek(offset+cellptrendofs) 334 | length = pageheader['contentareaofs']-cellptrendofs 335 | unalloclist.append([offset+cellptrendofs,"Unallocated",length,self._strip_nonprintable(self.dbfile.read(length))]) 336 | for freeblk in freeblklist: 337 | self.dbfile.seek(offset+freeblk+2) # skip past the 2-byte next freeblock ptr 338 | freeblklen = struct.unpack(">H",self.dbfile.read(2))[0] 339 | unalloclist.append([offset+freeblk,"Free Block",freeblklen,self._strip_nonprintable(self.dbfile.read(freeblklen-4))]) 340 | return unalloclist 341 | 342 | def mapPages(self,pagesize): 343 | """ 344 | Debugging method to give a visual representation of the distribution of 345 | page types. 346 | Pass the pagesize value from the DB header. 347 | Returns a string. 
348 |         key:
349 |         h = header page
350 |         i = interior index b-tree page
351 |         t = interior table b-tree page
352 |         I = leaf index b-tree page
353 |         T = leaf table b-tree page
  |         O = overflow or other unidentified page
354 |         """
355 |         offset = intindex = inttbl = leafindex = leaftbl = headercnt = overflow = 0
356 |         pagemap = ""
357 |         filesize = os.path.getsize(self.dbfile.name)
358 | 
359 |         while (offset < filesize):
360 |             self.dbfile.seek(offset)
361 |             flag = ord(self.dbfile.read(1))
362 |             if (flag == 2):
363 |                 pagemap+="i"
364 |                 intindex+=1
365 |             elif (flag == 5):
366 |                 pagemap+="t"
367 |                 inttbl+=1
368 |             elif (flag == 10):
369 |                 pagemap+="I"
370 |                 leafindex+=1
371 |             elif (flag == 13):
372 |                 pagemap+="T"
373 |                 leaftbl+=1
374 |             elif (flag == 83):
375 |                 pagemap+="h"
376 |                 headercnt+=1
377 |             else:
378 |                 pagemap+="O"
379 |                 overflow+=1
380 |             offset+=(pagesize)
381 |         total = intindex + inttbl + leafindex + leaftbl + headercnt + overflow
382 |         return (pagemap,intindex,inttbl,leafindex,leaftbl,headercnt,overflow,total)
383 | 
384 |     def checkSignature(self):
385 |         """
386 |         Convenience function to perform signature check.
387 |         Returns bool.
388 |         """
389 |         if self.headerdict["sig"] == "SQLite format 3\x00":
390 |             return True
391 |         else:
392 |             return False
393 | 
394 |     def translateHeader(self):
395 |         """
396 |         Parse the unpacked header into human-readable values according to
397 |         the format spec at: http://www.sqlite.org/fileformat.html
398 |         Returns Dict.
399 |         """
400 |         # Magic Header String
401 |         if self.headerdict["sig"] == 'SQLite format 3\x00':
402 |             self.headertransdict["sig"] = self.headerdict["sig"]
403 |         else:
404 |             self.headertransdict["sig"] = ("Invalid Signature")
405 |         # Page Size (a stored value of 1 means 65536, since the field is a
  |         # 2-byte unsigned big-endian integer)
406 |         if self.headerdict["pagesize"] == 1:
407 |             self.headertransdict["pagesize"] = "65536 - SQLite v.3.7.1 or greater"
408 |         else:
409 |             self.headertransdict["pagesize"] = str(self.headerdict["pagesize"])
410 |         # File format version numbers
411 |         if (self.headerdict["writever"] > 2) and (self.headerdict["readver"] in (1,2)):
412 |             self.headertransdict["writever"] = "READ-ONLY"
413 |             if self.headerdict["readver"] == 1:
414 |                 self.headertransdict["readver"] = "Legacy - Roll Back Journalling"
415 |             else:
416 |                 self.headertransdict["readver"] = "WAL - Write Ahead Log Journalling"
417 |         elif (self.headerdict["readver"] > 2):
418 |             self.headertransdict["readver"] = "Read and Write Disabled."
419 |             if self.headerdict["writever"] == 1:
420 |                 self.headertransdict["writever"] = "Legacy - Roll Back Journalling"
421 |             else:
422 |                 self.headertransdict["writever"] = "WAL - Write Ahead Log Journalling"
423 |         elif (self.headerdict["writever"] in (1,2)) and (self.headerdict["readver"] in (1,2)):
424 |             if self.headerdict["readver"] == 1:
425 |                 self.headertransdict["readver"] = "Legacy - Roll Back Journalling"
426 |             else:
427 |                 self.headertransdict["readver"] = "WAL - Write Ahead Log Journalling"
428 |             if self.headerdict["writever"] == 1:
429 |                 self.headertransdict["writever"] = "Legacy - Roll Back Journalling"
430 |             else:
431 |                 self.headertransdict["writever"] = "WAL - Write Ahead Log Journalling"
432 |         else:
433 |             self.headertransdict["readver"] = "Invalid Value: %s" % self.headerdict["readver"]
434 |             self.headertransdict["writever"] = "Invalid Value: %s" % self.headerdict["writever"]
435 |         # Reserved bytes per page
436 |         self.headertransdict["resspace"] = str(self.headerdict["resspace"])
437 |         # Payload fractions
438 |         if (self.headerdict["maxpayload"] == 64):
439 |             self.headertransdict["maxpayload"] = "64"
440 |         else:
441 |             self.headertransdict["maxpayload"] = "Invalid value: %s" % str(self.headerdict["maxpayload"])
442 |         if (self.headerdict["minpayload"] == 32):
443 |             self.headertransdict["minpayload"] = "32"
444 |         else:
445 |             self.headertransdict["minpayload"] = "Invalid value: %s" % str(self.headerdict["minpayload"])
446 |         if (self.headerdict["leafpayload"] == 32):
447 |             self.headertransdict["leafpayload"] = "32"
448 |         else:
449 |             self.headertransdict["leafpayload"] = "Invalid value: %s" % str(self.headerdict["leafpayload"])
450 |         # File change counter
451 |         self.headertransdict["changecount"] = str(self.headerdict["changecount"])
452 |         if self.isWAL:
453 |             self.headertransdict["changecount"] += " (WAL enabled - value may be inaccurate.)"
454 |         # In-header Database Size
455 |         if (self.headerdict["changecount"] == self.headerdict["validfor"]) and (self.headerdict["dbsize"] > 0):
456 |             self.headertransdict["dbsize"] = str(self.headerdict["dbsize"]) + " page(s)"
457 |         else:
458 |             self.headertransdict["dbsize"] = "Invalid value: %s" % str(self.headerdict["dbsize"])
459 |         # Free Page List page number
460 |         self.headertransdict["freepagelist"] = str(self.headerdict["freepagelist"])
461 |         # Total Free Pages
462 |         self.headertransdict["totalfreepage"] = str(self.headerdict["totalfreepage"])
463 |         # Schema cookie
464 |         self.headertransdict["schemacookie"] = str(self.headerdict["schemacookie"])
465 |         # Schema Format number
466 |         if self.headerdict["schemanum"] == 1:
467 |             self.headertransdict["schemanum"] = "1 - SQLite 3.0.0+ Compatible"
468 |         elif self.headerdict["schemanum"] == 2:
469 |             self.headertransdict["schemanum"] = "2 - SQLite 3.1.3+ Compatible"
470 |         elif self.headerdict["schemanum"] == 3:
471 |             self.headertransdict["schemanum"] = "3 - SQLite 3.1.4+ Compatible"
472 |         elif self.headerdict["schemanum"] == 4:
473 |             self.headertransdict["schemanum"] = "4 - SQLite 3.3.0+ Compatible"
474 |         else:
475 |             self.headertransdict["schemanum"] = "Invalid value: %s" % str(self.headerdict["schemanum"])
476 |         # Suggested cache size
477 |         self.headertransdict["defpagecache"] = str(self.headerdict["defpagecache"])
478 |         # Largest Root Tree Page and Incremental Vacuum Settings
479 |         if self.headerdict["bigroottree"] == 0:
480 |             self.headertransdict["bigroottree"] = "0 - ptrmap pages disabled"
481 |             if self.headerdict["incvac"] == 0:
482 |                 self.headertransdict["incvac"] = "0 - auto_vacuum mode"
483 |             else:
self.headertransdict["incvac"] = "Invalid mode: %s" % str(self.headerdict["incvac"]) 485 | else: 486 | self.headertransdict["bigroottree"] = str(self.headerdict["bigroottree"]) 487 | self.headertransdict["incvac"] = "%s - incremental_vacuum mode" % str(self.headerdict["incvac"]) 488 | # Text Encoding 489 | if self.headerdict["textencode"] == 1: 490 | self.headertransdict["textencode"] = "UTF-8" 491 | elif self.headerdict["textencode"] == 2: 492 | self.headertransdict["textencode"] = "UTF-16LE" 493 | elif self.headerdict["textencode"] == 3: 494 | self.headertransdict["textencode"] = "UTF-16BE" 495 | else: 496 | self.headertransdict["textencode"] = "Invalid Encoding: %s" % self.headerdict["textencode"] 497 | # User Version 498 | self.headertransdict["userver"] = str(self.headerdict["userver"]) 499 | # Expansion block 500 | self.headertransdict["expansion"] = ":".join("{:02x}".format(ord(c)) for c in self.headerdict["expansion"]) 501 | # Version Valid For number 502 | self.headertransdict["validfor"] = self.headerdict["validfor"] 503 | # SQlite version number 504 | self.headertransdict["sqlver"] = self.headerdict["sqlver"] 505 | 506 | return self.headertransdict 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | SQLiteZer 2 | ========= 3 | 4 | A forensic SQLite 3 database analysis tool. Parse out DB unallocated space to recover deleted data, directly export active cell content (bypassing the SQL parser), automatically summarize database object statistics, and expose all the juicy technical info any self-respecting reverse engineer might want. Written in Python 2.7. 5 | 6 | usage: SQLitezer.py [-h] -i INPUT -o OUTPUT [-a] [-c] [-m] [-u] [-x] 7 | 8 | optional arguments: 9 | -h, --help show this help message and exit 10 | -i INPUT, --input INPUT 11 | Target SQLite database file. 12 | -o OUTPUT, --output OUTPUT 13 | Output job name (exclude file extension). 14 | -a, --active OPTIONAL: Dump all raw active records into a CSV. 15 | -c, --content OPTIONAL: Generate content report. 16 | -m, --pagemap OPTIONAL: Print a visual map of the physical page 17 | distribution (work in progress). 18 | -u, --unalloc OPTIONAL: Dump all unallocated areas of each page into 19 | a TSV. 20 | -x, --debug OPTIONAL: Developers Only - Enable debug mode. 21 | 22 | IMPORTANT NOTE: The sqlite3.dll packaged with the standard Python 2.x installers is not natively compiled with some of the extensions you are likely to encounter such as FTS2/3. In order to maximize compatibility, replace your python install's sqlite3.dll (e.g. C:\Python27\DLLs\sqlite3.dll) with the reference DLL from http://sqlite.org/2013/sqlite-dll-win32-x86-3080100.zip 23 | -------------------------------------------------------------------------------- /SQLitezer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #------------------------------------------------------------------------------- 3 | # Name: SQLitezer - Forensic SQLite Database Analyser and Reporting Tool 4 | # Purpose: Produces a csv-formatted report of each element in a database and 5 | # the associated metadata and content statistics (if appropriate). 
6 | #
7 | # Author: Jim Hung
8 | #
9 | # Created: 24/08/2013
10 | # Licence: Apache v2.0
11 | #-------------------------------------------------------------------------------
12 | import os
13 | import sys
14 | import argparse
15 | import datetime
16 | import time
17 | import sqlite3
18 | import logging
19 | import csv
20 | import struct
21 | 
22 | import NotionalSQLite
23 | 
24 | version = '0.6.2'
25 | build = '20131207'
26 | 
27 | headerfields = (("Signature","sig"),
28 |                 ("Page Size","pagesize"),
29 |                 ("Read Format","readver"),
30 |                 ("Write Format","writever"),
  |                 ("Reserved Bytes Per Page","resspace"),
31 |                 ("Max Embedded Payload Fraction","maxpayload"),
32 |                 ("Min Embedded Payload Fraction","minpayload"),
33 |                 ("Leaf Payload Fraction","leafpayload"),
34 |                 ("File Change Count","changecount"),
35 |                 ("In-header DB Size","dbsize"),
36 |                 ("Free Page List starting page","freepagelist"),
37 |                 ("Total Free Pages","totalfreepage"),
38 |                 ("Schema Cookie","schemacookie"),
39 |                 ("Schema Format number","schemanum"),
40 |                 ("Suggested cache size","defpagecache"),
41 |                 ("Largest Root Page Number","bigroottree"),
42 |                 ("Text Encoding","textencode"),
43 |                 ("User Version","userver"),
44 |                 ("Vacuum Settings","incvac"),
45 |                 ("Expansion block","expansion"),
46 |                 ("Valid-For Version","validfor"),
47 |                 ("Last SQLite Version","sqlver"))
48 | 
49 | def main():
50 |     startTime = datetime.datetime.now()
51 |     startTimeStr = str(startTime)[:19].replace(":","-").replace(" ","_")
52 | 
53 |     outfile, infile, pagemap, debug, active, content, unalloc = validateArgs()
54 |     setupLogging(outfile)
55 | 
56 |     print "\n[CONFIGURATION]"
57 |     logging.info(" Target Database: " + os.path.abspath(infile))
58 |     logging.info(" Report File: " + os.path.abspath(outfile))
59 | 
60 |     print "\n "
61 |     outcsv = csv.writer(open(outfile+".csv","wb"))
62 |     if active:
63 |         outactivecsv = csv.writer(open(outfile+"_active.csv","wb"))
64 |     if unalloc:
65 |         outunalloctsv = csv.writer(open(outfile+"_unalloc.csv","wb"), delimiter='\t',quotechar='"')
66 | 
67 |     print "\n[DATABASE HEADER]"
68 |     header = NotionalSQLite.NotionalSQLite(infile,debug)
69 |     if header.statuscode == 1:
70 |         logging.error("ERROR: Could not create NotionalSQLite object - check that the target database is closed and unlocked.")
71 |         logging.error("ERROR: Cannot continue - exiting.")
72 |         sys.exit(1)
73 |     if header.checkSignature():
74 |         logging.info(" Signature check: Valid")
75 |     else:
76 |         logging.info(" Signature check: Invalid")
77 |         logging.error("ERROR: Database is corrupt or encrypted - Signature: %s" % header.headerdict["sig"])
78 |         logging.error("ERROR: Cannot continue - exiting.")
79 |         sys.exit(1)
80 | 
81 |     transheaderdict = header.translateHeader()
82 | 
83 |     outcsv.writerow(["{HEADER}"])
84 |     outcsv.writerow(["Field Name","Raw Value","Translated Value"])
85 |     for value in headerfields:
86 |         print " %s: %s" % (value[0],transheaderdict[value[1]])
87 |         outcsv.writerow((value[0],header.headerdict[value[1]],transheaderdict[value[1]]))
88 | 
89 |     pagetypedict = header.getPageTypeDict(header.headerdict['pagesize'])
90 | 
91 |     if pagemap: # if 'm' switch is used.
92 |         mapPages(header, outcsv)
93 |     if content: # if 'c' switch is used.
94 |         contentanalysis(infile, outcsv)
95 |     if active: # if 'a' switch is used.
96 |         dumpActiveRows(header,outactivecsv,pagetypedict)
97 |     if unalloc: # if 'u' switch is used.
98 |         dumpUnallocated(header,outunalloctsv,pagetypedict)
99 | 
100 |     print ""
101 |     logging.info("[REPORTING COMPLETED]")
102 |     print ""
103 |     logging.info("SQLiteZer took " + str(datetime.datetime.now()-startTime) + " to run.")
104 | 
105 | def getRowCount(tablename,dbcurs):
106 |     """
107 |     Return the number of rows in the table.
108 |     """
109 |     try:
110 |         sqlquery = "SELECT count(*) FROM %s" % (tablename)
111 |         dbcurs.execute(sqlquery)
112 |     except sqlite3.OperationalError as e:
113 |         logging.error('ERROR: The SQLite3 module encountered an error querying the table "%s" - check that you replaced the sqlite3.dll with the latest Amalgamation DLL from http://www.sqlite.org/download.html\nError: %s' % (tablename,e))
114 |         return 'ERROR'
115 |     rowcount = dbcurs.fetchall()
116 |     return rowcount[0][0]
117 | 
118 | def getElements(dbcurs):
119 |     """
120 |     Return a dict of all elements (tables, indexes, triggers, and views) in the DB.
121 |     """
122 |     try:
123 |         dbcurs.execute("SELECT * FROM sqlite_master")
124 |         elementresults = dbcurs.fetchall()
125 |     except sqlite3.OperationalError as e:
126 |         logging.error('ERROR: The SQLite3 module encountered an error querying the master table - check that the database is not locked or in-use. The application cannot continue.\nError: %s' % e)
127 |         sys.exit(1)
128 | 
129 |     elementdict = dict({"tables":list(),"indexes":list(),"triggers":list(),"views":list()})
130 |     elementcount = 0
131 |     tablenamelen = 0
132 |     triggernamelen = 0
133 |     indexnamelen = 0
134 |     indextblnamelen = 0
135 |     viewnamelen = 0
136 |     viewtblnamelen = 0
137 | 
138 |     for element in elementresults:
139 |         if element[0] == "table":
140 |             elementdict["tables"].append([element[1],element[2],element[3], element[4]])
141 |             if (len(element[1]) > tablenamelen):
142 |                 tablenamelen = len(element[1])
143 |             elementcount += 1
144 |         elif element[0] == "index":
145 |             elementdict["indexes"].append([element[1],element[2],element[3], element[4]])
146 |             if (len(element[1]) > indexnamelen):
147 |                 indexnamelen = len(element[1])
148 |             if (len(element[2]) > indextblnamelen):
149 |                 indextblnamelen = len(element[2])
150 |             elementcount += 1
151 |         elif element[0] == "trigger":
152 |             elementdict["triggers"].append([element[1],element[2],element[3], element[4]])
153 |             if (len(element[1]) > triggernamelen):
154 |                 triggernamelen = len(element[1])
155 |             elementcount += 1
156 |         elif element[0] == "view":
157 |             elementdict["views"].append([element[1],element[2],element[3], element[4]])
158 |             if (len(element[1]) > viewnamelen):
159 |                 viewnamelen = len(element[1])
160 |             if (len(element[2]) > viewtblnamelen):
161 |                 viewtblnamelen = len(element[2])
162 |             elementcount += 1
163 | 
164 |     elementdict["maxtablenamelen"] = tablenamelen
165 |     elementdict["maxindexnamelen"] = indexnamelen
166 |     elementdict["maxindextblnamelen"] = indextblnamelen
167 |     elementdict["maxtriggernamelen"] = triggernamelen
168 |     elementdict["maxviewnamelen"] = viewnamelen
169 |     elementdict["maxviewtblnamelen"] = viewtblnamelen
170 | 
171 |     return elementcount, elementdict
172 | 
173 | def contentanalysis(infile,outcsv):
174 |     """
175 |     Triggered if the 'c' switch is supplied.
176 |     Enumerates the tables, indexes, triggers, and views in the database and counts the rows in each.
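  | 
  |     For example, a sqlite_master row of type "table" such as
  |     ('table','msgs','msgs',2,'CREATE TABLE msgs(...)') (a hypothetical
  |     entry) is tallied under elementdict["tables"] by getElements(), and
  |     its rows are then counted via getRowCount()'s SELECT count(*) query.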
177 | """ 178 | print "\n[CONTENT ANALYSIS]" 179 | print "\n " 180 | try: 181 | dbconn = sqlite3.connect(infile) 182 | dbcurs = dbconn.cursor() 183 | except: 184 | logging.error("Could not connect to SQLite DB - Exiting...") 185 | sys.exit(1) 186 | 187 | print " \n" 188 | elementCount, elementDict = getElements(dbcurs) 189 | 190 | logging.info("Total elements identified in database: %s" % str(elementCount)) 191 | logging.info(" - # of Tables: %s" % str(len(elementDict["tables"]))) 192 | logging.info(" - # of Indexes: %s" % str(len(elementDict["indexes"]))) 193 | logging.info(" - # of Triggers: %s" % str(len(elementDict["triggers"]))) 194 | logging.info(" - # of Views: %s" % str(len(elementDict["views"]))) 195 | 196 | if (elementCount > 0): 197 | rowdata = list() 198 | # TABLES - Collect, Print, and Export. 199 | if len(elementDict["tables"]) > 0: 200 | for tablename in elementDict["tables"]: 201 | rowcount = getRowCount(tablename[0],dbcurs) 202 | rowdata.append([rowcount,0]) 203 | 204 | row_format = "{:^4} {:<%s} {:<12}" % str(elementDict["maxtablenamelen"] + 1) 205 | column_header = ['#','Table Name', 'Row Count'] 206 | column_divider = ['-','----------','---------'] 207 | 208 | print "\n{TABLES}\n" 209 | print row_format.format(*column_header) 210 | print row_format.format(*column_divider) 211 | outcsv.writerow(["{TABLES}"]) 212 | outcsv.writerow(["#","Table Name","Row Count","Rootpage","SQL Statement"]) 213 | 214 | for table, row in zip(elementDict["tables"], rowdata): 215 | print row_format.format(elementDict["tables"].index(table)+1, table[0], *row) 216 | outcsv.writerow([elementDict["tables"].index(table)+1, table[0],row[0],table[2],table[3].replace(os.linesep,"")]) 217 | 218 | # INDEXES - Collect, Print, and Export. 219 | if len(elementDict["indexes"]) > 0: 220 | row_format = "{:^4} {:<%s} {:<%s}" % (str(elementDict["maxindexnamelen"] + 5), str(elementDict["maxindextblnamelen"] + 1)) 221 | column_header = ['#','Index Name', 'Associated Table'] 222 | column_divider = ['-','----------','----------------'] 223 | 224 | print "\n{INDEXES}\n" 225 | print row_format.format(*column_header) 226 | print row_format.format(*column_divider) 227 | outcsv.writerow(["{INDEXES}"]) 228 | outcsv.writerow(["#","Index Name","Associated Table Name","Rootpage","SQL Statement"]) 229 | 230 | for index in elementDict["indexes"]: 231 | if index[3] is None: 232 | index[3] = "" 233 | print row_format.format(elementDict["indexes"].index(index)+1, index[0], index[1]) 234 | outcsv.writerow([elementDict["indexes"].index(index)+1,index[0],index[1],index[2],index[3].replace(os.linesep,"")]) 235 | 236 | # TRIGGERS - Collect, Print, and Export. 237 | if len(elementDict["triggers"]) > 0: 238 | row_format = "{:^4} {:<15} {:<%s}" % (str(elementDict["maxtriggernamelen"] + 5)) 239 | column_header = ['#','Triggering Type', 'Associated Table'] 240 | column_divider = ['-','---------------','----------------'] 241 | 242 | print "\n{TRIGGERS}\n" 243 | print row_format.format(*column_header) 244 | print row_format.format(*column_divider) 245 | outcsv.writerow(["{TRIGGERS}"]) 246 | outcsv.writerow(["#","Triggering Type","Associated Table Name","Rootpage","SQL Statement"]) 247 | 248 | for trigger in elementDict["triggers"]: 249 | print row_format.format(elementDict["triggers"].index(trigger)+1, trigger[0], trigger[1]) 250 | outcsv.writerow([elementDict["triggers"].index(trigger)+1,trigger[0],trigger[1],trigger[2],trigger[3].replace(os.linesep,"")]) 251 | 252 | # VIEWS - Collect, Print, and Export. 
253 |         if len(elementDict["views"]) > 0:
  |             rowdata = list() # reset - the entries above belong to the tables section
254 |             for viewname in elementDict["views"]:
255 |                 rowcount = getRowCount(viewname[0],dbcurs)
256 |                 rowdata.append([rowcount,0])
257 | 
258 |             row_format = "{:^4} {:<%s} {:<12}" % str(elementDict["maxviewnamelen"] + 1)
259 |             column_header = ['#','View Name', 'Row Count']
260 |             column_divider = ['-','----------','---------']
261 | 
262 |             print "\n{VIEWS}\n"
263 |             print row_format.format(*column_header)
264 |             print row_format.format(*column_divider)
265 |             outcsv.writerow(["{VIEWS}"])
266 |             outcsv.writerow(["#","View Name","Row Count","Rootpage","SQL Statement"])
267 | 
268 |             for view, row in zip(elementDict["views"], rowdata):
269 |                 print row_format.format(elementDict["views"].index(view)+1, view[0], *row)
270 |                 outcsv.writerow([elementDict["views"].index(view)+1, view[0],row[0],view[2],view[3].replace(os.linesep,"")])
271 |     else:
272 |         logging.info("WARNING: Database does not contain any elements.")
273 | 
274 | def dumpActiveRows(header,outactivecsv,pagetypedict):
275 |     """
276 |     Triggered if the 'a' switch is supplied.
277 |     Export all active row content into CSV format. Useful for grep, manual review, etc.
278 |     The first value of each line is the Page Offset.
279 |     """
280 |     i=0
281 |     print "\n[DUMP ACTIVE CONTENT]"
282 |     print " \n"
283 |     for page in pagetypedict['leaftable']:
284 |         for row in header.getActiveRowContent(page,header.headerdict['pagesize']):
285 |             row.insert(0,page)
286 |             outactivecsv.writerow(row)
287 |             i+=1
288 |             if((i%5000)==0):
289 |                 print " %s cells exported..." % str(i)
290 |     logging.info("Active cell export complete; %s cells exported." % str(i))
291 | 
292 | def dumpUnallocated(header,outunalloctsv,pagetypedict):
293 |     """
294 |     Triggered if the 'u' switch is supplied.
295 |     Export all unallocated data to a tab-delimited file.
296 |     """
297 |     i=0
298 |     print "\n[DUMP UNALLOCATED CONTENT]"
299 |     outunalloctsv.writerow(["Offset","Unallocated Type","Block Length","Printable Data"])
300 |     print " \n"
301 |     for page in pagetypedict['leaftable']:
302 |         unalloclist = header.getUnallocContent(page,header.headerdict['pagesize'])
303 |         for row in unalloclist:
304 |             outunalloctsv.writerow(row)
305 |             i+=1
306 |             if((i%5000)==0):
307 |                 print " %s unallocated blocks exported..." % str(i)
308 |     logging.info("Unallocated block export complete; %s blocks exported." % str(i))
309 | 
310 | def mapPages(header, outcsv):
311 |     """
312 |     Triggered if the 'm' switch is supplied.
313 |     Generate a visual map of the database's page type distribution.
314 |     """
315 |     print "\n[PAGE MAP]\n"
316 |     pagemap = header.mapPages(header.headerdict['pagesize'])
317 |     mapheaderfields = (("Page Map"),
318 |                        ("Interior Index Pages (i)"),
319 |                        ("Interior Table Pages (t)"),
320 |                        ("Leaf Index Pages (I)"),
321 |                        ("Leaf Table Pages (T)"),
322 |                        ("Header Pages (h)"),
323 |                        ("Overflow Pages (O)"),
324 |                        ("Total Identified Pages"))
325 | 
326 |     outcsv.writerow(["{PAGE MAP}"])
327 |     outcsv.writerow(["Page Statistics","Value"])
328 | 
329 |     i = pagecount = 0
330 |     for value in mapheaderfields:
331 |         if (i == 0):
332 |             j=0
333 |             rowlabel = "{:>%s}" % len(str(len(pagemap[0]))+" ")
334 |             print rowlabel.format(" ") + "0 8 16 24 31"
335 |             print rowlabel.format(" ") + "|.......|.......|.......|......."
336 |             while True:
337 |                 print rowlabel.format(str(j) + ": ") + pagemap[0][j:j+32]
338 |                 j+=32
339 |                 if (j>len(pagemap[0])):
340 |                     print ""
341 |                     break
342 |         else:
343 |             print " %s: %s" % (value,pagemap[i])
344 |             outcsv.writerow((value,pagemap[i]))
345 |         i+=1
346 | 
347 | 
348 | def setupLogging(outfile):
349 |     """
350 |     Configure basic logging and populate the log with bibliographic info.
351 |     """
352 |     logfile = outfile + ".log"
353 |     logging.basicConfig(filename=os.path.join(logfile),level=logging.DEBUG, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%H:%M:%S')
354 |     console = logging.StreamHandler()
355 |     console.setLevel(logging.INFO)
356 |     console.setFormatter(logging.Formatter('%(message)s'))
357 |     logging.getLogger('').addHandler(console)
358 |     logging.info("""\n _____ ____ __ _ __ _____
359 | / ___// __ \ / / (_) /___/__ / Notional Labs 2013
360 | \__ \/ / / / / / / / __/ _ \/ / / _ \/ ___/
361 | ___/ / /_/ / / /___/ / /_/ __/ /_/ __/ /
362 | /____/\___\_\/_____/_/\__/\___/____|___/_/
363 | Forensic SQLite Database Analyser and Reporting Tool """)
364 |     print "-------------------------------------------------------"
365 |     logging.info(" Version ["+version+"] Build ["+build+"] Author [James E. Hung]")
366 |     print "-------------------------------------------------------"
367 |     return
368 | 
369 | def validateArgs():
370 |     """
371 |     Validate input arguments.
372 |     """
373 |     parser = argparse.ArgumentParser(description="Notional-Labs.com: SQLiteZer")
374 |     parser.add_argument('-i','--input', help='Target SQLite database file.', required=True)
375 |     parser.add_argument('-o','--output', help='Output job name (exclude file extension).', required=True)
376 |     parser.add_argument('-a','--active', help='OPTIONAL: Dump all raw active records into a CSV.', action='store_true')
377 |     parser.add_argument('-c','--content', help='OPTIONAL: Generate content report.', action='store_true')
378 |     parser.add_argument('-m','--pagemap', help='OPTIONAL: Print a visual map of the physical page distribution (work in progress).', action='store_true')
379 |     parser.add_argument('-u','--unalloc', help='OPTIONAL: Dump all unallocated areas of each page into a TSV.', action='store_true')
380 |     parser.add_argument('-x','--debug', help='OPTIONAL: Developers Only - Enable debug mode.', action='store_true')
381 | 
382 |     args = vars(parser.parse_args())
383 | 
384 |     try:
385 |         with open(args['input']): pass
386 |     except IOError:
387 |         print "Target SQLite DB file does not exist or cannot be opened. Exiting..."
388 | sys.exit(1) 389 | 390 | return args['output'],args['input'],args['pagemap'],args['debug'],args['active'],args['content'],args['unalloc'] 391 | 392 | if __name__ == '__main__': 393 | main() 394 | -------------------------------------------------------------------------------- /_resources/File Format For SQLite Databases.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NotionalLabs/SQLiteZer/17689863b3f8dbb2bc03b52f3bb5d7135cd80f5d/_resources/File Format For SQLite Databases.pdf -------------------------------------------------------------------------------- /_resources/Forensics from the sausage factory_ An analysis of the record structure within SQLite databases.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NotionalLabs/SQLiteZer/17689863b3f8dbb2bc03b52f3bb5d7135cd80f5d/_resources/Forensics from the sausage factory_ An analysis of the record structure within SQLite databases.pdf -------------------------------------------------------------------------------- /_resources/Forensics from the sausage factory_ Carving SQLite databases from unallocated clusters.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NotionalLabs/SQLiteZer/17689863b3f8dbb2bc03b52f3bb5d7135cd80f5d/_resources/Forensics from the sausage factory_ Carving SQLite databases from unallocated clusters.pdf -------------------------------------------------------------------------------- /_resources/Forensics from the sausage factory_ SQLite Pointer Maps pages.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NotionalLabs/SQLiteZer/17689863b3f8dbb2bc03b52f3bb5d7135cd80f5d/_resources/Forensics from the sausage factory_ SQLite Pointer Maps pages.pdf -------------------------------------------------------------------------------- /_resources/Forensics from the sausage factory_ SQLite overflow pages and other loose ends...pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NotionalLabs/SQLiteZer/17689863b3f8dbb2bc03b52f3bb5d7135cd80f5d/_resources/Forensics from the sausage factory_ SQLite overflow pages and other loose ends...pdf -------------------------------------------------------------------------------- /_resources/Sqlite_carving_extractAndroidData.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NotionalLabs/SQLiteZer/17689863b3f8dbb2bc03b52f3bb5d7135cd80f5d/_resources/Sqlite_carving_extractAndroidData.pdf -------------------------------------------------------------------------------- /_resources/research.txt: -------------------------------------------------------------------------------- 1 | http://forensicsfromthesausagefactory.blogspot.com/2011/04/carving-sqlite-databases-from.html 2 | 3 | http://forensicsfromthesausagefactory.blogspot.com/2011/05/analysis-of-record-structure-within.html 4 | 5 | http://sandbox.dfrws.org/2011/fox-it/DFRWS2011_results/Report/Sqlite_carving_extractAndroidData.pdf 6 | 7 | https://viaforensics.com/computer-forensics/sqlite-forensics-presentation-andrew-hoog.html 8 | 9 | https://mobileforensics.wordpress.com/category/sqlite/ --------------------------------------------------------------------------------