├── .DS_Store ├── .travis.yml ├── LICENSE.txt ├── README.md ├── browserhistory.py └── setup.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kcp18/browserhistory/f76863350654bed103e3e43e42acfdf5e3f927e6/.DS_Store -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | script: 3 | - pytest 4 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Chanwoo Park 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BrowserHistory Module 2 | 3 | browserhistory is a simple Python module that extracts browser history from a user's local computer and writes the data to csv files. 4 | 5 | Platforms: Linux, MacOS, and Windows. 6 | Supported Browsers: Firefox, Google Chrome, and Safari. 7 | 8 | ## Installation 9 | ```sh 10 | $ python3 -m pip install browserhistory 11 | ``` 12 | 13 | or 14 | 15 | ```sh 16 | $ git clone https://github.com/kcp18/browserhistory 17 | ``` 18 | 19 | ## Overview 20 | ### Functions: 21 | - get_browserhistory() -> dict 22 | - get_database_paths() -> dict 23 | - get_username() -> str 24 | - write_browserhistory_csv() -> None 25 | 26 | 27 | #### Example: 28 | ```python 29 | Example 30 | ======= 31 | >>> import browserhistory as bh 32 | >>> dict_obj = bh.get_browserhistory() 33 | >>> dict_obj.keys() 34 | dict_keys(['safari', 'chrome', 'firefox']) 35 | >>> dict_obj['safari'][0] 36 | ('https://mail.google.com', 'Mail', '2018-08-14 08:27:26') 37 | # Write the data to csv files in the current working directory: 38 | # safari_history.csv, chrome_history.csv, and firefox_history.csv. 39 | >>> bh.write_browserhistory_csv() 40 | # Create csv files that contain browser history 41 | ``` 42 | 43 | ### The Description of browserhistory 44 | 45 | ``` 46 | NAME 47 | 48 | browserhistory 49 | 50 | FUNCTIONS 51 | 52 | get_browserhistory() -> dict 53 | Get the user's browser history by using the sqlite3 module to connect to the databases. 54 | It returns a dictionary: its key is the name of a browser in str and its value is a list of 55 | tuples, each tuple contains three elements: url, title, and visited_time. 56 | 57 | get_database_paths() -> dict 58 | Get paths to the database of browsers and store them in a dictionary. 
59 | It returns a dictionary: its key is the name of browser in str and its value is the path to database in str. 60 | 61 | get_username() -> str 62 | Get the username of the current user on the local computer 63 | 64 | write_browserhistory_csv() -> None 65 | It writes csv files that contain the browser history in 66 | the current working directory. It writes one csv file for each 67 | browser the program detects. 68 | ``` 69 | 70 | ### Issue Report 71 | 72 | 73 | If you have any questions or find bugs in the module, 74 | please report the issues/questions at the following address. 75 | 76 | - https://github.com/kcp18/browserhistory/issues 77 | 78 | 79 | -------------------------------------------------------------------------------- /browserhistory.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import sqlite3 4 | import sys 5 | from datetime import datetime 6 | 7 | 8 | # platform_table maps the name of user's OS to a platform code 9 | platform_table = { 10 | 'linux': 0, 11 | 'linux2': 0, 12 | 'darwin': 1, 13 | 'cygwin': 2, 14 | 'win32': 2, 15 | } 16 | 17 | # it supports Linux, MacOS, and Windows platforms. 
class NotAvailableOS(Exception):
    """Raised at import time when sys.platform is not a key of platform_table."""


# Resolve the platform code once at import time; platform_table is the
# module-level mapping defined above (0 = Linux, 1 = macOS, 2 = Windows).
try:
    user_platformcode = platform_table[sys.platform]
except KeyError:
    raise NotAvailableOS("It does not support your OS.") from None


# SQL used to read (url, title, visit time) rows from each browser's database.
# Each browser stores timestamps with a different epoch/unit, hence the
# per-browser arithmetic before converting with datetime(..., 'unixepoch').
_HISTORY_QUERIES = {
    'chrome': (
        "SELECT url, title, "
        "datetime((last_visit_time/1000000)-11644473600, 'unixepoch', 'localtime') "
        "AS last_visit_time FROM urls ORDER BY last_visit_time DESC"
    ),
    'firefox': (
        "SELECT url, title, "
        "datetime((visit_date/1000000), 'unixepoch', 'localtime') AS visit_date "
        "FROM moz_places INNER JOIN moz_historyvisits "
        "on moz_historyvisits.place_id = moz_places.id ORDER BY visit_date DESC"
    ),
    'safari': (
        "SELECT url, title, "
        "datetime(visit_time + 978307200, 'unixepoch', 'localtime') "
        "FROM history_visits INNER JOIN history_items "
        "ON history_items.id = history_visits.history_item ORDER BY visit_time DESC"
    ),
}


def get_username() -> str:
    """
    Get the username of the current user on the local computer.

    The name is taken from the operating system (via getpass.getuser())
    instead of being parsed out of the current working directory, so the
    result no longer depends on where the program was started from.
    If the OS lookup fails, the last component of the home directory is used.
    """
    import getpass

    try:
        return getpass.getuser()
    except (ImportError, KeyError, OSError):
        # getpass.getuser() can fail when no login-name environment variable
        # is set and the password database is unavailable.
        return os.path.basename(os.path.expanduser('~'))


def _find_firefox_database(profiles_dir):
    """Return the path of places.sqlite inside a '.default*' profile, or None.

    profiles_dir is the directory that contains the Firefox profile
    directories. When several matching profiles contain a places.sqlite,
    the most recently modified database is returned.
    """
    if not os.path.isdir(profiles_dir):
        return None
    candidates = []
    for entry in os.listdir(profiles_dir):
        # Same matching rule as before: '.default' must appear after at
        # least one leading character of the directory name.
        if entry.find('.default') > 0:
            database = os.path.join(profiles_dir, entry, 'places.sqlite')
            if os.path.isfile(database):
                candidates.append(database)
    if not candidates:
        return None
    return max(candidates, key=os.path.getmtime)


def get_database_paths() -> dict:
    """
    Get paths to the database of browsers and store them in a dictionary.
    It returns a dictionary: its key is the name of browser in str and its value is the path to database in str.

    Only databases that exist on disk are included. The home directory is
    resolved with os.path.expanduser('~') on every platform, so the result
    does not depend on the current working directory.
    """
    home = os.path.expanduser('~')
    # if it is a macOS
    if user_platformcode == 1:
        app_support = os.path.join(home, 'Library', 'Application Support')
        fixed_paths = [
            ('safari', os.path.join(home, 'Library', 'Safari', 'History.db')),
            ('chrome', os.path.join(app_support, 'Google', 'Chrome', 'Default', 'History')),
        ]
        firefox_profiles = os.path.join(app_support, 'Firefox', 'Profiles')
    # if it is a windows
    elif user_platformcode == 2:
        fixed_paths = [
            ('chrome', os.path.join(home, 'AppData', 'Local', 'Google', 'Chrome',
                                    'User Data', 'Default', 'History')),
        ]
        firefox_profiles = os.path.join(home, 'AppData', 'Roaming', 'Mozilla',
                                        'Firefox', 'Profiles')
    # if it is a linux and it has only a firefox
    else:
        fixed_paths = []
        firefox_profiles = os.path.join(home, '.mozilla', 'firefox')

    browser_path_dict = dict()
    for browser, path in fixed_paths:
        if os.path.exists(path):
            browser_path_dict[browser] = path

    firefox_database = _find_firefox_database(firefox_profiles)
    if firefox_database is not None:
        browser_path_dict['firefox'] = firefox_database

    return browser_path_dict


def get_browserhistory() -> dict:
    """Get the user's browsers history by using sqlite3 module to connect to the databases.
    It returns a dictionary: its key is a name of browser in str and its value is a list of
    tuples, each tuple contains three elements: url, title, and visited_time.

    A browser whose database cannot be opened is left out of the result;
    a browser whose database cannot be queried (for example because the
    browser is still running) maps to an empty list.

    Example
    -------
    >>> import browserhistory as bh
    >>> dict_obj = bh.get_browserhistory()
    >>> dict_obj.keys()
    dict_keys(['safari', 'chrome', 'firefox'])
    >>> dict_obj['safari'][0]
    ('https://mail.google.com', 'Mail', '2018-08-14 08:27:26')
    """
    # browserhistory is a dictionary that stores the query results based on the name of browsers.
    browserhistory = {}

    for browser, path in get_database_paths().items():
        try:
            conn = sqlite3.connect(path)
        except sqlite3.OperationalError:
            print('* ' + browser.upper() + ' Database Permission Denied.')
            continue

        # query_result stays empty when the query fails.
        query_result = []
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(_HISTORY_QUERIES.get(browser, ''))
                query_result = cursor.fetchall()
            finally:
                cursor.close()
        except sqlite3.OperationalError:
            print('* Notification * ')
            print('Please Completely Close ' + browser.upper() + ' Window')
        except sqlite3.Error as err:
            print(err)
        finally:
            # always release the connection, even if the query failed
            conn.close()

        # put the query result based on the name of browsers.
        browserhistory[browser] = query_result

    return browserhistory


def write_browserhistory_csv() -> None:
    """It writes csv files that contain the browser history in
    the current working directory. It writes one csv file per detected
    browser, named '<browser>_history.csv', with every field quoted."""
    for browser, history in get_browserhistory().items():
        with open(browser + '_history.csv', mode='w', encoding='utf-8', newline='') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=',',
                                    quoting=csv.QUOTE_ALL)
            csv_writer.writerows(history)
--------------------------------------------------------------------------------