├── .gitignore
├── LICENSE
├── README.md
├── cbh_jupyter.ipynb
├── ignore.py
├── main.py
├── requirements.txt
└── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | # .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | *.json
163 | 
164 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Mark Melnic
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Visualize Your Chrome Browsing History
 3 | 
 4 | A lightweight tool to visualize your Chrome browsing history using Google Takeout data.
 5 | 
 6 | ![Chart Example](https://i.imgur.com/sJCc9gq.png)
 7 | 
 8 | ---
 9 | 
10 | ## Getting Started
11 | 
12 | Follow these steps to visualize your browsing history:
13 | 
14 | ### 1. Initial Setup
15 | 
16 | Ensure you meet all requirements by following [this guide](https://gist.github.com/markmelnic/b5a6d399b2c08008c989829cbf9c3618).
17 | 
18 | ### 2. Download Your Browsing Data
19 | 
20 | 1. Learn how to download your Google data: [Google Takeout Guide](https://support.google.com/accounts/answer/3024190?hl=en).  
21 | 2. Download your data from [Google Takeout](https://takeout.google.com/).
22 | 
23 | > **Note:** You only need to download your Chrome browsing history. Google provides it as a JSON file by default.
24 | 
25 | Save the `History.json` file in the same directory as the script.
26 | 
27 | ### 3. Run the Script
28 | 
29 | Open your terminal or command prompt and execute the following command:
30 | 
31 | ```shell
32 | python main.py <file>
33 | ```
34 | 
35 | Replace `<file>` with the name of your downloaded `History.json` file (the name may vary).
36 | 
37 | ---
38 | 
39 | ## Script Usage
40 | 
41 | ### Command Syntax
42 | 
43 | ```shell
44 | main.py <file> [-h] [-s SIZE] [-d DAYS]
45 | ```
46 | 
47 | #### Positional Arguments:
48 | - **`<file>`**: The JSON file downloaded from Google Takeout.
49 | 
50 | #### Optional Arguments:
51 | - **`-h`**: Display help information.
52 | - **`-s, --size`**: Number of top sites to display (default: 20).
53 | - **`-d, --days`**: Number of recent days to analyze (default: 60).
54 | 
55 | ---
56 | 
57 | ## Examples
58 | 
59 | 1. Basic usage with default settings:
60 |    ```shell
61 |    python main.py History.json
62 |    ```
63 | 
64 | 2. Display data from the last 50 days:
65 |    ```shell
66 |    python main.py History.json -d 50
67 |    ```
68 | 
69 | 3. Show the top 30 sites:
70 |    ```shell
71 |    python main.py History.json --size 30
72 |    ```
73 | 
74 | 4. Analyze 90 days and display the top 50 sites:
75 |    ```shell
76 |    python main.py History.json -s 50 --days 90
77 |    ```
78 | 


--------------------------------------------------------------------------------
/cbh_jupyter.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "from utils import generate_plots, chart_json, hist_json\n",
10 |     "\n",
11 |     "FILE = 'BrowserHistory.json'"
12 |    ]
13 |   },
14 |   {
15 |    "cell_type": "code",
16 |    "execution_count": null,
17 |    "metadata": {},
18 |    "outputs": [],
19 |    "source": [
20 |     "data = [\n",
21 |     "    chart_json(FILE, 365),\n",
22 |     "    hist_json(FILE, 365),\n",
23 |     "]"
24 |    ]
25 |   },
26 |   {
27 |    "cell_type": "code",
28 |    "execution_count": null,
29 |    "metadata": {},
30 |    "outputs": [],
31 |    "source": [
32 |     "# using defaults: 10 for size and 365 for days\n",
33 |     "generate_plots(data, 10, 365)"
34 |    ]
35 |   }
36 |  ],
37 |  "metadata": {
38 |   "kernelspec": {
39 |    "display_name": "Python 3.8.1 64-bit",
40 |    "name": "python38164bit4531da3462464465a8c5836a34dda191"
41 |   },
42 |   "language_info": {
43 |    "codemirror_mode": {
44 |     "name": "ipython",
45 |     "version": 3
46 |    },
47 |    "file_extension": ".py",
48 |    "mimetype": "text/x-python",
49 |    "name": "python",
50 |    "nbconvert_exporter": "python",
51 |    "pygments_lexer": "ipython3",
52 |    "version": "3.8.1-final"
53 |   },
54 |   "orig_nbformat": 2
55 |  },
56 |  "nbformat": 4,
57 |  "nbformat_minor": 2
58 | }
59 | 


--------------------------------------------------------------------------------
/ignore.py:
--------------------------------------------------------------------------------
1 | 
2 | IGNORE = [
3 |     'chrome://',
4 |     'chrome-search://',
5 |     'file://',
6 |     '.jpg',
7 |     '.png']
8 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
 3 | 
 4 | from utils import generate_plots, chart_json, hist_json
 5 | from argparse import ArgumentParser, RawTextHelpFormatter
 6 | 
 7 | 
 8 | if __name__ == '__main__':
 9 |     parser = ArgumentParser(formatter_class=RawTextHelpFormatter)
10 | 
11 |     parser.add_argument("file", metavar="file", type=str, help="Google takeout JSON file.")
12 |     parser.add_argument("-s", "--size", dest="size", type=int, required=False,
13 |     help="Number of top sites to be displayed.", default=20)
14 |     parser.add_argument("-d", "--days", dest="days", type=int, required=False,
15 |     help="Number of x last days of data to be displayed.", default=60)
16 | 
17 |     args = parser.parse_args()
18 | 
19 |     logging.info("(1/2): Processing data")
20 |     data = [
21 |         chart_json(args.file, args.days),
22 |         hist_json(args.file, args.days),
23 |     ]
24 | 
25 |     logging.info("(2/2): Generating graph")
26 |     generate_plots(data, args.size, args.days)
27 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | numpy
3 | scipy
4 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import json, time
  3 | import numpy as np
  4 | from heapq import nlargest
  5 | from operator import itemgetter
  6 | import matplotlib.pyplot as plt
  7 | from urllib.parse import urlparse
  8 | 
  9 | from ignore import IGNORE
 10 | 
 11 | DAY = 86400000 # miliseconds
 12 | 
 13 | # generate plots
 14 | def generate_plots(data : list, size : int, days : int):
 15 |     # define figure
 16 |     fig, (links_plot, act_plot) = plt.subplots(2)
 17 |     fig.tight_layout()
 18 | 
 19 |     # LINKS PLOT
 20 |     links_plot.invert_yaxis()
 21 |     links_plot.set_xlabel('Number of visits')
 22 |     links_plot.set_title('Top %d most visited websites in the last %d days' % (size, days))
 23 |     # plot variables
 24 |     sorted_data = nlargest(size, data[0], key=itemgetter(1))
 25 |     sitenames = [el[0] for el in sorted_data]
 26 |     occurences = [el[1] for el in sorted_data]
 27 | 
 28 |     # set figure variables
 29 |     links_plot.barh(np.arange(len(sitenames)), occurences, align='center', color="#247ba0")
 30 |     links_plot.set_yticks(np.arange(len(sitenames)))
 31 |     links_plot.set_yticklabels(sitenames, minor=False)
 32 |     for ind, oc in enumerate(occurences):
 33 |         links_plot.text(oc + oc * 0.005, ind + .25, str(oc), color='#565656')
 34 | 
 35 |     # ACTIVITY PLOT
 36 |     act_plot.set_ylabel('Nr. of links visited')
 37 |     act_plot.set_xlabel('Nr. of days ago from today')
 38 |     act_plot.set_title('Number of links visited per day in the last %d days.\n This histogram is meant to display consistency.' % (days))
 39 |     # separate data
 40 |     days_ago = [el[0] for el in data[1]]
 41 |     instances = [el[1] for el in data[1]]
 42 |     act_plot.plot(np.arange(len(days_ago)), instances, label="Nr. of listings", color="#247ba0")
 43 | 
 44 |     # show plot
 45 |     plt.rcdefaults()
 46 |     plt.show()
 47 | 
 48 | # process json file
 49 | def chart_json(json_file : str, days : int) -> list:
 50 |     required_time = int(round(time.time() * 1000)) - (days * DAY)
 51 |     # read json file
 52 |     with open(json_file, mode = 'r', encoding="utf8") as data_file:
 53 |         data = json.load(data_file)
 54 | 
 55 |     sites = []
 56 |     instances = []
 57 |     # process data
 58 |     for data_set in data['Browser History']:
 59 |         #icon = data_set['favicon_url']
 60 |         timestamp = round(int(data_set['time_usec']) / 1000)
 61 |         if required_time > timestamp:
 62 |             continue
 63 | 
 64 |         url = data_set['url']
 65 |         try:
 66 |             url = urlparse(url).netloc.replace("www.", "")
 67 |         except AssertionError:
 68 |             continue
 69 | 
 70 |         if url not in sites:
 71 |             # index url
 72 |             sites.append(url)
 73 |             instances.append(1)
 74 |         else:
 75 |             # increment instance
 76 |             instances[sites.index(url)] += 1
 77 | 
 78 |     # merge url and instance to a tuple
 79 |     return [(site, inst) for site, inst in zip(sites, instances)]
 80 | 
 81 | # built histogram dataset
 82 | def hist_json(json_file : str, days : int) -> list:
 83 |     current_time = int(round(time.time() * 1000))
 84 |     # read json file
 85 |     with open(json_file, mode = 'r', encoding="utf8") as data_file:
 86 |         data = json.load(data_file)
 87 | 
 88 |     day_nr = 1
 89 |     counter = 0
 90 |     time_indexes = []
 91 |     # process data
 92 |     for data_set in data['Browser History']:
 93 |         timestamp = round(int(data_set['time_usec']) / 1000)
 94 |         if current_time > timestamp:
 95 |             time_indexes.append((day_nr, counter))
 96 |             day_nr += 1
 97 |             counter = 0
 98 |             current_time = current_time - DAY
 99 |             continue
100 |         counter += 1
101 | 
102 |         if day_nr > days:
103 |             break
104 | 
105 |     return time_indexes
106 | 


--------------------------------------------------------------------------------