├── .gitignore
├── LICENSE
├── Pipfile
├── Pipfile.lock
├── README.md
├── analyzer.py
├── screenshots
│   └── tool.png
└── src
    ├── helpers.py
    ├── metrics.py
    ├── processor.py
    └── service.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
credentials.json
*.pickle
tt*.py
NOTES
TODO
*.mo
__pycache__/
*.py[cod]
*$py.class
venv/
target/
.DS_Store

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2019 M. Hasbini

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
google-api-python-client = "*"
google-auth-httplib2 = "*"
google-auth-oauthlib = "*"
colorama = "*"
progress = "*"
termtables = "*"
termgraph = "*"
ascii-graph = "*"
agate = "*"
termcolor = "*"
argparse = "*"

[requires]
python_version = "3.7"

--------------------------------------------------------------------------------
/Pipfile.lock:
--------------------------------------------------------------------------------
{
    "_meta": {
        "hash": {
            "sha256": "2b98faaf1aeaff58bd8263fa6fcd4b76cc5985e48f53a868b5849f7af1e72e04"
        },
        "pipfile-spec": 6,
        "requires": {
            "python_version": "3.7"
        },
        "sources": [
            {
                "name": "pypi",
                "url": "https://pypi.org/simple",
                "verify_ssl": true
            }
        ]
    },
    "default": {
        "agate": {
            "hashes": [
                "sha256:48d6f80b35611c1ba25a642cbc5b90fcbdeeb2a54711c4a8d062ee2809334d1c",
                "sha256:c93aaa500b439d71e4a5cf088d0006d2ce2c76f1950960c8843114e5f361dfd3"
            ],
            "index": "pypi",
            "version": "==1.6.1"
        },
        "argparse": {
            "hashes": [
                "sha256:62b089a55be1d8949cd2bc7e0df0bddb9e028faefc8c32038cc84862aefdd6e4",
                "sha256:c31647edb69fd3d465a847ea3157d37bed1f95f19760b11a47aa91c04b666314"
            ],
            "index": "pypi",
            "version": "==1.4.0"
        },
        "ascii-graph": {
            "hashes": [
                "sha256:c1844fe309cd221f35f19efc58c5c7157941e35172d486d7c824ba5ad1d05f71"
            ],
            "index": "pypi",
            "version": "==1.5.1"
        },
        "babel": {
            "hashes": [
                "sha256:1aac2ae2d0d8ea368fa90906567f5c08463d98ade155c0c4bfedd6a0f7160e38",
                "sha256:d670ea0b10f8b723672d3a6abeb87b565b244da220d76b4dba1b66269ec152d4"
            ],
            "version": "==2.8.0"
        },
        "cachetools": {
            "hashes": [
                "sha256:1d057645db16ca7fe1f3bd953558897603d6f0b9c51ed9d11eb4d071ec4e2aab",
                "sha256:de5d88f87781602201cde465d3afe837546663b168e8b39df67411b0bf10cefc"
            ],
            "version": "==4.1.0"
        },
        "certifi": {
            "hashes": [
                "sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304",
                "sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519"
            ],
            "version": "==2020.4.5.1"
        },
        "chardet": {
            "hashes": [
                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
            ],
            "version": "==3.0.4"
        },
        "colorama": {
            "hashes": [
                "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff",
                "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1"
            ],
            "index": "pypi",
            "version": "==0.4.3"
        },
        "google-api-python-client": {
            "hashes": [
                "sha256:3121d55d106ef1a2756e8074239512055bd99eb44da417b3dd680f9a1385adec",
                "sha256:a8a88174f66d92aed7ebbd73744c2c319b4b1ce828e565f9ec721352d2e2fb8c"
            ],
            "index": "pypi",
            "version": "==1.7.11"
        },
        "google-auth": {
            "hashes": [
                "sha256:73b141d122942afe12e8bfdcb6900d5df35c27d39700f078363ba0b1298ad33b",
                "sha256:fbf25fee328c0828ef293459d9c649ef84ee44c0b932bb999d19df0ead1b40cf"
            ],
            "version": "==1.15.0"
        },
"==1.15.0" 92 | }, 93 | "google-auth-httplib2": { 94 | "hashes": [ 95 | "sha256:098fade613c25b4527b2c08fa42d11f3c2037dda8995d86de0745228e965d445", 96 | "sha256:f1c437842155680cf9918df9bc51c1182fda41feef88c34004bd1978c8157e08" 97 | ], 98 | "index": "pypi", 99 | "version": "==0.0.3" 100 | }, 101 | "google-auth-oauthlib": { 102 | "hashes": [ 103 | "sha256:88d2cd115e3391eb85e1243ac6902e76e77c5fe438b7276b297fbe68015458dd", 104 | "sha256:a92a0f6f41a0fb6138454fbc02674e64f89d82a244ea32f98471733c8ef0e0e1" 105 | ], 106 | "index": "pypi", 107 | "version": "==0.4.1" 108 | }, 109 | "httplib2": { 110 | "hashes": [ 111 | "sha256:4f6988e6399a2546b525a037d56da34aed4d149bbdc0e78523018d5606c26e74", 112 | "sha256:b0e1f3ed76c97380fe2485bc47f25235453b40ef33ca5921bb2897e257a49c4c" 113 | ], 114 | "index": "pypi", 115 | "version": "==0.18.0" 116 | }, 117 | "idna": { 118 | "hashes": [ 119 | "sha256:7588d1c14ae4c77d74036e8c22ff447b26d0fde8f007354fd48a7814db15b7cb", 120 | "sha256:a068a21ceac8a4d63dbfd964670474107f541babbd2250d61922f029858365fa" 121 | ], 122 | "version": "==2.9" 123 | }, 124 | "isodate": { 125 | "hashes": [ 126 | "sha256:2e364a3d5759479cdb2d37cce6b9376ea504db2ff90252a2e5b7cc89cc9ff2d8", 127 | "sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81" 128 | ], 129 | "version": "==0.6.0" 130 | }, 131 | "leather": { 132 | "hashes": [ 133 | "sha256:076d1603b5281488285718ce1a5ce78cf1027fe1e76adf9c548caf83c519b988", 134 | "sha256:e0bb36a6d5f59fbf3c1a6e75e7c8bee29e67f06f5b48c0134407dde612eba5e2" 135 | ], 136 | "version": "==0.3.3" 137 | }, 138 | "oauthlib": { 139 | "hashes": [ 140 | "sha256:bee41cc35fcca6e988463cacc3bcb8a96224f470ca547e697b604cc697b2f889", 141 | "sha256:df884cd6cbe20e32633f1db1072e9356f53638e4361bef4e8b03c9127c9328ea" 142 | ], 143 | "version": "==3.1.0" 144 | }, 145 | "parsedatetime": { 146 | "hashes": [ 147 | "sha256:3b835fc54e472c17ef447be37458b400e3fefdf14bb1ffdedb5d2c853acf4ba1", 148 | "sha256:d2e9ddb1e463de871d32088a3f3cea3dc8282b1b2800e081bd0ef86900451667" 149 | ], 150 | "version": "==2.5" 151 | }, 152 | "progress": { 153 | "hashes": [ 154 | "sha256:69ecedd1d1bbe71bf6313d88d1e6c4d2957b7f1d4f71312c211257f7dae64372" 155 | ], 156 | "index": "pypi", 157 | "version": "==1.5" 158 | }, 159 | "pyasn1": { 160 | "hashes": [ 161 | "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", 162 | "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba" 163 | ], 164 | "version": "==0.4.8" 165 | }, 166 | "pyasn1-modules": { 167 | "hashes": [ 168 | "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e", 169 | "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74" 170 | ], 171 | "version": "==0.2.8" 172 | }, 173 | "python-slugify": { 174 | "hashes": [ 175 | "sha256:a8fc3433821140e8f409a9831d13ae5deccd0b033d4744d94b31fea141bdd84c" 176 | ], 177 | "version": "==4.0.0" 178 | }, 179 | "pytimeparse": { 180 | "hashes": [ 181 | "sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd", 182 | "sha256:e86136477be924d7e670646a98561957e8ca7308d44841e21f5ddea757556a0a" 183 | ], 184 | "version": "==1.1.8" 185 | }, 186 | "pytz": { 187 | "hashes": [ 188 | "sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed", 189 | "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048" 190 | ], 191 | "version": "==2020.1" 192 | }, 193 | "requests": { 194 | "hashes": [ 195 | "sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee", 196 | 
"sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6" 197 | ], 198 | "version": "==2.23.0" 199 | }, 200 | "requests-oauthlib": { 201 | "hashes": [ 202 | "sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d", 203 | "sha256:b4261601a71fd721a8bd6d7aa1cc1d6a8a93b4a9f5e96626f8e4d91e8beeaa6a" 204 | ], 205 | "version": "==1.3.0" 206 | }, 207 | "rsa": { 208 | "hashes": [ 209 | "sha256:14ba45700ff1ec9eeb206a2ce76b32814958a98e372006c8fb76ba820211be66", 210 | "sha256:1a836406405730121ae9823e19c6e806c62bbad73f890574fff50efa4122c487" 211 | ], 212 | "version": "==4.0" 213 | }, 214 | "six": { 215 | "hashes": [ 216 | "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a", 217 | "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c" 218 | ], 219 | "version": "==1.14.0" 220 | }, 221 | "termcolor": { 222 | "hashes": [ 223 | "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b" 224 | ], 225 | "index": "pypi", 226 | "version": "==1.1.0" 227 | }, 228 | "termgraph": { 229 | "hashes": [ 230 | "sha256:3ea73b7659faec1a420ed3515d0b67e1bd7cb915f36f2016c4e524926d3eb0ae" 231 | ], 232 | "index": "pypi", 233 | "version": "==0.2.1" 234 | }, 235 | "termtables": { 236 | "hashes": [ 237 | "sha256:0fd321f7f478001f824896b4dbc5af0cf296ecf3c4aa450018cdcd31b4880b6e", 238 | "sha256:762ba718cae224917fd8e1eda5cbfb33027ab6c86cfb9dd676e35f0c6516b4bf" 239 | ], 240 | "index": "pypi", 241 | "version": "==0.1.1" 242 | }, 243 | "text-unidecode": { 244 | "hashes": [ 245 | "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8", 246 | "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93" 247 | ], 248 | "version": "==1.3" 249 | }, 250 | "uritemplate": { 251 | "hashes": [ 252 | "sha256:07620c3f3f8eed1f12600845892b0e036a2420acf513c53f7de0abd911a5894f", 253 | "sha256:5af8ad10cec94f215e3f48112de2022e1d5a37ed427fbd88652fa908f2ab7cae" 254 | ], 255 | "version": "==3.0.1" 256 | }, 257 | "urllib3": { 258 | "hashes": [ 259 | "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527", 260 | "sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115" 261 | ], 262 | "version": "==1.25.9" 263 | } 264 | }, 265 | "develop": {} 266 | } 267 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gmail Analyzer 2 | 3 | ![screenshot](screenshots/tool.png) 4 | 5 | 6 | This tool will analyze your gmail account to show you statics of your emails. e.g. 7 | 8 | - Total number of emails 9 | - First email received 10 | - Top senders 11 | - Distribution of emails by years 12 | 13 | There are many metrics that can be added, feel free to contribute (or open a ticket!). 14 | 15 | More information in [this blog post](https://mhasbini.com/blog/introducing-gmail-analyzer.html). 

# Installation

```shell
$ git clone https://github.com/0xbsec/gmail_analyzer.git
$ cd gmail_analyzer
$ pipenv install
$ python analyzer.py --help
```

# Usage

```
$ python analyzer.py --help
usage: analyzer.py [-h] [--top TOP] [--user USER] [--verbose] [--version]

Simple Gmail Analyzer

optional arguments:
  -h, --help   show this help message and exit
  --top TOP    Number of results to show
  --user USER  User ID to fetch data for
  --verbose    Verbose output, helpful for debugging
  --version    Display version and exit
```

--------------------------------------------------------------------------------
/analyzer.py:
--------------------------------------------------------------------------------
import argparse
import sys
import colorama

from src.metrics import Metrics

VERSION = "0.0.1"


def init_args():
    """Parse and return the command line arguments."""

    parser = argparse.ArgumentParser(description="Simple Gmail Analyzer")
    parser.add_argument("--top", type=int, default=10, help="Number of results to show")
    parser.add_argument(
        "--user", type=str, default="me", help="User ID to fetch data for"
    )
    parser.add_argument(
        "--verbose", action="store_true", help="Verbose output, helpful for debugging"
    )
    parser.add_argument(
        "--version", action="store_true", help="Display version and exit"
    )

    args = vars(parser.parse_args())

    return args


if __name__ == "__main__":
    colorama.init()

    args = init_args()

    if args["version"]:
        print("gmail analyzer v{}".format(VERSION))
        sys.exit()

    Metrics(args).start()

--------------------------------------------------------------------------------
/screenshots/tool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhasbini/gmail_analyzer/ef8cb00a70268c85495b4180cca85ec7130af6f4/screenshots/tool.png

--------------------------------------------------------------------------------
/src/helpers.py:
--------------------------------------------------------------------------------
import re
from datetime import datetime
from termcolor import colored


def remove_dup_timezone(date_str):
    # Convert 'Tue, 24 Dec 2019 08:25:25 +0000 (UTC)' to 'Tue, 24 Dec 2019 08:25:25 +0000'
    _updated_date = re.sub(r"\s+\(.{1,20}\)$", "", date_str)
    # Convert 'Tue, 24 Dec 2019 08:25:25 +0000' to '24 Dec 2019 08:25:25 +0000'
    _updated_date = re.sub(r"^.{1,4},\s+", "", _updated_date)

    return _updated_date


def convert_date(date_str):
    # Dates come in multiple formats; try the known variants in order
    clean_date = remove_dup_timezone(date_str)

    _val = None

    try:
        _val = datetime.strptime(clean_date, "%d %b %Y %H:%M:%S %z")
    except ValueError:
        try:
            _val = datetime.strptime(clean_date, "%d %b %Y %H:%M:%S %Z")
        except ValueError:
            _val = datetime.strptime(clean_date, "%d %b %Y %H:%M:%S")

    return _val


def reduce_to_date(date_str):
    return convert_date(date_str).strftime("%Y-%m-%d")


def reduce_to_datetime(date_str):
    return convert_date(date_str).strftime("%Y-%m-%d %H:%M:%S")


def reduce_to_time(date_str):
    return convert_date(date_str).strftime("%H")


def reduce_to_year(date_str):
    return int(convert_date(date_str).strftime("%Y"))


def chunks(items, n):
    # Yield successive n-sized chunks from items
    for i in range(0, len(items), n):
        yield items[i : i + n]


loader_icn = colored("*", "green")
h1_icn = colored("#", "red")
h2_icn = colored("##", "red")
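
The three `strptime` fallbacks in `convert_date` cover the common Date header variants. A quick sketch (not part of the repo) of how the helpers normalize them, using the sample strings from the comments above:

```python
from src.helpers import convert_date, reduce_to_date, reduce_to_year

# Both raw header styles collapse to the same timezone-aware datetime
print(convert_date("Tue, 24 Dec 2019 08:25:25 +0000 (UTC)"))  # 2019-12-24 08:25:25+00:00
print(convert_date("24 Dec 2019 08:25:25 +0000"))             # 2019-12-24 08:25:25+00:00

# The reduce_to_* helpers reformat the parsed datetime for grouping
print(reduce_to_date("Tue, 24 Dec 2019 08:25:25 +0000"))  # 2019-12-24
print(reduce_to_year("Tue, 24 Dec 2019 08:25:25 +0000"))  # 2019
```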

--------------------------------------------------------------------------------
/src/metrics.py:
--------------------------------------------------------------------------------
import time
from progress.spinner import Spinner
from termgraph.termgraph import chart, calendar_heatmap
import agate
import warnings
import concurrent.futures
from threading import Event
import termtables

from src import helpers
from src.processor import Processor


class Metrics:
    def __init__(self, args):
        # Ignore warnings about SSL connections
        warnings.simplefilter("ignore", ResourceWarning)

        self.processor = Processor()
        self.user_id = args["user"]
        self.resultsLimit = args["top"]
        self.table = None

    def _load_table(self, event):
        table = agate.Table.from_object(list(self.processor.messagesQueue))

        event.set()

        self.table = table

    def _analyze_senders(self, event):
        data = (
            self.table.pivot("fields/from")
            .where(lambda row: row["fields/from"] is not None)
            .order_by("Count", reverse=True)
            .limit(self.resultsLimit)
        )

        _values = data.columns.values()

        data_keys = list(_values[0].values())
        data_count = [[i] for i in list(map(int, list(_values[1].values())))]

        event.set()

        print(f"\n\n{helpers.h1_icn} Senders (top {self.resultsLimit})\n")
        args = {
            "stacked": False,
            "width": 55,
            "no_labels": False,
            "format": "{:<,d}",
            "suffix": "",
            "vertical": False,
            "different_scale": False,
        }

        chart(colors=[94], data=data_count, args=args, labels=data_keys)

    def _analyze_count(self, event):
        total = self.table.aggregate([("total", agate.Count())])["total"]
        total_senders = (
            self.table.distinct("fields/from")
            .select("fields/from")
            .aggregate([("total", agate.Count())])["total"]
        )

        if total == 0:
            first_email_date = ""
            last_email_date = None
        else:
            date_data = self.table.where(
                lambda row: row["fields/date"] is not None
            ).compute(
                [
                    (
                        "reduce_to_datetime",
                        agate.Formula(
                            agate.DateTime(datetime_format="%Y-%m-%d %H:%M:%S"),
                            lambda row: helpers.reduce_to_datetime(row["fields/date"]),
                        ),
                    )
                ]
            )
            first_email_date = (
                date_data.order_by("reduce_to_datetime")
                .limit(1)
                .columns["fields/date"]
                .values()[0]
            )
            last_email_date = (
                date_data.order_by("reduce_to_datetime", reverse=True)
                .limit(1)
                .columns["fields/date"]
                .values()[0]
            )
        event.set()

        metrics = [
            ["Total emails", total],
            ["Senders", total_senders],
            ["First Email Date", first_email_date],
        ]

        if last_email_date:
            date_delta = helpers.convert_date(last_email_date) - helpers.convert_date(
                first_email_date
            )
            # Guard against a zero-day span (all emails on the same day)
            avg_email_per_day = total / max(date_delta.days, 1)
            metrics.append(["Avg. Emails/Day", f"{avg_email_per_day:.2f}"])

        print(f"\n\n{helpers.h1_icn} Stats\n")
        print(termtables.to_string(metrics))

    def _analyze_date(self, event):
        table = self.table.where(lambda row: row["fields/date"] is not None).compute(
            [
                (
                    "reduce_to_date",
                    agate.Formula(
                        agate.Text(),
                        lambda row: helpers.reduce_to_date(row["fields/date"]),
                    ),
                ),
                (
                    "reduce_to_year",
                    agate.Formula(
                        agate.Number(),
                        lambda row: helpers.reduce_to_year(row["fields/date"]),
                    ),
                ),
                (
                    "reduce_to_time",
                    agate.Formula(
                        agate.Number(),
                        lambda row: helpers.reduce_to_time(row["fields/date"]),
                    ),
                ),
            ]
        )

        years = table.distinct("reduce_to_year").columns["reduce_to_year"].values()

        _data = {}

        for year in years:
            _data[year] = (
                table.where(lambda row: row["reduce_to_year"] == year)
                .select("reduce_to_date")
                .pivot("reduce_to_date")
                .order_by("reduce_to_date")
            )

        event.set()

        print(f"\n\n{helpers.h1_icn} Date\n")

        for year in years:
            data_keys = list(_data[year].columns["reduce_to_date"].values())
            _counts = list(map(int, list(_data[year].columns["Count"].values())))
            _sum = sum(_counts)
            data_count = [[i] for i in _counts]

            args = {"color": False, "custom_tick": False, "start_dt": f"{year}-01-01"}

            print(f"\n{helpers.h2_icn} Year {year} ({_sum:,} emails)\n")
            calendar_heatmap(data=data_count, args=args, labels=data_keys)

    def analyse(self):
        """
        Read from the messages queue and generate:
          1. overall stats (totals, first email, average per day)
          2. a counter for the From field (top senders)
          3. per-year calendar heatmaps of email dates
        """

        # Queue entries look like:
        # {'id': '16f39fe119ee8427', 'labels': ['UNREAD', 'CATEGORY_UPDATES', 'INBOX'], 'fields': {'from': 'Coursera', 'date': 'Tue, 24 Dec 2019 22:13:09 +0000'}}

        # Each analysis step runs in a worker thread while a spinner animates
        # in the main thread until the step's Event is set.
        steps = [
            ("Loading messages", self._load_table),
            ("Analysing count", self._analyze_count),
            ("Analysing senders", self._analyze_senders),
            ("Analysing dates", self._analyze_date),
        ]

        with concurrent.futures.ThreadPoolExecutor() as executor:
            for label, step in steps:
                progress = Spinner(f"{helpers.loader_icn} {label} ")
                event = Event()
                future = executor.submit(step, event)

                while not event.is_set() and future.running():
                    progress.next()
                    time.sleep(0.1)

                progress.finish()

    def start(self):
        messages = self.processor.get_messages()

        self.processor.get_metadata(messages)

        self.analyse()
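
Every analyzer above leans on agate's `pivot()`, which tallies rows per distinct value into a `Count` column. A minimal sketch (not part of the repo) on toy rows shaped like `Processor`'s queue entries; `agate.Table.from_object` flattens the nested `fields` dict into `fields/from`-style column names, which is exactly what the code above relies on:

```python
import agate

# Toy rows shaped like Processor.messagesQueue entries (labels omitted)
rows = [
    {"id": "1", "fields": {"from": "Coursera", "date": "Tue, 24 Dec 2019 22:13:09 +0000"}},
    {"id": "2", "fields": {"from": "Coursera", "date": "Wed, 25 Dec 2019 09:10:11 +0000"}},
    {"id": "3", "fields": {"from": "GitHub", "date": "Wed, 25 Dec 2019 10:11:12 +0000"}},
]

table = agate.Table.from_object(rows)

# pivot() yields one row per distinct sender plus a Count column
top = table.pivot("fields/from").order_by("Count", reverse=True)
top.print_table()  # Coursera 2, GitHub 1
```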
--------------------------------------------------------------------------------
/src/processor.py:
--------------------------------------------------------------------------------
import collections
import os.path
import pickle
from progress.counter import Counter
from progress.bar import IncrementalBar

from src import helpers
from src.service import Service

_progressPadding = 29


class Processor:
    # Talks to the Google API, fetches results, and decorates them
    def __init__(self):
        self.service = Service().instance()
        self.user_id = "me"
        self.messagesQueue = collections.deque()
        self.failedMessagesQueue = collections.deque()

    def get_messages(self):
        # Get all messages of the user, following nextPageToken pagination.
        # Output format:
        # [{'id': '13c...7', 'threadId': '13c...7'}, ...]

        # Dev shortcut: uncomment to reuse a cached result
        # if os.path.exists("messages.pickle"):
        #     with open("messages.pickle", "rb") as token:
        #         messages = pickle.load(token)
        #     return messages

        # Possible list() filters: includeSpamTrash, labelIds

        response = self.service.users().messages().list(userId=self.user_id).execute()
        messages = []

        progress = Counter(
            f"{helpers.loader_icn} Fetching messages page ".ljust(_progressPadding, " ")
        )

        if "messages" in response:
            messages.extend(response["messages"])

        while "nextPageToken" in response:
            page_token = response["nextPageToken"]

            response = (
                self.service.users()
                .messages()
                .list(userId=self.user_id, pageToken=page_token)
                .execute()
            )
            messages.extend(response.get("messages", []))

            progress.next()

        progress.finish()

        return messages

    def process_message(self, request_id, response, exception):
        # Batch callback: record failures, otherwise extract the From/Date headers
        if exception is not None:
            self.failedMessagesQueue.append(exception.uri)
            return

        headers = response["payload"]["headers"]

        _date = next(
            (header["value"] for header in headers if header["name"] == "Date"), None
        )
        _from = next(
            (header["value"] for header in headers if header["name"] == "From"), None
        )

        self.messagesQueue.append(
            {
                "id": response["id"],
                "labels": response["labelIds"],
                "fields": {"from": _from, "date": _date},
            }
        )

    def get_metadata(self, messages):
        # Get metadata for all messages:
        #   1. Create batched get-message requests for all messages
        #   2. Process the returned output in the process_message callback
        #
        # Each queue entry ends up shaped like:
        # {
        #     'id': '16f....427',
        #     'labels': ['UNREAD', 'CATEGORY_UPDATES', 'INBOX'],
        #     'fields': {'from': 'Coursera', 'date': 'Tue, 24 Dec 2019 22:13:09 +0000'}
        # }

        # Dev shortcut: uncomment to reuse a cached result
        # if os.path.exists("success.pickle"):
        #     with open("success.pickle", "rb") as token:
        #         self.messagesQueue = pickle.load(token)
        #     return

        progress = IncrementalBar(
            f"{helpers.loader_icn} Fetching messages meta data ".ljust(
                _progressPadding, " "
            ),
            max=len(messages),
        )

        for messages_batch in helpers.chunks(messages, 250):
            batch = self.service.new_batch_http_request()

            for message in messages_batch:
                msg_id = message["id"]
                batch.add(
                    self.service.users().messages().get(userId=self.user_id, id=msg_id),
                    callback=self.process_message,
                )

            batch.execute()
            progress.next(len(messages_batch))

        progress.finish()
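
A possible tweak (not something the repo does): since `process_message` only reads the `From` and `Date` headers, the per-message `get` in `get_metadata` could ask the Gmail API for the metadata format so that only those headers come back instead of full message payloads. A sketch of the variant `batch.add()` call, using the API's standard `format` and `metadataHeaders` parameters:

```python
# Hypothetical drop-in for the batch.add() call inside get_metadata
batch.add(
    self.service.users().messages().get(
        userId=self.user_id,
        id=msg_id,
        format="metadata",                 # headers only, no body parts
        metadataHeaders=["From", "Date"],  # just the two headers process_message reads
    ),
    callback=self.process_message,
)
```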

--------------------------------------------------------------------------------
/src/service.py:
--------------------------------------------------------------------------------
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request


class Service:
    def __init__(self):
        self.scopes = ["https://www.googleapis.com/auth/gmail.readonly"]

    def instance(self):
        service = build("gmail", "v1", credentials=self._get_creds())

        return service

    def _get_creds(self):
        creds = None

        # The file token.pickle stores the user's access and refresh tokens, and is
        # created automatically when the authorization flow completes for the first
        # time.
        if os.path.exists("token.pickle"):
            with open("token.pickle", "rb") as token:
                creds = pickle.load(token)

        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    "src/credentials.json", self.scopes
                )
                creds = flow.run_local_server(port=0)

            # Save the credentials for the next run
            with open("token.pickle", "wb") as token:
                pickle.dump(creds, token)

        return creds

--------------------------------------------------------------------------------