├── Base ├── __init__.py ├── activity_store.py ├── config.py ├── models.py ├── period.py ├── sniff_x.py └── stats.py ├── Makefile ├── README.md ├── requirements.txt └── setup.py /Base/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import configparser 7 | 8 | from Base.activity_store import ActivityStore 9 | from Base import config as cfg 10 | 11 | 12 | def parse_config(): 13 | conf_parser = argparse.ArgumentParser(description=__doc__, add_help=False, 14 | formatter_class=argparse.RawDescriptionHelpFormatter) 15 | conf_parser.add_argument("-c", "--config", 16 | help="Config file with defaults. Command line parameters will override those given in the config file. The config file must start with a \"[Defaults]\" section, followed by [argument]=[value] on each line.", metavar="FILE") 17 | args, remaining_argv = conf_parser.parse_known_args() 18 | 19 | defaults = {} 20 | if args.config: 21 | if not os.path.exists(args.config): 22 | raise EnvironmentError(f"Config file {args.config} doesn't exist.") 23 | config = configparser.ConfigParser() 24 | config.read([args.config]) 25 | defaults = dict(config.items('Defaults')) 26 | else: 27 | if os.path.exists(os.path.expanduser('~/.Base/Base.conf')): 28 | config = configparser.ConfigParser() 29 | config.read([os.path.expanduser('~/.Base/Base.conf')]) 30 | defaults = dict(config.items('Defaults')) 31 | 32 | parser = argparse.ArgumentParser(description='Monitor your computer activities and store them in a database for later analysis or disaster recovery.', parents=[conf_parser]) 33 | parser.set_defaults(**defaults) 34 | parser.add_argument('-d', '--data-dir', help=f'Data directory for Base, where the database is stored. Remember that Base must have read/write access. Default is {cfg.DATA_DIR}', default=cfg.DATA_DIR) 35 | parser.add_argument('-n', '--no-text', action='store_true', help='Do not store what you type. This will make your database smaller and less sensitive to security breaches. Process name, window titles, window geometry, mouse clicks, number of keys pressed and key timings will still be stored, but not the actual letters. Key timings are stored to enable activity calculation in selfstats.') 36 | parser.add_argument('-r', '--no-repeat', action='store_true', help='Do not store special characters as repeated characters.') 37 | 38 | return parser.parse_args() 39 | 40 | 41 | def main(): 42 | try: 43 | args = vars(parse_config()) 44 | except EnvironmentError as e: 45 | print(str(e)) 46 | sys.exit(1) 47 | 48 | args['data_dir'] = os.path.expanduser(args['data_dir']) 49 | 50 | try: 51 | os.makedirs(args['data_dir']) 52 | except OSError: 53 | pass 54 | 55 | astore = ActivityStore(os.path.join(args['data_dir'], cfg.DBNAME), 56 | store_text=(not args['no_text']), 57 | repeat_char=(not args['no_repeat'])) 58 | try: 59 | astore.run() 60 | except KeyboardInterrupt: 61 | astore.close() 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /Base/activity_store.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime 3 | NOW = datetime.now 4 | 5 | 6 | from Base import sniff_x as sniffer 7 | 8 | from Base import models 9 | from Base.models import Process, Window, Geometry, Click, Keys 10 | 11 | 12 | SKIP_MODIFIERS = {"", "Shift_L", "Control_L", "Super_L", "Alt_L", "Super_R", "Control_R", "Shift_R", "[65027]"} # [65027] is AltGr in X for some ungodly reason. 13 | 14 | SCROLL_BUTTONS = {4, 5, 6, 7} 15 | SCROLL_COOLOFF = 10 # seconds 16 | 17 | 18 | class Display: 19 | def __init__(self): 20 | self.proc_id = None 21 | self.win_id = None 22 | self.geo_id = None 23 | 24 | 25 | class KeyPress: 26 | def __init__(self, key, time, is_repeat): 27 | self.key = key 28 | self.time = time 29 | self.is_repeat = is_repeat 30 | 31 | 32 | class ActivityStore: 33 | def __init__(self, db_name, store_text=True, repeat_char=True): 34 | self.session_maker = models.initialize(db_name) 35 | 36 | self.store_text = store_text 37 | self.repeat_char = repeat_char 38 | self.curtext = "" 39 | 40 | self.key_presses = [] 41 | self.mouse_path = [] 42 | 43 | self.current_window = Display() 44 | 45 | self.last_scroll = {button: 0 for button in SCROLL_BUTTONS} 46 | 47 | self.last_key_time = time.time() 48 | self.last_commit = time.time() 49 | 50 | self.started = NOW() 51 | self.last_screen_change = None 52 | 53 | def trycommit(self): 54 | self.last_commit = time.time() 55 | for _ in range(1000): 56 | try: 57 | self.session.commit() 58 | break 59 | except: 60 | self.session.rollback() 61 | 62 | def run(self): 63 | self.session = self.session_maker() 64 | 65 | self.sniffer = sniffer.Sniffer() 66 | self.sniffer.screen_hook = self.got_screen_change 67 | self.sniffer.key_hook = self.got_key 68 | self.sniffer.mouse_button_hook = self.got_mouse_click 69 | self.sniffer.mouse_move_hook = self.got_mouse_move 70 | 71 | self.sniffer.run() 72 | 73 | def got_screen_change(self, process_name, window_name, win_x, win_y, win_width, win_height): 74 | """Receives a screen change and stores any changes. 75 | If the process or window has changed it will also store any queued pressed keys. 76 | Keyword arguments: 77 | process_name -- the name of the process running the current window 78 | window_name -- the name of the window 79 | win_x -- the x position of the window 80 | win_y -- the y position of the window 81 | win_width -- the width of the window 82 | win_height -- the height of the window""" 83 | 84 | # skip the event if same arguments as last time are passed 85 | args = [process_name, window_name, win_x, win_y, win_width, win_height] 86 | if self.last_screen_change == args: 87 | return 88 | 89 | self.last_screen_change = args 90 | 91 | cur_process = self.session.query( 92 | Process 93 | ).filter_by( 94 | name=process_name 95 | ).scalar() 96 | if not cur_process: 97 | cur_process = Process(process_name) 98 | self.session.add(cur_process) 99 | 100 | cur_geometry = self.session.query( 101 | Geometry 102 | ).filter_by( 103 | xpos=win_x, 104 | ypos=win_y, 105 | width=win_width, 106 | height=win_height 107 | ).scalar() 108 | if not cur_geometry: 109 | cur_geometry = Geometry(win_x, win_y, win_width, win_height) 110 | self.session.add(cur_geometry) 111 | 112 | cur_window = self.session.query(Window).filter_by(title=window_name, 113 | process_id=cur_process.id).scalar() 114 | if not cur_window: 115 | cur_window = Window(window_name, cur_process.id) 116 | self.session.add(cur_window) 117 | 118 | if not (self.current_window.proc_id == cur_process.id 119 | and self.current_window.win_id == cur_window.id): 120 | self.trycommit() 121 | self.store_keys() # happens before as these keypresses belong to the previous window 122 | self.current_window.proc_id = cur_process.id 123 | self.current_window.win_id = cur_window.id 124 | self.current_window.geo_id = cur_geometry.id 125 | 126 | def filter_many(self): 127 | specials_in_row = 0 128 | lastpress = None 129 | newpresses = [] 130 | for press in self.key_presses: 131 | key = press.key 132 | if specials_in_row and key != lastpress.key: 133 | if specials_in_row > 1: 134 | lastpress.key = f'{lastpress.key[:-2]}]x{specials_in_row}>' 135 | 136 | newpresses.append(lastpress) 137 | specials_in_row = 0 138 | 139 | if len(key) > 1: 140 | specials_in_row += 1 141 | lastpress = press 142 | else: 143 | newpresses.append(press) 144 | 145 | if specials_in_row: 146 | if specials_in_row > 1: 147 | lastpress.key = f'{lastpress.key[:-2]}]x{specials_in_row}>' 148 | newpresses.append(lastpress) 149 | 150 | self.key_presses = newpresses 151 | 152 | def store_keys(self): 153 | """ Stores the current queued key-presses """ 154 | if self.repeat_char: 155 | self.filter_many() 156 | 157 | if self.key_presses: 158 | keys = [press.key for press in self.key_presses] 159 | timings = [press.time for press in self.key_presses] 160 | nrkeys = sum(0 if press.is_repeat else 1 for press in self.key_presses) 161 | 162 | curtext = "" 163 | if not self.store_text: 164 | keys = [] 165 | else: 166 | curtext = ''.join(keys) 167 | 168 | self.session.add(Keys(curtext, 169 | keys, 170 | timings, 171 | nrkeys, 172 | self.started, 173 | self.current_window.proc_id, 174 | self.current_window.win_id, 175 | self.current_window.geo_id)) 176 | 177 | self.trycommit() 178 | 179 | self.started = NOW() 180 | self.key_presses = [] 181 | self.last_key_time = time.time() 182 | 183 | def got_key(self, keycode, state, string, is_repeat): 184 | """ Receives key-presses and queues them for storage. 185 | keycode is the code sent by the keyboard to represent the pressed key 186 | state is the list of modifier keys pressed, each modifier key should be represented 187 | with capital letters and optionally followed by an underscore and location 188 | specifier, i.e: SHIFT or SHIFT_L/SHIFT_R, ALT, CTRL 189 | string is the string representation of the key press 190 | repeat is True if the current key is a repeat sent by the keyboard """ 191 | now = time.time() 192 | 193 | if string in SKIP_MODIFIERS: 194 | return 195 | 196 | if len(state) > 1 or (len(state) == 1 and state[0] != "Shift"): 197 | string = f'<[{" ".join(state)}: {string}]>' 198 | elif len(string) > 1: 199 | string = f'<[{string}]>' 200 | 201 | self.key_presses.append(KeyPress(string, now - self.last_key_time, is_repeat)) 202 | self.last_key_time = now 203 | 204 | def store_click(self, button, x, y): 205 | """ Stores incoming mouse-clicks """ 206 | self.session.add(Click(button, 207 | True, 208 | x, y, 209 | len(self.mouse_path), 210 | self.current_window.proc_id, 211 | self.current_window.win_id, 212 | self.current_window.geo_id)) 213 | self.mouse_path = [] 214 | self.trycommit() 215 | 216 | def got_mouse_click(self, button, x, y): 217 | """ Receives mouse clicks and sends them for storage. 218 | Mouse buttons: left: 1, middle: 2, right: 3, scroll up: 4, down:5, left:6, right:7 219 | x,y are the coordinates of the keypress 220 | press is True if it pressed down, False if released""" 221 | if button in [4, 5, 6, 7]: 222 | if time.time() - self.last_scroll[button] < SCROLL_COOLOFF: 223 | return 224 | self.last_scroll[button] = time.time() 225 | 226 | self.store_click(button, x, y) 227 | 228 | def got_mouse_move(self, x, y): 229 | """ Queues mouse movements. 230 | x,y are the new coorinates on moving the mouse""" 231 | self.mouse_path.append([x, y]) 232 | 233 | def close(self): 234 | """ stops the sniffer and stores the latest keys. To be used on shutdown of program""" 235 | self.sniffer.cancel() 236 | self.store_keys() 237 | 238 | 239 | -------------------------------------------------------------------------------- /Base/config.py: -------------------------------------------------------------------------------- 1 | DATA_DIR = '~/source' 2 | DBNAME = 'source.sqlite' 3 | LOCK_FILE = 'selfspy.pid' 4 | LOCK = None 5 | -------------------------------------------------------------------------------- /Base/models.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import duckdb 3 | import sqlite3 4 | import os 5 | from Base.period import Period 6 | 7 | def initialize(fname): 8 | os.makedirs('data', exist_ok=True) 9 | sqlite_file = os.path.join('data', fname) 10 | con = sqlite3.connect(sqlite_file) 11 | con.execute(""" 12 | CREATE TABLE IF NOT EXISTS process ( 13 | id INTEGER PRIMARY KEY, 14 | name VARCHAR UNIQUE, 15 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 16 | ); 17 | 18 | CREATE TABLE IF NOT EXISTS window ( 19 | id INTEGER PRIMARY KEY, 20 | title VARCHAR, 21 | process_id INTEGER NOT NULL, 22 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 23 | FOREIGN KEY (process_id) REFERENCES process(id) 24 | ); 25 | 26 | CREATE TABLE IF NOT EXISTS geometry ( 27 | id INTEGER PRIMARY KEY, 28 | xpos INTEGER NOT NULL, 29 | ypos INTEGER NOT NULL, 30 | width INTEGER NOT NULL, 31 | height INTEGER NOT NULL, 32 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 33 | ); 34 | 35 | CREATE TABLE IF NOT EXISTS click ( 36 | id INTEGER PRIMARY KEY, 37 | button INTEGER NOT NULL, 38 | press BOOLEAN NOT NULL, 39 | x INTEGER NOT NULL, 40 | y INTEGER NOT NULL, 41 | nrmoves INTEGER NOT NULL, 42 | process_id INTEGER NOT NULL, 43 | window_id INTEGER NOT NULL, 44 | geometry_id INTEGER NOT NULL, 45 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 46 | FOREIGN KEY (process_id) REFERENCES process(id), 47 | FOREIGN KEY (window_id) REFERENCES window(id), 48 | FOREIGN KEY (geometry_id) REFERENCES geometry(id) 49 | ); 50 | 51 | CREATE TABLE IF NOT EXISTS keys ( 52 | id INTEGER PRIMARY KEY, 53 | text VARCHAR NOT NULL, 54 | started TIMESTAMP NOT NULL, 55 | process_id INTEGER NOT NULL, 56 | window_id INTEGER NOT NULL, 57 | geometry_id INTEGER NOT NULL, 58 | nrkeys INTEGER, 59 | keys VARCHAR, 60 | timings VARCHAR, 61 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 62 | FOREIGN KEY (process_id) REFERENCES process(id), 63 | FOREIGN KEY (window_id) REFERENCES window(id), 64 | FOREIGN KEY (geometry_id) REFERENCES geometry(id) 65 | ); 66 | 67 | CREATE TABLE IF NOT EXISTS activity ( 68 | id INTEGER PRIMARY KEY, 69 | process_id INTEGER NOT NULL, 70 | window_id INTEGER NOT NULL, 71 | start_time TIMESTAMP NOT NULL, 72 | end_time TIMESTAMP NOT NULL, 73 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 74 | FOREIGN KEY (process_id) REFERENCES process(id), 75 | FOREIGN KEY (window_id) REFERENCES window(id) 76 | ); 77 | 78 | CREATE TABLE IF NOT EXISTS screenshot ( 79 | id INTEGER PRIMARY KEY, 80 | process_id INTEGER NOT NULL, 81 | window_id INTEGER NOT NULL, 82 | geometry_id INTEGER NOT NULL, 83 | image BLOB NOT NULL, 84 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 85 | FOREIGN KEY (process_id) REFERENCES process(id), 86 | FOREIGN KEY (window_id) REFERENCES window(id), 87 | FOREIGN KEY (geometry_id) REFERENCES geometry(id) 88 | ); 89 | 90 | CREATE INDEX IF NOT EXISTS idx_click_created_at ON click (created_at); 91 | CREATE INDEX IF NOT EXISTS idx_keys_created_at ON keys (created_at); 92 | CREATE INDEX IF NOT EXISTS idx_activity_start_time ON activity (start_time); 93 | CREATE INDEX IF NOT EXISTS idx_activity_end_time ON activity (end_time); 94 | CREATE INDEX IF NOT EXISTS idx_screenshot_created_at ON screenshot (created_at); 95 | """) 96 | return con 97 | 98 | def export_to_parquet(sqlite_file): 99 | parquet_file = sqlite_file.replace('.db', '_' + datetime.datetime.now().strftime('%m-%d-%y') + '.parquet') 100 | duckdb_con = duckdb.connect() 101 | duckdb_con.execute(f"INSTALL sqlite;") 102 | duckdb_con.execute(f"LOAD sqlite;") 103 | duckdb_con.execute(f"ATTACH DATABASE '{sqlite_file}' AS sqlite;") 104 | for table_name in ['process', 'window', 'geometry', 'click', 'keys', 'activity', 'screenshot']: 105 | duckdb_con.execute(f""" 106 | COPY (SELECT * FROM sqlite.{table_name}) TO '{parquet_file}_{table_name}.parquet' (FORMAT 'parquet'); 107 | """) 108 | 109 | class Process: 110 | def __init__(self, name): 111 | self.name = name 112 | 113 | def __repr__(self): 114 | return f"" 115 | 116 | class Window: 117 | def __init__(self, title, process_id): 118 | self.title = title 119 | self.process_id = process_id 120 | 121 | def __repr__(self): 122 | return f"" 123 | 124 | class Geometry: 125 | def __init__(self, x, y, width, height): 126 | self.xpos = x 127 | self.ypos = y 128 | self.width = width 129 | self.height = height 130 | 131 | def __repr__(self): 132 | return f"" 133 | 134 | class Click: 135 | def __init__(self, button, press, x, y, nrmoves, process_id, window_id, geometry_id): 136 | self.button = button 137 | self.press = press 138 | self.x = x 139 | self.y = y 140 | self.nrmoves = nrmoves 141 | self.process_id = process_id 142 | self.window_id = window_id 143 | self.geometry_id = geometry_id 144 | 145 | def __repr__(self): 146 | return f"" 147 | 148 | class Keys: 149 | def __init__(self, text, keys, timings, nrkeys, started, process_id, window_id, geometry_id): 150 | self.text = text 151 | self.keys = keys 152 | self.timings = timings 153 | self.nrkeys = nrkeys 154 | self.started = started 155 | self.process_id = process_id 156 | self.window_id = window_id 157 | self.geometry_id = geometry_id 158 | 159 | def __repr__(self): 160 | return f"" 161 | 162 | class Activity: 163 | def __init__(self, process_id, window_id, start_time, end_time): 164 | self.process_id = process_id 165 | self.window_id = window_id 166 | self.start_time = start_time 167 | self.end_time = end_time 168 | 169 | def duration(self): 170 | return self.end_time - self.start_time 171 | 172 | @staticmethod 173 | def get_for_process(process_id, start_time, end_time, sqlite_file): 174 | duckdb_con = duckdb.connect() 175 | duckdb_con.execute("INSTALL sqlite;") 176 | duckdb_con.execute("LOAD sqlite;") 177 | duckdb_con.execute(f"ATTACH DATABASE '{sqlite_file}' AS sqlite;") 178 | 179 | parquet_files = [f for f in os.listdir('data') if f.endswith('.parquet')] 180 | for parquet_file in parquet_files: 181 | duckdb_con.execute(f""" 182 | CREATE VIEW IF NOT EXISTS {parquet_file} AS 183 | SELECT * FROM parquet_scan('data/{parquet_file}') 184 | """) 185 | 186 | periods = Period(datetime.timedelta(seconds=5), end_time) 187 | rows = duckdb_con.execute(""" 188 | SELECT process_id, window_id, start_time, end_time 189 | FROM sqlite.activity 190 | UNION ALL BY NAME 191 | SELECT process_id, window_id, start_time, end_time 192 | FROM activity 193 | WHERE process_id = ? AND start_time >= ? AND start_time <= ? 194 | ORDER BY start_time 195 | """, [process_id, start_time, end_time]) 196 | periods.extend(row[2] for row in rows.fetchall()) 197 | return periods.times 198 | 199 | def __repr__(self): 200 | return f"" 201 | 202 | class Screenshot: 203 | def __init__(self, process_id, window_id, geometry_id, image): 204 | self.process_id = process_id 205 | self.window_id = window_id 206 | self.geometry_id = geometry_id 207 | self.image = image 208 | 209 | def __repr__(self): 210 | return f"" -------------------------------------------------------------------------------- /Base/period.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | import datetime 3 | 4 | 5 | class Period: 6 | def __init__(self, cutoff: datetime.timedelta, maxtime: datetime.datetime): 7 | self.times = [] 8 | self.cutoff = cutoff 9 | self.maxtime = maxtime 10 | 11 | def append(self, time: datetime.datetime): 12 | ltimes = len(self.times) 13 | end = min(time + self.cutoff, self.maxtime) 14 | 15 | def check_in(i): 16 | if self.times[i][0] <= time <= self.times[i][1]: 17 | self.times[i] = (self.times[i][0], max(end, self.times[i][1])) 18 | return True 19 | return False 20 | 21 | def maybe_merge(i): 22 | if ltimes > i + 1: 23 | if self.times[i][1] >= self.times[i + 1][0]: 24 | self.times[i] = (self.times[i][0], self.times[i + 1][1]) 25 | self.times.pop(i + 1) 26 | 27 | if ltimes == 0: 28 | self.times.append((time, end)) 29 | return 30 | 31 | i = bisect.bisect(self.times, (time,)) 32 | if i >= 1 and check_in(i - 1): 33 | maybe_merge(i - 1) 34 | elif i < ltimes and check_in(i): 35 | maybe_merge(i) 36 | else: 37 | self.times.insert(i, (time, end)) 38 | maybe_merge(i) 39 | 40 | def extend(self, times): 41 | for time in times: 42 | self.append(time) 43 | 44 | def calc_total(self) -> datetime.timedelta: 45 | return sum((t2 - t1 for t1, t2 in self.times), start=datetime.timedelta()) 46 | -------------------------------------------------------------------------------- /Base/sniff_x.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import datetime 3 | from PIL import ImageGrab 4 | from Xlib import X, XK, display 5 | from Xlib.ext import record 6 | from Xlib.error import XError 7 | from Xlib.protocol import rq 8 | 9 | from Base.models import Process, Window, Geometry, Click, Keys, Activity, Screenshot 10 | 11 | def state_to_idx(state): # this could be a dict, but I might want to extend it. 12 | if state == 1: 13 | return 1 14 | if state == 128: 15 | return 4 16 | if state == 129: 17 | return 5 18 | return 0 19 | 20 | 21 | class Sniffer: 22 | def __init__(self, db_conn): 23 | self.keysymdict = {} 24 | for name in dir(XK): 25 | if name.startswith("XK_"): 26 | self.keysymdict[getattr(XK, name)] = name[3:] 27 | 28 | self.key_hook = lambda x: True 29 | self.mouse_button_hook = lambda x: True 30 | self.mouse_move_hook = lambda x: True 31 | self.screen_hook = lambda x: True 32 | 33 | self.contextEventMask = [X.KeyPress, X.MotionNotify] 34 | 35 | self.the_display = display.Display() 36 | self.keymap = self.the_display._keymap_codes 37 | 38 | self.atom_NET_WM_NAME = self.the_display.intern_atom('_NET_WM_NAME') 39 | self.atom_UTF8_STRING = self.the_display.intern_atom('UTF8_STRING') 40 | 41 | self.db_conn = db_conn 42 | self.last_screenshot_hash = None 43 | 44 | def run(self): 45 | while True: 46 | event = self.the_display.next_event() 47 | self.process_event(event) 48 | 49 | def process_event(self, event): 50 | cur_class, cur_window, cur_name = self.get_cur_window() 51 | if cur_class: 52 | cur_geo = self.get_geometry(cur_window) 53 | if cur_geo: 54 | self.screen_hook(cur_class, 55 | cur_name, 56 | cur_geo.xpos, 57 | cur_geo.ypos, 58 | cur_geo.width, 59 | cur_geo.height) 60 | 61 | screenshot = ImageGrab.grab() 62 | screenshot_hash = hash(screenshot.tobytes()) 63 | if screenshot_hash != self.last_screenshot_hash: 64 | self.last_screenshot_hash = screenshot_hash 65 | # Save screenshot to database 66 | process_id = self.get_process_id(cur_class) 67 | window_id = self.get_window_id(cur_name, process_id) 68 | geometry_id = self.save_geometry(cur_geo) 69 | self.save_screenshot(process_id, window_id, geometry_id, screenshot) 70 | 71 | if event.type in [X.KeyPress]: 72 | self.key_hook(*self.key_event(event)) 73 | elif event.type in [X.ButtonPress]: 74 | self.mouse_button_hook(*self.button_event(event)) 75 | elif event.type == X.MotionNotify: 76 | self.mouse_move_hook(event.root_x, event.root_y) 77 | elif event.type == X.MappingNotify: 78 | self.the_display.refresh_keyboard_mapping() 79 | newkeymap = self.the_display._keymap_codes 80 | print('Change keymap!', newkeymap == self.keymap) 81 | self.keymap = newkeymap 82 | 83 | def get_key_name(self, keycode, state): 84 | state_idx = state_to_idx(state) 85 | cn = self.keymap[keycode][state_idx] 86 | if cn < 256: 87 | return chr(cn).decode('latin1') 88 | else: 89 | return self.lookup_keysym(cn) 90 | 91 | def key_event(self, event): 92 | flags = event.state 93 | modifiers = [] 94 | if flags & X.ControlMask: 95 | modifiers.append('Ctrl') 96 | if flags & X.Mod1Mask: # Mod1 is the alt key 97 | modifiers.append('Alt') 98 | if flags & X.Mod4Mask: # Mod4 should be super/windows key 99 | modifiers.append('Super') 100 | if flags & X.ShiftMask: 101 | modifiers.append('Shift') 102 | return (event.detail, 103 | modifiers, 104 | self.get_key_name(event.detail, event.state), 105 | event.sequence_number == 1) 106 | 107 | def button_event(self, event): 108 | return event.detail, event.root_x, event.root_y 109 | 110 | def lookup_keysym(self, keysym): 111 | if keysym in self.keysymdict: 112 | return self.keysymdict[keysym] 113 | return "[%d]" % keysym 114 | 115 | def get_wm_name(self, win): 116 | """ 117 | Custom method to query for _NET_WM_NAME first, before falling back to 118 | python-xlib's method, which (currently) only queries WM_NAME with 119 | type=STRING.""" 120 | 121 | 122 | d = win.get_full_property(self.atom_NET_WM_NAME, self.atom_UTF8_STRING) 123 | if d is None or d.format != 8: 124 | # Fallback. 125 | r = win.get_wm_name() 126 | if r: 127 | return r.decode('latin1') # WM_NAME with type=STRING. 128 | else: 129 | try: 130 | return d.value.decode('utf8') 131 | except UnicodeError: 132 | return d.value.encode('utf8').decode('utf8') 133 | 134 | def get_cur_window(self): 135 | i = 0 136 | cur_class = None 137 | cur_window = None 138 | cur_name = None 139 | while i < 10: 140 | try: 141 | cur_window = self.the_display.get_input_focus().focus 142 | cur_class = None 143 | cur_name = None 144 | while cur_class is None: 145 | if type(cur_window) is int: 146 | return None, None, None 147 | 148 | cur_name = self.get_wm_name(cur_window) 149 | cur_class = cur_window.get_wm_class() 150 | 151 | if cur_class: 152 | cur_class = cur_class[1] 153 | if not cur_class: 154 | cur_window = cur_window.query_tree().parent 155 | except XError: 156 | i += 1 157 | continue 158 | break 159 | cur_class = cur_class or '' 160 | cur_name = cur_name or '' 161 | return cur_class.decode('latin1'), cur_window, cur_name 162 | 163 | def get_geometry(self, cur_window): 164 | i = 0 165 | geo = None 166 | while i < 10: 167 | try: 168 | geo = cur_window.get_geometry() 169 | break 170 | except XError: 171 | i += 1 172 | return Geometry(geo.x, geo.y, geo.width, geo.height) if geo else None 173 | 174 | def get_process_id(self, process_name): 175 | process = self.db_conn.execute("SELECT id FROM process WHERE name = ?", (process_name,)).fetchone() 176 | if process: 177 | return process[0] 178 | else: 179 | self.db_conn.execute("INSERT INTO process (name) VALUES (?)", (process_name,)) 180 | self.db_conn.commit() 181 | return self.db_conn.execute("SELECT last_insert_rowid()").fetchone()[0] 182 | 183 | def get_window_id(self, window_title, process_id): 184 | window = self.db_conn.execute("SELECT id FROM window WHERE title = ? AND process_id = ?", 185 | (window_title, process_id)).fetchone() 186 | if window: 187 | return window[0] 188 | else: 189 | self.db_conn.execute("INSERT INTO window (title, process_id) VALUES (?, ?)", 190 | (window_title, process_id)) 191 | self.db_conn.commit() 192 | return self.db_conn.execute("SELECT last_insert_rowid()").fetchone()[0] 193 | 194 | def save_geometry(self, geometry): 195 | self.db_conn.execute(""" 196 | INSERT INTO geometry (xpos, ypos, width, height) 197 | VALUES (?, ?, ?, ?) 198 | """, (geometry.xpos, geometry.ypos, geometry.width, geometry.height)) 199 | self.db_conn.commit() 200 | return self.db_conn.execute("SELECT last_insert_rowid()").fetchone()[0] 201 | 202 | def save_screenshot(self, process_id, window_id, geometry_id, screenshot): 203 | self.db_conn.execute(""" 204 | INSERT INTO screenshot (process_id, window_id, geometry_id, image) 205 | VALUES (?, ?, ?, ?) 206 | """, (process_id, window_id, geometry_id, screenshot.tobytes())) 207 | self.db_conn.commit() -------------------------------------------------------------------------------- /Base/stats.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import re 5 | import datetime 6 | import time 7 | from collections import Counter 8 | from typing import Optional, Union, List, Tuple 9 | 10 | import argparse 11 | import configparser 12 | 13 | from Base import models 14 | from Base.period import Period 15 | 16 | import codecs 17 | sys.stdout = codecs.getwriter('utf8')(sys.stdout) 18 | 19 | ACTIVE_SECONDS = 180 20 | PERIOD_LOOKUP = {'s': 'seconds', 'm': 'minutes', 'h': 'hours', 'd': 'days', 'w': 'weeks'} 21 | ACTIVITY_ACTIONS = {'active', 'periods', 'pactive', 'tactive', 'ratios'} 22 | SUMMARY_ACTIONS = ACTIVITY_ACTIONS.union({'pkeys', 'tkeys', 'key_freqs', 'clicks', 'ratios'}) 23 | 24 | PROCESS_ACTIONS = {'pkeys', 'pactive'} 25 | WINDOW_ACTIONS = {'tkeys', 'tactive'} 26 | 27 | BUTTON_MAP = [('button1', 'left'), 28 | ('button2', 'middle'), 29 | ('button3', 'right'), 30 | ('button4', 'up'), 31 | ('button5', 'down')] 32 | 33 | 34 | def pretty_seconds(secs: int) -> str: 35 | secs = int(secs) 36 | active = False 37 | outs = '' 38 | days = secs // (3600 * 24) 39 | if days: 40 | active = True 41 | outs += f'{days} days, ' 42 | secs -= days * (3600 * 24) 43 | 44 | hours = secs // 3600 45 | if hours: 46 | active = True 47 | if active: 48 | outs += f'{hours}h ' 49 | secs -= hours * 3600 50 | 51 | minutes = secs // 60 52 | if minutes: 53 | active = True 54 | if active: 55 | outs += f'{minutes}m ' 56 | secs -= minutes * 60 57 | 58 | outs += f'{secs}s' 59 | 60 | return outs 61 | 62 | 63 | def make_time_string(dates: Optional[Union[List[str], str]], clock: Optional[str]) -> Tuple[str, datetime.datetime]: 64 | now = datetime.datetime.now() 65 | now2 = datetime.datetime.now() 66 | 67 | if dates is None: 68 | dates = [] 69 | 70 | if isinstance(dates, list) and len(dates) > 0: 71 | if isinstance(dates[0], str): 72 | datesstr = " ".join(dates) 73 | else: 74 | print(f'{dates[0]} is of incompatible type list of {type(dates[0])}.') 75 | elif isinstance(dates, str): 76 | datesstr = dates 77 | else: 78 | datesstr = now.strftime('%Y %m %d') 79 | dates = datesstr.split() # any whitespace 80 | 81 | if len(dates) > 3: 82 | print('Max three arguments to date', dates) 83 | sys.exit(1) 84 | 85 | try: 86 | dates = [int(d) for d in dates] 87 | if len(dates) == 3: 88 | now = now.replace(year=dates[0]) 89 | if len(dates) >= 2: 90 | now = now.replace(month=dates[-2]) 91 | if len(dates) >= 1: 92 | now = now.replace(day=dates[-1]) 93 | 94 | if len(dates) == 2: 95 | if now > now2: 96 | now = now.replace(year=now.year - 1) 97 | 98 | if len(dates) == 1: 99 | if now > now2: 100 | m = now.month - 1 101 | if m: 102 | now = now.replace(month=m) 103 | else: 104 | now = now.replace(year=now.year - 1, month=12) 105 | except ValueError: 106 | print('Malformed date', dates) 107 | sys.exit(1) 108 | 109 | if clock: 110 | try: 111 | hour, minute = [int(v) for v in clock.split(':')] 112 | except ValueError: 113 | print('Malformed clock', clock) 114 | sys.exit(1) 115 | 116 | now = now.replace(hour=hour, minute=minute, second=0) 117 | 118 | if now > now2: 119 | now -= datetime.timedelta(days=1) 120 | 121 | return now.strftime('%Y-%m-%d %H:%M'), now 122 | 123 | 124 | def make_period(q, period: Union[List[str], str], who: str, start: Optional[datetime.datetime], prop): 125 | if isinstance(period, list) and len(period) > 0: 126 | if isinstance(period[0], str): 127 | periodstr = "".join(period) 128 | else: 129 | print(f'{who} is of incompatible type list of {type(period[0])}.') 130 | elif isinstance(period, str): 131 | periodstr = period.translate(str.maketrans('', '', " \t")) 132 | else: 133 | print(f'{who} is of incompatible type {type(period)}.') 134 | sys.exit(1) 135 | pmatch = re.match(r"(\d+)(["+"".join(PERIOD_LOOKUP.keys())+"]?)", periodstr) 136 | if pmatch is None: 137 | print(f'{who} has an unrecognizable format: {periodstr}') 138 | sys.exit(1) 139 | period = [pmatch.group(1)] + ([pmatch.group(2)] if pmatch.group(2) else []) 140 | 141 | d = {} 142 | val = int(period[0]) 143 | if len(period) == 1: 144 | d['hours'] = val 145 | else: 146 | if period[1] not in PERIOD_LOOKUP: 147 | print(f'--limit unit "{period[1]}" not one of {list(PERIOD_LOOKUP.keys())}') 148 | sys.exit(1) 149 | d[PERIOD_LOOKUP[period[1]]] = val 150 | 151 | if start: 152 | return q.filter(prop <= start + datetime.timedelta(**d)) 153 | else: 154 | start = datetime.datetime.now() - datetime.timedelta(**d) 155 | return q.filter(prop >= start), start 156 | 157 | 158 | def create_times(row) -> List[float]: 159 | current_time = time.mktime(row.created_at.timetuple()) 160 | abs_times = [current_time] 161 | for t in row.load_timings(): 162 | current_time -= t 163 | abs_times.append(current_time) 164 | abs_times.reverse() 165 | return abs_times 166 | 167 | 168 | class Selfstats: 169 | def __init__(self, db_name: str, args: argparse.Namespace): 170 | self.args = args 171 | self.conn = models.initialize(db_name) 172 | self.inmouse = False 173 | 174 | self.check_needs() 175 | 176 | def do(self): 177 | if self.need_summary: 178 | self.calc_summary() 179 | self.show_summary() 180 | else: 181 | self.show_rows() 182 | 183 | def check_needs(self): 184 | self.need_text = False 185 | self.need_activity = False 186 | self.need_timings = False 187 | self.need_keys = False 188 | self.need_humanreadable = False 189 | self.need_summary = False 190 | self.need_process = any(self.args[k] for k in PROCESS_ACTIONS) 191 | self.need_window = any(self.args[k] for k in WINDOW_ACTIONS) 192 | 193 | if self.args['body'] is not None: 194 | self.need_text = True 195 | if self.args['showtext']: 196 | self.need_text = True 197 | cutoff = [self.args[k] for k in ACTIVITY_ACTIONS if self.args[k]] 198 | if cutoff: 199 | if any(c != cutoff[0] for c in cutoff): 200 | print('You must give the same time argument to the different parameters in the --active family, when you use several in the same query.') 201 | sys.exit(1) 202 | self.need_activity = cutoff[0] 203 | self.need_timings = True 204 | if self.args['key_freqs']: 205 | self.need_keys = True 206 | if self.args['human_readable']: 207 | self.need_humanreadable = True 208 | 209 | if any(self.args[k] for k in SUMMARY_ACTIONS): 210 | self.need_summary = True 211 | 212 | def maybe_reg_filter(self, q, name: str, names: str, table, source_prop: str, target_prop): 213 | if self.args[name] is not None: 214 | ids = [] 215 | try: 216 | reg = re.compile(self.args[name], re.I) 217 | except re.error as e: 218 | print(f'Error in regular expression {str(e)}') 219 | sys.exit(1) 220 | 221 | for x in self.conn.execute(f"SELECT * FROM {table}").fetchall(): 222 | if reg.search(getattr(x, source_prop)): 223 | ids.append(x.id) 224 | if not self.inmouse: 225 | print(f'{len(ids)} {names} matched') 226 | if ids: 227 | q = q.filter(target_prop.in_(ids)) 228 | else: 229 | return q, False 230 | return q, True 231 | 232 | def filter_prop(self, prop, startprop): 233 | q = self.conn.table(prop).order_by(prop.id) 234 | 235 | if self.args['date'] or self.args['clock']: 236 | s, start = make_time_string(self.args['date'], self.args['clock']) 237 | q = q.filter(prop.created_at >= s) 238 | if self.args['limit'] is not None: 239 | q = make_period(q, self.args['limit'], '--limit', start, startprop) 240 | elif self.args['id'] is not None: 241 | q = q.filter(prop.id >= self.args['id']) 242 | if self.args['limit'] is not None: 243 | q = q.filter(prop.id < self.args['id'] + int(self.args['limit'][0])) 244 | elif self.args['back'] is not None: 245 | q, start = make_period(q, self.args['back'], '--back', None, startprop) 246 | if self.args['limit'] is not None: 247 | q = make_period(q, self.args['limit'], '--limit', start, startprop) 248 | 249 | q, found = self.maybe_reg_filter(q, 'process', 'process(es)', 'process', 'name', prop.process_id) 250 | if not found: 251 | return None 252 | 253 | q, found = self.maybe_reg_filter(q, 'title', 'title(s)', 'window', 'title', prop.window_id) 254 | if not found: 255 | return None 256 | 257 | return q 258 | 259 | def filter_keys(self): 260 | q = self.filter_prop('keys', 'started') 261 | if q is None: 262 | return 263 | 264 | if self.args['min_keys'] is not None: 265 | q = q.filter(models.Keys.nrkeys >= self.args['min_keys']) 266 | 267 | if self.args['body']: 268 | try: 269 | bodrex = re.compile(self.args['body'], re.I) 270 | except re.error as e: 271 | print(f'Error in regular expression {str(e)}') 272 | sys.exit(1) 273 | for x in q.execute().fetchall(): 274 | body = x.decrypt_text() 275 | if bodrex.search(body): 276 | yield x 277 | else: 278 | for x in q.execute(): 279 | yield x 280 | 281 | def filter_clicks(self): 282 | self.inmouse = True 283 | q = self.filter_prop('click', 'created_at') 284 | if q is None: 285 | return 286 | 287 | for x in q.execute(): 288 | yield x 289 | 290 | def show_rows(self): 291 | fkeys = self.filter_keys() 292 | rows = 0 293 | print(' ', 294 | '' if self.args['showtext'] else '') 295 | 296 | for row in fkeys: 297 | rows += 1 298 | print(f"{row.id} {row.started} {pretty_seconds((row.created_at - row.started).total_seconds())} " 299 | f"{row.process.name} \"{row.window.title}\" {row.nrkeys}", 300 | f"{row.decrypt_text().decode('utf8')}" if self.args['showtext'] else '') 301 | print(f'{rows} rows') 302 | 303 | def calc_summary(self): 304 | def updict(d1, d2, activity_times, sub=None): 305 | if sub is not None: 306 | if sub not in d1: 307 | d1[sub] = {} 308 | d1 = d1[sub] 309 | 310 | for key, val in d2.items(): 311 | d1[key] = d1.get(key, 0) + val 312 | 313 | if self.need_activity: 314 | if 'activity' not in d1: 315 | d1['activity'] = Period(self.need_activity, time.time()) 316 | d1['activity'].extend(activity_times) 317 | 318 | sumd = {} 319 | processes = {} 320 | windows = {} 321 | timings = [] 322 | keys = Counter() 323 | for row in self.filter_keys(): 324 | d = {'nr': 1, 325 | 'keystrokes': len(row.load_timings())} 326 | 327 | if self.need_activity: 328 | timings = create_times(row) 329 | if self.need_process: 330 | updict(processes, d, timings, sub=row.process.name) 331 | if self.need_window: 332 | updict(windows, d, timings, sub=row.window.title) 333 | updict(sumd, d, timings) 334 | 335 | if self.args['key_freqs']: 336 | keys.update(row.decrypt_keys()) 337 | 338 | for click in self.filter_clicks(): 339 | d = {'noscroll_clicks': click.button not in [4, 5], 340 | 'clicks': 1, 341 | f'button{click.button}': 1, 342 | 'mousings': click.nrmoves} 343 | if self.need_activity: 344 | timings = [time.mktime(click.created_at.timetuple())] 345 | if self.need_process: 346 | updict(processes, d, timings, sub=click.process.name) 347 | if self.need_window: 348 | updict(windows, d, timings, sub=click.window.title) 349 | updict(sumd, d, timings) 350 | 351 | self.processes = processes 352 | self.windows = windows 353 | self.summary = sumd 354 | if self.args['key_freqs']: 355 | self.summary['key_freqs'] = keys 356 | 357 | def show_summary(self): 358 | print(f"{self.summary.get('keystrokes', 0)} keystrokes in {self.summary.get('nr', 0)} key sequences,", 359 | f"{self.summary.get('clicks', 0)} clicks ({self.summary.get('noscroll_clicks', 0)} excluding scroll),", 360 | f"{self.summary.get('mousings', 0)} mouse movements") 361 | print() 362 | 363 | if self.need_activity: 364 | act = self.summary.get('activity') 365 | 366 | if act: 367 | act = act.calc_total() 368 | else: 369 | act = 0 370 | print(f'Total time active: {pretty_seconds(act)}') 371 | print() 372 | 373 | if self.args['clicks']: 374 | print('Mouse clicks:') 375 | for key, name in BUTTON_MAP: 376 | print(f"{self.summary.get(key, 0)} {name}") 377 | print() 378 | 379 | if self.args['key_freqs']: 380 | print('Key frequencies:') 381 | for key, val in self.summary['key_freqs'].most_common(): 382 | print(f"{key} {val}") 383 | print() 384 | 385 | if self.args['pkeys']: 386 | print('Processes sorted by keystrokes:') 387 | pdata = list(self.processes.items()) 388 | pdata.sort(key=lambda x: x[1].get('keystrokes', 0), reverse=True) 389 | for name, data in pdata: 390 | print(f"{name} {data.get('keystrokes', 0)}") 391 | print() 392 | 393 | if self.args['tkeys']: 394 | print('Window titles sorted by keystrokes:') 395 | wdata = list(self.windows.items()) 396 | wdata.sort(key=lambda x: x[1].get('keystrokes', 0), reverse=True) 397 | for name, data in wdata: 398 | print(f"{name} {data.get('keystrokes', 0)}") 399 | print() 400 | if self.args['pactive']: 401 | print('Processes sorted by activity:') 402 | for p in self.processes.values(): 403 | p['active_time'] = int(p['activity'].calc_total()) 404 | pdata = list(self.processes.items()) 405 | pdata.sort(key=lambda x: x[1]['active_time'], reverse=True) 406 | for name, data in pdata: 407 | print(f"{name}, {pretty_seconds(data['active_time'])}") 408 | print() 409 | 410 | if self.args['tactive']: 411 | print('Window titles sorted by activity:') 412 | for w in self.windows.values(): 413 | w['active_time'] = int(w['activity'].calc_total()) 414 | wdata = list(self.windows.items()) 415 | wdata.sort(key=lambda x: x[1]['active_time'], reverse=True) 416 | for name, data in wdata: 417 | print(f"{name}, {pretty_seconds(data['active_time'])}") 418 | print() 419 | 420 | if self.args['periods']: 421 | if 'activity' in self.summary: 422 | print('Active periods:') 423 | for t1, t2 in self.summary['activity'].times: 424 | d1 = datetime.datetime.fromtimestamp(t1).replace(microsecond=0) 425 | d2 = datetime.datetime.fromtimestamp(t2).replace(microsecond=0) 426 | print(f"{d1.isoformat(' ')} - {str(d2.time()).split('.')[0]}") 427 | else: 428 | print('No active periods.') 429 | print() 430 | 431 | if self.args['ratios']: 432 | def tryget(prop): 433 | return float(max(1, self.summary.get(prop, 1))) 434 | 435 | mousings = tryget('mousings') 436 | clicks = tryget('clicks') 437 | keys = tryget('keystrokes') 438 | print(f"Keys / Clicks: {keys / clicks:.1f}") 439 | print(f"Active seconds / Keys: {act / keys:.1f}") 440 | print() 441 | print(f"Mouse movements / Keys: {mousings / keys:.1f}") 442 | print(f"Mouse movements / Clicks: {mousings / clicks:.1f}") 443 | print() 444 | 445 | def parse_config(): 446 | conf_parser = argparse.ArgumentParser(description=__doc__, add_help=False, 447 | formatter_class=argparse.RawDescriptionHelpFormatter) 448 | 449 | conf_parser.add_argument("-c", "--config", 450 | help="""Config file with defaults. Command line parameters will override those given in the config file. Options to Base goes in the "[Defaults]" section, followed by [argument]=[value] on each line. Options specific to selfstats should be in the "[Selfstats]" section, though "data-dir" is still read from "[Defaults]".""", metavar="FILE") 451 | args, remaining_argv = conf_parser.parse_known_args() 452 | 453 | defaults = {} 454 | if args.config: 455 | if not os.path.exists(args.config): 456 | raise EnvironmentError(f"Config file {args.config} doesn't exist.") 457 | config = configparser.ConfigParser() 458 | config.read([args.config]) 459 | defaults = dict(config.items('Defaults') + config.items("Selfstats")) 460 | 461 | parser = argparse.ArgumentParser(description="""Calculate statistics on Base data. Per default it will show non-text information that matches the filter. Adding '-s' means also show text. Adding any of the summary options will show those summaries over the given filter instead of the listing. Multiple summary options can be given to print several summaries over the same filter.""", epilog="""See the README file or http://gurgeh.github.com/Base for examples.""", parents=[conf_parser]) 462 | parser.set_defaults(**defaults) 463 | parser.add_argument('-d', '--data-dir', help='Data directory for Base, where the database is stored. Remember that Base must have read/write access. Default is %s' % cfg.DATA_DIR, default=cfg.DATA_DIR) 464 | 465 | parser.add_argument('-s', '--showtext', action='store_true', help='Also show the text column. This switch is ignored if at least one of the summary options are used.') 466 | 467 | parser.add_argument('-D', '--date', nargs='+', help='Which date to start the listing or summarizing from. If only one argument is given (--date 13) it is interpreted as the closest date in the past on that day. If two arguments are given (--date 03 13) it is interpreted as the closest date in the past on that month and that day, in that order. If three arguments are given (--date 2012 03 13) it is interpreted as YYYY MM DD') 468 | parser.add_argument('-C', '--clock', type=str, help='Time to start the listing or summarizing from. Given in 24 hour format as --clock 13:25. If no --date is given, interpret the time as today if that results in sometimes in the past, otherwise as yesterday.') 469 | 470 | parser.add_argument('-i', '--id', type=int, help='Which row ID to start the listing or summarizing from. If --date and/or --clock is given, this option is ignored.') 471 | 472 | parser.add_argument('-b', '--back', nargs='+', type=str, help='--back [] Start the listing or summary this much back in time. Use this as an alternative to --date, --clock and --id. If any of those are given, this option is ignored. is either "s" (seconds), "m" (minutes), "h" (hours), "d" (days) or "w" (weeks). If no unit is given, it is assumed to be hours.') 473 | 474 | parser.add_argument('-l', '--limit', help='--limit []. If the start is given in --date/--clock, the limit is a time period given by . is either "s" (seconds), "m" (minutes), "h" (hours), "d" (days) or "w" (weeks). If no unit is given, it is assumed to be hours. If the start is given with --id, limit has no unit and means that the maximum row ID is --id + --limit.', nargs='+', type=str) 475 | 476 | parser.add_argument('-m', '--min-keys', type=int, metavar='nr', help='Only allow entries with at least keystrokes') 477 | 478 | parser.add_argument('-T', '--title', type=str, metavar='regexp', help='Only allow entries where a search for this in the window title matches something. All regular expressions are case insensitive.') 479 | parser.add_argument('-P', '--process', type=str, metavar='regexp', help='Only allow entries where a search for this in the process matches something.') 480 | parser.add_argument('-B', '--body', type=str, metavar='regexp', help='Only allow entries where a search for this in the body matches something. Do not use this filter when summarizing ratios or activity, as it has no effect on mouse clicks. Requires password.') 481 | 482 | parser.add_argument('--clicks', action='store_true', help='Summarize number of mouse button clicks for all buttons.') 483 | 484 | parser.add_argument('--key-freqs', action='store_true', help='Summarize a table of absolute and relative number of keystrokes for each used key during the time period. Requires password.') 485 | 486 | parser.add_argument('--human-readable', action='store_true', help='This modifies the --body entry and honors backspace.') 487 | parser.add_argument('--active', type=int, metavar='seconds', nargs='?', const=ACTIVE_SECONDS, help='Summarize total time spent active during the period. The optional argument gives how many seconds after each mouse click (including scroll up or down) or keystroke that you are considered active. Default is %d.' % ACTIVE_SECONDS) 488 | 489 | parser.add_argument('--ratios', type=int, metavar='seconds', nargs='?', const=ACTIVE_SECONDS, help='Summarize the ratio between different metrics in the given period. "Clicks" will not include up or down scrolling. The optional argument is the "seconds" cutoff for calculating active use, like --active.') 490 | 491 | parser.add_argument('--periods', type=int, metavar='seconds', nargs='?', const=ACTIVE_SECONDS, help='List active time periods. Optional argument works same as for --active.') 492 | 493 | parser.add_argument('--pactive', type=int, metavar='seconds', nargs='?', const=ACTIVE_SECONDS, help='List processes, sorted by time spent active in them. Optional argument works same as for --active.') 494 | parser.add_argument('--tactive', type=int, metavar='seconds', nargs='?', const=ACTIVE_SECONDS, help='List window titles, sorted by time spent active in them. Optional argument works same as for --active.') 495 | 496 | parser.add_argument('--pkeys', action='store_true', help='List processes sorted by number of keystrokes.') 497 | parser.add_argument('--tkeys', action='store_true', help='List window titles sorted by number of keystrokes.') 498 | 499 | return parser.parse_args() 500 | 501 | 502 | def main(): 503 | try: 504 | args = vars(parse_config()) 505 | except EnvironmentError as e: 506 | print(str(e)) 507 | sys.exit(1) 508 | 509 | args['data_dir'] = os.path.expanduser(args['data_dir']) 510 | ss = Selfstats(os.path.join(args['data_dir'], models.DBNAME), args) 511 | 512 | ss.do() 513 | 514 | 515 | if __name__ == '__main__': 516 | main() 517 | 518 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | @echo "Nothing to build. Only install. Destination is: " $(DESTDIR) 3 | 4 | install: 5 | mkdir -p $(DESTDIR)/var/lib/Base 6 | install Base/*.py $(DESTDIR)/var/lib/Base 7 | mkdir -p $(DESTDIR)/usr/bin 8 | ln -s $(DESTDIR)/var/lib/Base/__init__.py $(DESTDIR)/usr/bin/Base 9 | ln -s $(DESTDIR)/var/lib/Base/stats.py $(DESTDIR)/usr/bin/Baseview 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### What is this? 2 | Base is a daemon for linux that continuously monitors and stores what you are doing on your computer. This way, pull statistics, visualize how you spend your time, attach a model to be a semantic memory store, and more. It is inspired by the [Quantified Self](http://en.wikipedia.org/wiki/Quantified_Self)-movement and [Stephen Wolfram's personal key logging](http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/) asa well as [SelfSpy](https://github.com/selfspy/selfspy). 3 | 4 | See Example Statistics, below, for some of the fabulous things you can do with this data. 5 | ### Installation 6 | 7 | To install Base, follow these steps: 8 | 9 | 1. Clone the Base repository: 10 | ``` 11 | git clone https://github.com/Alignment-Lab-AI/Base.git 12 | ``` 13 | 14 | 2. Navigate to the Base directory: 15 | ``` 16 | cd Base 17 | ``` 18 | 19 | 3. Run the installation command with sudo: 20 | ``` 21 | sudo make install 22 | ``` 23 | 24 | This command will create the necessary directories and symlinks: 25 | - It creates a `/var/lib/Base` directory and installs the Python files from the `Base/` directory into it. 26 | - It creates a `/usr/bin` directory if it doesn't already exist. 27 | - It creates symlinks in `/usr/bin` for the `__init__.py` and `stats.py` files, making them accessible as `Base` and `Baseview` commands respectively. 28 | 29 | Note: The installation destination can be customized by setting the `DESTDIR` variable when running `make install`. For example: 30 | ``` 31 | sudo make install DESTDIR=/path/to/custom/directory 32 | ``` 33 | 34 | 4. After installation, you can run Base using the `Base` command and view statistics using the `Baseview` command. 35 | 36 | That's it! Base should now be installed on your system and ready to use. 37 | 38 | 39 | 40 | ### Running Base 41 | You run Base with `Base`. You should probably start with `Base --help` to get to know the command line arguments. As of this writing, it should look like this: 42 | 43 | ``` 44 | optional arguments: 45 | -h, --help show this help message and exit 46 | -c FILE, --config FILE 47 | Config file with defaults. Command line parameters 48 | will override those given in the config file. The 49 | config file must start with a "[Defaults]" section, 50 | followed by [argument]=[value] on each line. 51 | 52 | -d DATA_DIR, --data-dir DATA_DIR 53 | Data directory for Base, where the database is 54 | stored. Remember that Base must have read/write 55 | access. Default is ~/.Base 56 | -n, --no-text Do not store what you type. This will make your 57 | database smaller and less sensitive to security 58 | breaches. Process name, window titles, window 59 | geometry, mouse clicks, number of keys pressed and key 60 | timings will still be stored, but not the actual 61 | letters. Key timings are stored to enable activity 62 | calculation in selfstats. If this switch is used, 63 | you will never be asked for password. 64 | -r, --no-repeat Do not store special characters as repeated 65 | characters. 66 | ``` 67 | 68 | Everything you do is stored in a Sqlite database in your DATA_DIR. Each day theyre saved to parquet files which are unilaterally queried along with the sqlite database via duckdb they contain a variety of useful columns, like process names and window titles 69 | 70 | Unless you use the --no-text flag, Base will store everything you type in two columns in the database. 71 | 72 | 73 | ### Example Statistics 74 | *"OK, so now all this data will be stored, but what can I use it for?"* 75 | 76 | While you can access the Sqlite/duckdb tables directly or, if you like Python, import `models.py` from the Base directory and use those duckdb tables, the standard way to query your data is intended to be via a language model. 77 | 78 | Here are some model-agnostic use cases: 79 | 80 | *"Damn! The browser just threw away everything I wrote, because I was not logged in."* 81 | "Show me everything I have written the last 30 minutes." 82 | 83 | `selfstats --back 30 m --showtext` 84 | 85 | 86 | 87 | *"Hmm.. what is my password for Hoolaboola.com?"* 88 | "show me everything I have ever written in Chrome, where the window title contained something with "Hoolaboola"." 89 | 90 | `selfstats -T "Hoolaboola" -P Google-chrome --showtext` 91 | 92 | 93 | 94 | *"I need to remember what I worked on a few days ago, for my time report."* 95 | "What buffers did I have open in Emacs on the tenth of this month and one day forward? Sort by how many keystrokes I wrote in each. This only works if I have set Emacs to display the current buffer in the window title. In general, try to set your programs (editors, terminals, web apps, ...) to include information on what you are doing in the window title. This will make it easier to search for later. On a related but opposite note: if you have the option, remove information like "mails unread" or "unread count" (for example in Gmail and Google Reader) from the window titles, to make it easier to group them in --tactive and --tkeys." 96 | 97 | 98 | `selfstats --date 10 --limit 1 d -P emacs --tkeys` 99 | 100 | 101 | *"Also, when and how much have I used my computer this last week?"* 102 | "display my active time periods for the last week. consider it inactive when I have not clicked or used the keyboard in 180 seconds." 103 | 104 | `selfstats -b 1 w --periods 180` 105 | 106 | 107 | *"How effective have I been this week?"* 108 | "show ratios informing me about how much I have written per active second and how much I have clicked vs used the keyboard. cause = a lot of clicking means too much browsing or inefficient use of my tools." 109 | 110 | `selfstats -b 1 w --ratios` 111 | 112 | 113 | 114 | *"I remember that I wrote something to her about the IP address of our printer a few months ago. I can't quite remember if it was a chat, a tweet, a mail, a facebook post, or what.. Should I search them separately? No."* 115 | "Find all texts where I mentioned the word 'printer' in the last 10 weeks" 116 | `selfstats --body printer -s --back 40 w` 117 | 118 | *"What programs do I use the most?"* 119 | "List all programs I have ever used, ordered by time spent active in them" 120 | `selfstats --pactive` 121 | 122 | *"Which questions on the website Stack Overflow did I visit yesterday?"* 123 | "Show me all window titles containing 'Stack Overflow' from the last 32 hours, sorted by active time" 124 | `./selfstats -T "Stack Overflow" -P Google-chrome --back 32 h --tactive` 125 | 126 | *"How much have I browsed today?"* 127 | "List all the different pages I visited in Chrome today, ordered by active time" 128 | `selfstats -P Google-chrome --clock 00:00 --tactive` 129 | 130 | *"Who needs Qwerty? I am going to make an alternative super-programmer-keymap. I wonder what keys I use the most when I code C++?"* 131 | "Show me the most frequently pressed keys in Emacs while editing files with 'cpp' in the name" 132 | `selfstats --key-freq -P Emacs -T cpp` 133 | 134 | *"While we are at it, which cpp files have I edited the most this month?"* 135 | "List the cpp files I edited in Emacs this month, sorted by amount typed" 136 | `selfstats -P Emacs -T cpp --tkeys --date 1` 137 | Selfstats is a swiss army knife of self knowledge. Experiment with it when you have acquired a few days of data. Remember that if you know SQL or SqlAlchemy, it is easy to construct your own queries against the database to get exactly the information you want, make pretty graphs, etc. There are a few stored properties, like coordinates of a mouse click and window geometry, that you can currently only reach through the database. 138 | 139 | ### Selfstats Reference 140 | 141 | The --help is a beast that right now looks something like this: 142 | 143 | ``` 144 | usage: selfstats [-h] [-c FILE] [-d DATA_DIR] [-s] 145 | [-D DATE [DATE ...]] [-C CLOCK] [-i ID] 146 | [-b BACK [BACK ...]] [-l LIMIT [LIMIT ...]] [-m nr] 147 | [-T regexp] [-P regexp] [-B regexp] [--ratios] [--clicks] 148 | [--key-freqs] [--human-readable] [--active [seconds]] [--periods [seconds]] 149 | [--pactive [seconds]] [--tactive [seconds]] [--pkeys] 150 | [--tkeys] 151 | 152 | Calculate statistics on Base data. Per default it will show non-text 153 | information that matches the filter. Adding '-s' means also show text. Adding 154 | any of the summary options will show those summaries over the given filter 155 | instead of the listing. Multiple summary options can be given to print several 156 | summaries over the same filter. If you give arguments that need to access text 157 | / keystrokes, you will be asked for the decryption password. 158 | 159 | optional arguments: 160 | -h, --help show this help message and exit 161 | -c FILE, --config FILE 162 | Config file with defaults. Command line parameters 163 | will override those given in the config file. Options 164 | to Base goes in the "[Defaults]" section, followed 165 | by [argument]=[value] on each line. Options specific 166 | to selfstats should be in the "[Selfstats]" section, 167 | though "data-dir" is still read from "[Defaults]". 168 | -d DATA_DIR, --data-dir DATA_DIR 169 | Data directory for Base, where the database is 170 | stored. Remember that Base must have read/write 171 | access. Default is ~/.Base 172 | -s, --showtext Also show the text column. This switch is ignored if 173 | at least one of the summary options are used. Requires 174 | password. 175 | -D DATE [DATE ...], --date DATE [DATE ...] 176 | Which date to start the listing or summarizing from. 177 | If only one argument is given (--date 13) it is 178 | interpreted as the closest date in the past on that 179 | day. If two arguments are given (--date 03 13) it is 180 | interpreted as the closest date in the past on that 181 | month and that day, in that order. If three arguments 182 | are given (--date 2012 03 13) it is interpreted as 183 | YYYY MM DD 184 | -C CLOCK, --clock CLOCK 185 | Time to start the listing or summarizing from. Given 186 | in 24 hour format as --clock 13:25. If no --date is 187 | given, interpret the time as today if that results in 188 | sometimes in the past, otherwise as yesterday. 189 | -i ID, --id ID Which row ID to start the listing or summarizing from. 190 | If --date and/or --clock is given, this option is 191 | ignored. 192 | -b BACK [BACK ...], --back BACK [BACK ...] 193 | --back [] Start the listing or summary 194 | this much back in time. Use this as an alternative to 195 | --date, --clock and --id. If any of those are given, 196 | this option is ignored. is either "s" 197 | (seconds), "m" (minutes), "h" (hours), "d" (days) or 198 | "w" (weeks). If no unit is given, it is assumed to be 199 | hours. 200 | -l LIMIT [LIMIT ...], --limit LIMIT [LIMIT ...] 201 | --limit []. If the start is given in 202 | --date/--clock, the limit is a time period given by 203 | . is either "s" (seconds), "m" (minutes), 204 | "h" (hours), "d" (days) or "w" (weeks). If no unit is 205 | given, it is assumed to be hours. If the start is 206 | given with --id, limit has no unit and means that the 207 | maximum row ID is --id + --limit. 208 | -m nr, --min-keys nr Only allow entries with at least keystrokes 209 | -T regexp, --title regexp 210 | Only allow entries where a search for this in 211 | the window title matches something. All regular expressions 212 | are case insensitive. 213 | -P regexp, --process regexp 214 | Only allow entries where a search for this in 215 | the process matches something. 216 | -B regexp, --body regexp 217 | Only allow entries where a search for this in 218 | the body matches something. Do not use this filter 219 | when summarizing ratios or activity, as it has no 220 | effect on mouse clicks. Requires password. 221 | --clicks Summarize number of mouse button clicks for all 222 | buttons. 223 | --key-freqs Summarize a table of absolute and relative number of 224 | keystrokes for each used key during the time period. 225 | Requires password. 226 | --human-readable This modifies the --body entry and honors backspace. 227 | --active [seconds] Summarize total time spent active during the period. 228 | The optional argument gives how many seconds after 229 | each mouse click (including scroll up or down) or 230 | keystroke that you are considered active. Default is 231 | 180. 232 | --ratios [seconds] Summarize the ratio between different metrics in the 233 | given period. "Clicks" will not include up or down 234 | scrolling. The optional argument is the "seconds" 235 | cutoff for calculating active use, like --active. 236 | --periods [seconds] List active time periods. Optional argument works same 237 | as for --active. 238 | --pactive [seconds] List processes, sorted by time spent active in them. 239 | Optional argument works same as for --active. 240 | --tactive [seconds] List window titles, sorted by time spent active in 241 | them. Optional argument works same as for --active. 242 | --pkeys List processes sorted by number of keystrokes. 243 | --tkeys List window titles sorted by number of keystrokes. 244 | 245 | ``` 246 | 247 | ### Email 248 | To monitor that Base works as it should and to continuously get feedback on yourself, it is good to regularly mail yourself some statistics. I think the easiest way to automate this is using [sendEmail](http://www.debianadmin.com/how-to-sendemail-from-the-command-line-using-a-gmail-account-and-others.html), which can do neat stuff like send through your Gmail account. 249 | 250 | For example, put something like this in your weekly [cron](http://clickmojo.com/code/cron-tutorial.html) jobs: 251 | `/(PATH_TO_FILE)/selfstats --back 1 w --ratios 900 --periods 900 | /usr/bin/sendEmail -q -u "Weekly selfstats" ` 252 | This will give you some interesting feedback on how much and when you have been active this last week and how much you have written vs moused, etc. 253 | 254 | # next steps 255 | 1. integrate a model api, and ideally a model trained to make duckdb queries intellgently 256 | 2. integrate a part of speech tagging model to categorize on an additional axis based on key terms 257 | 3. develop a ui to chat with the model comfortably and display the activity the user was participating in at any given moment 258 | 4. integrate tooling for a local model to manage and schedule a task list based on the context of the current conversation 259 | 5. integrate an efficient stt model to transcribe all audio and store it as well 260 | 6. integrate an OCR model to capture any text which may get missed, and to parse over the already collected screenshots in current databases that lack the OCR functionality 261 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-dateutil>=2.8.2 2 | tabulate>=0.9.0 3 | Pillow>=9.0.0 4 | python-xlib>=0.31 5 | duckdb>=0.6.1 6 | 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | req_file = 'requirements.txt' 4 | 5 | with open(os.path.join(os.path.dirname(__file__), req_file)) as f: 6 | requires = [line.strip() for line in f.readlines()] 7 | 8 | print(f'"{requires}"') 9 | 10 | from setuptools import setup, find_packages 11 | 12 | setup(name="Base", 13 | version='0.1.0', 14 | packages=find_packages(), 15 | author="Alignment Lab AI", 16 | author_email='autometa@alignmentlab.ai', 17 | description='Waste nothing! Store literally everything efficiently, and connect it to a local model!', 18 | install_requires=requires, 19 | entry_points={'console_scripts': ['Base=Base:main', 20 | 'Baseview=Base.stats:main']}) 21 | --------------------------------------------------------------------------------