├── .gitignore ├── README.md ├── config.json ├── google_dfir.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | */.env 7 | .env 8 | */.gitignore 9 | 10 | git_data/** 11 | .DS_Store 12 | 13 | .vscode/** 14 | 15 | #secrets 16 | token.pickle 17 | credentials.json 18 | 19 | GeoLite2-City.mmdb 20 | summary.csv 21 | 22 | config.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google Workspace DFIR Tool 2 | 3 | ## Configuring the API 4 | **Note** This requires an admin account for the Google Workspace organization. 5 | 1. Log in to [https://console.developers.google.com/cloud-resource-manager](https://console.developers.google.com/cloud-resource-manager) with admin credentials. 6 | 2. Create a new project 7 | 3. Go to [https://console.developers.google.com/apis/dashboard](https://console.developers.google.com/apis/dashboard) and ensure the created project is selected at the top left. 8 | 4. Select `Credentials` on the left and choose `OAuth Client ID`. 9 | 5. Choose `Web application` for the application type 10 | 6. Download the JSON credentials file (rename if desired, to something such as `credentials.json`). 11 | 7. Put the file in the path you designated in the `config.json` file 12 | 13 | ## Program Configuration 14 | - The `config.json` file allows you to define the file path where credential and MaxMind DB files are located 15 | - The `GeoLite2-City.mmdb` file can be downloaded from the MaxMind website. 16 | 17 | ## Running the Program 18 | ``` 19 | python3 google_dfir.py -o <output_path> 20 | ``` 21 | **Note**: The output option should provide the full path and file name (XLSX). Ex. 
`/User/bob/results.xlsx` -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "geolocate_db": "./GeoLite2-City.mmdb", 3 | "creds_path": "./credentials.json" 4 | } 5 | -------------------------------------------------------------------------------- /google_dfir.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pickle 3 | import os.path 4 | from googleapiclient.discovery import build 5 | from google_auth_oauthlib.flow import InstalledAppFlow 6 | from google.auth.transport.requests import Request 7 | import pandas as pd 8 | import geoip2.database 9 | import sys 10 | from openpyxl import load_workbook 11 | import time 12 | import json 13 | import argparse 14 | 15 | class Google(object): 16 | """ 17 | Class for doing bulk of operations related to Google Workspace DFIR activities 18 | """ 19 | 20 | def __init__(self): 21 | self.output = args.output 22 | self.geolocate_db = config['geolocate_db'] 23 | self.creds_path = config['creds_path'] 24 | self.service = self.google_session() 25 | 26 | def google_session(self): 27 | """ 28 | Establish connection to Google Wrospace. 29 | """ 30 | creds = None 31 | SCOPES = ['https://www.googleapis.com/auth/admin.reports.audit.readonly'] 32 | 33 | # The file token.pickle stores the user's access and refresh tokens, and is 34 | # created automatically when the authorization flow completes for the first 35 | # time. 36 | if os.path.exists('token.pickle'): 37 | with open('token.pickle', 'rb') as token: 38 | creds = pickle.load(token) 39 | # If there are no (valid) credentials available, let the user log in. 
40 | if not creds or not creds.valid: 41 | if creds and creds.expired and creds.refresh_token: 42 | creds.refresh(Request()) 43 | else: 44 | flow = InstalledAppFlow.from_client_secrets_file( 45 | self.creds_path, SCOPES) 46 | creds = flow.run_local_server(port=0) 47 | # Save the credentials for the next run 48 | with open('token.pickle', 'wb') as token: 49 | pickle.dump(creds, token) 50 | 51 | service = build('admin', 'reports_v1', credentials=creds) 52 | 53 | return service 54 | 55 | def get_login_activity(self): 56 | 57 | # Call the Admin SDK Reports API 58 | results = self.service.activities().list( 59 | userKey='all', applicationName='login').execute() 60 | activities = results.get('items', []) 61 | 62 | # save logs to Pandas data frame and clean up data 63 | df_activities = pd.json_normalize(activities) 64 | df_events = pd.json_normalize(data=activities, record_path=['events']) 65 | df_logs = df_activities.join(df_events) 66 | df_params = df_logs['parameters'].apply(pd.Series) 67 | df_params = df_params.rename(columns = lambda x : 'param_' + str(x)) 68 | df_logs = df_activities.join(df_params) 69 | df_logs = df_logs.drop(columns=['events', 'kind', 'etag', 'id.uniqueQualifier', 'id.customerId', 'actor.profileId']) 70 | df_logs = df_logs.rename(columns={"id.time": "timestamp", "actor.email": "userEmail", "id.applicationName": "applicationName"}) 71 | df_logs['loginCountry'] = df_logs['ipAddress'].map(lambda ipAddress: self.get_geoip(ipAddress)[0]) 72 | df_logs['loginCity'] = df_logs['ipAddress'].map(lambda ipAddress: self.get_geoip(ipAddress)[1]) 73 | df_logs.to_excel(self.output, "Login Activity", index=False) 74 | 75 | def get_drive_activity(self): 76 | 77 | # Call the Admin SDK Reports API 78 | results = self.service.activities().list( 79 | userKey='all', applicationName='drive').execute() 80 | activities = results.get('items', []) 81 | 82 | # save logs to Pandas data frame and clean up data 83 | df_activities = pd.json_normalize(activities) 84 | df_events = 
pd.json_normalize(data=activities, record_path=['events']) 85 | df_logs = df_activities.join(df_events) 86 | df_params = df_logs['parameters'].apply(pd.Series) 87 | df_params = df_params.rename(columns = lambda x : 'param_' + str(x)) 88 | df_logs = df_activities.join(df_params) 89 | df_logs = df_logs.drop(columns=['events', 'kind', 'etag', 'id.uniqueQualifier', 'actor.profileId', 'id.customerId']) 90 | df_logs = df_logs.rename(columns={"id.time": "timestamp", "actor.email": "userEmail", "id.applicationName": "applicationName"}) 91 | df_logs['loginCountry'] = df_logs['ipAddress'].apply(lambda ipAddress: self.get_geoip(ipAddress)[0] if type(ipAddress) == str else "") 92 | df_logs['loginCity'] = df_logs['ipAddress'].apply(lambda ipAddress: self.get_geoip(ipAddress)[1] if type(ipAddress) == str else "") 93 | 94 | with pd.ExcelWriter(self.output, engine='openpyxl', mode='a') as writer: 95 | writer.book = load_workbook(self.output) 96 | df_logs.to_excel(writer, "Google Drive Activity", index=False) 97 | 98 | def get_admin_activity(self): 99 | 100 | # Call the Admin SDK Reports API 101 | results = self.service.activities().list( 102 | userKey='all', applicationName='admin').execute() 103 | activities = results.get('items', []) 104 | 105 | # save logs to Pandas data frame and clean up data 106 | df_activities = pd.json_normalize(activities) 107 | df_events = pd.json_normalize(data=activities, record_path=['events']) 108 | df_logs = df_activities.join(df_events) 109 | df_params = df_logs['parameters'].apply(pd.Series) 110 | df_params = df_params.rename(columns = lambda x : 'param_' + str(x)) 111 | df_logs = df_activities.join(df_params) 112 | df_logs = df_logs.drop(columns=['events', 'kind', 'etag', 'id.uniqueQualifier', 'actor.profileId', 'id.customerId']) 113 | df_logs = df_logs.rename(columns={"id.time": "timestamp", "actor.email": "userEmail", "id.applicationName": "applicationName"}) 114 | df_logs['loginCountry'] = df_logs['ipAddress'].apply(lambda ipAddress: 
self.get_geoip(ipAddress)[0] if type(ipAddress) == str else "") 115 | df_logs['loginCity'] = df_logs['ipAddress'].apply(lambda ipAddress: self.get_geoip(ipAddress)[1] if type(ipAddress) == str else "") 116 | 117 | with pd.ExcelWriter(self.output, engine='openpyxl', mode='a') as writer: 118 | writer.book = load_workbook(self.output) 119 | df_logs.to_excel(writer, "Admin Activity", index=False) 120 | 121 | def get_user_activity(self): 122 | 123 | # Call the Admin SDK Reports API 124 | results = self.service.activities().list( 125 | userKey='all', applicationName='user_accounts').execute() 126 | activities = results.get('items', []) 127 | # save logs to Pandas data frame and clean up data 128 | df_activities = pd.json_normalize(activities) 129 | df_events = pd.json_normalize(data=activities, record_path=['events']) 130 | df_logs = df_activities.join(df_events) 131 | df_logs = df_logs.drop(columns=['events', 'kind', 'etag', 'id.uniqueQualifier', 'actor.profileId', 'id.customerId']) 132 | df_logs = df_logs.rename(columns={"id.time": "timestamp", "actor.email": "userEmail", "id.applicationName": "applicationName"}) 133 | df_logs['loginCountry'] = df_logs['ipAddress'].apply(lambda ipAddress: self.get_geoip(ipAddress)[0] if type(ipAddress) == str else "") 134 | df_logs['loginCity'] = df_logs['ipAddress'].apply(lambda ipAddress: self.get_geoip(ipAddress)[1] if type(ipAddress) == str else "") 135 | df_logs = df_logs[["timestamp", "userEmail", "ipAddress", "loginCountry", "loginCity", "applicationName", "actor.callerType", "type", "name"]] 136 | 137 | with pd.ExcelWriter(self.output, engine='openpyxl', mode='a') as writer: 138 | writer.book = load_workbook(self.output) 139 | df_logs.to_excel(writer, "User Activity", index=False) 140 | 141 | def get_geoip(self, ipAddress): 142 | reader = geoip2.database.Reader( 143 | self.geolocate_db) 144 | response = reader.city(ipAddress) 145 | return [response.country.iso_code, response.city.name] 146 | 147 | 148 | def timeline(self): 149 
| sheet_to_df_map = pd.read_excel(self.output, sheet_name=None) 150 | timeline = pd.concat(sheet_to_df_map, axis=0, ignore_index=True) 151 | 152 | with pd.ExcelWriter(self.output, engine='openpyxl', mode='a') as writer: 153 | writer.book = load_workbook(self.output) 154 | timeline.to_excel(writer, "All", index=False) 155 | 156 | def parse_args(): 157 | parser = argparse.ArgumentParser( 158 | description="Retrieve and process Google Workspace logs" 159 | ) 160 | 161 | parser.add_argument( 162 | "-o", "--output", 163 | help="Output path" 164 | ) 165 | 166 | args = parser.parse_args() 167 | 168 | return args 169 | 170 | start_time = time.time() 171 | 172 | args = parse_args() 173 | 174 | with open("config.json") as json_data_file: 175 | config = json.load(json_data_file) 176 | 177 | google = Google() 178 | 179 | vars(google) 180 | 181 | google.get_login_activity() 182 | google.get_drive_activity() 183 | google.get_admin_activity() 184 | google.get_user_activity() 185 | google.timeline() 186 | 187 | elapsed = time.time() - start_time 188 | print(f'Total execution time: {elapsed}') -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | xlrd >= 1.0.0 2 | geoip2==4.0.2 3 | google-api-core==1.22.2 4 | google-api-python-client==1.11.0 5 | google-auth==1.21.1 6 | google-auth-httplib2==0.0.4 7 | google-auth-oauthlib==0.4.1 8 | googleapis-common-protos==1.52.0 9 | openpyxl==3.0.5 10 | pandas==1.1.2 --------------------------------------------------------------------------------