├── alfa
│   ├── __init__.py
│   ├── main
│   │   ├── __init__.py
│   │   ├── activity.py
│   │   ├── event.py
│   │   ├── analyser.py
│   │   ├── alfa.py
│   │   ├── kill_chain.py
│   │   └── collector.py
│   ├── utils
│   │   ├── UTILS.md
│   │   ├── path.py
│   │   ├── summary.py
│   │   ├── event_mitre_remap.py
│   │   └── mappings.yml
│   ├── config
│   │   ├── internals.yml
│   │   ├── __internals__.py
│   │   ├── __init__.py
│   │   ├── logo
│   │   ├── config.yml
│   │   └── event_to_mitre.yml
│   ├── __main__.py
│   ├── project_creator
│   │   └── __init__.py
│   └── cmdline.py
├── .gitignore
├── .github
│   └── dependabot.yaml
├── setup.py
├── requirements.txt
├── LICENSE
├── License.md
├── CREDENTIALS.md
├── README.md
└── tutorial
    └── tutorial.md

/alfa/__init__.py:
--------------------------------------------------------------------------------
1 | from .main import Alfa
2 | __version__ = '0.1.0'
3 |
--------------------------------------------------------------------------------
/alfa/main/__init__.py:
--------------------------------------------------------------------------------
1 | from .alfa import Alfa, Analyser, KillChain
2 | from .collector import Collector
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .venv/*
2 | .vscode/*
3 | test/*
4 | __pycache__
5 | *.json
6 | ALFA.egg-info/
7 | **/config.yml
--------------------------------------------------------------------------------
/alfa/utils/UTILS.md:
--------------------------------------------------------------------------------
1 | Scripts that help perform specific functions. Not necessarily part of the audit process.
2 | Use event_mitre_remap.py to add new mappings to config/event_to_mitre.yml
--------------------------------------------------------------------------------
/.github/dependabot.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "pip"
4 |     directory: "/"
5 |     schedule:
6 |       interval: "weekly"
7 |       day: "monday"
8 |       time: "09:00"
--------------------------------------------------------------------------------
/alfa/config/internals.yml:
--------------------------------------------------------------------------------
1 | # Used internally for storing constants
2 |
3 | project:
4 |   dirs:
5 |     configs: config
6 |     data: data
7 |   files:
8 |     config: config.yml
9 |     creds: credentials.json
10 |     token: token.json
11 |
--------------------------------------------------------------------------------
/alfa/config/__internals__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import yaml
4 | from types import SimpleNamespace
5 | import os.path
6 | from ..utils.path import CONFIG_DIR
7 |
8 | with open(os.path.join(CONFIG_DIR, 'internals.yml')) as f:
9 |     internals_dir = yaml.safe_load(f)
10 |
11 | internals = internals_dir
12 |
--------------------------------------------------------------------------------
/alfa/config/__init__.py:
--------------------------------------------------------------------------------
1 | from ..utils.path import rel_path, CONFIG_DIR
2 | import yaml, os.path
3 |
4 | relative_config = './config/config.yml'  # used when inside of a project directory
5 | if os.path.exists(relative_config):
6 |     config = yaml.safe_load(
7 |         open(relative_config))
8 | else:
9 |     config = yaml.safe_load(
10 |         open(rel_path(CONFIG_DIR,'config.yml'))
11 |     )
12 |
--------------------------------------------------------------------------------
/alfa/__main__.py: 
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from .cmdline import Parser
3 | from .config import CONFIG_DIR
4 |
5 | pd.set_option('display.max_colwidth', None)
6 |
7 | with open(f'{CONFIG_DIR}/logo') as f:
8 |     logo = f.read()
9 |
10 | def main():
11 |     print(logo)
12 |     parser = Parser()
13 |     args = parser.parser.parse_args()
14 |     args.func(args)
15 |     pass
16 |
17 | if __name__ == '__main__':
18 |     main()
19 |
--------------------------------------------------------------------------------
/alfa/utils/path.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | helper script to aid in relative imports
4 | '''
5 | import os.path
6 |
7 | def rel_path(*args: str) -> str:
8 |     if len(args) == 1:  # typically __file__
9 |         return os.path.dirname(args[0])
10 |     return os.path.realpath(
11 |         os.path.join(*args)
12 |     )
13 |
14 | UTILS_DIR = rel_path(__file__)
15 | ROOT_DIR = rel_path(UTILS_DIR,'..')
16 | CONFIG_DIR = rel_path(ROOT_DIR,'config')
17 | DATA_DIR = rel_path(ROOT_DIR,'data')
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | VERSION = '0.1.0'
3 | from setuptools import setup
4 | import os.path
5 | mydir = os.path.dirname(__file__)
6 | req_path = os.path.join(mydir,'requirements.txt')
7 | with open(req_path) as f:
8 |     requirements = f.readlines()
9 | setup(
10 |     name='ALFA',
11 |     description='',
12 |     author='',
13 |     packages=['alfa'],
14 |     install_requires=requirements,
15 |     entry_points={
16 |         'console_scripts': [
17 |             'alfa = alfa.__main__:main'
18 |         ]
19 |     },
20 |     version=VERSION
21 | )
22 |
--------------------------------------------------------------------------------
/alfa/config/logo:
--------------------------------------------------------------------------------
1 |
2 |             _        ______
3 |     /\     | |      |  ____|     /\
4 |    /  \    | |      | |__       /  \
5 |   / /\ \   | |      |  __|     / /\ \
6 |  / ____ \  | |____  | |       / ____ \
7 | /_/    \_\ |______| |_|      /_/    \_\
8 |
9 |
10 |
11 | Google Workspace Audit Log Forensic Analysis
12 | Copyright (c) 2025 Invictus Incident Response
13 | Original authors (Greg Charitonos & @BertJanCyber); maintained by Invictus Incident Response
14 |
--------------------------------------------------------------------------------
/alfa/utils/summary.py:
--------------------------------------------------------------------------------
1 | from tabulate import tabulate
2 | from ..main.event import Events
3 | from ..main.activity import Activities
4 |
5 | event_columns = ['name','activity_time','activity_id']
6 |
7 | activity_columns = ['id.time','kind','actor.email','id.applicationName']
8 |
9 | def summary(data):
10 |     '''
11 |     wraps around tabulate;
12 |     prints a summary of data in a tabled format
13 |     '''
14 |     if type(data) == Events:
15 |         print(tabulate(data[event_columns],headers=event_columns))
16 |     elif type(data) == Activities:
17 |         print(tabulate(data[activity_columns],headers=activity_columns))
18 |     else:
19 |         print(tabulate(data))
20 |
21 |
--------------------------------------------------------------------------------
/alfa/main/activity.py:
--------------------------------------------------------------------------------
1 | from pandas.core.series import Series
2 | from pandas.core.frame import DataFrame
3 |
4 |
5 | class Activity(Series):
6 |     '''
7 |     A Series representing a single activity
8 |     '''
9 |     @property
10 |     def 
_constructor(self): 11 | return Activity 12 | 13 | @property 14 | def _constructor_expanddim(self): 15 | return Activities 16 | 17 | def __init__(self, *args, **kwargs): 18 | super().__init__(*args, **kwargs) 19 | 20 | @staticmethod 21 | def convert(series: Series) -> 'Activity': 22 | return Activity(series._data) 23 | 24 | 25 | class Activities(DataFrame): 26 | @property 27 | def _constructor(self): 28 | return Activities 29 | 30 | @property 31 | def _constructor_sliced(self): 32 | return Activity 33 | 34 | def __init__(self, *args, **kwargs) -> None: 35 | super().__init__(*args, **kwargs) 36 | if 'id.uniqueQualifier' in self.columns: 37 | self.set_index('id.uniqueQualifier',inplace=True) 38 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | asttokens==2.0.5 2 | backcall==0.2.0 3 | cachetools==4.2.4 4 | certifi==2024.7.4 5 | charset-normalizer==2.0.10 6 | decorator==5.1.1 7 | executing==0.8.3 8 | google-api-core==2.4.0 9 | google-api-python-client==2.35.0 10 | google-auth==2.3.3 11 | google-auth-httplib2==0.1.0 12 | google-auth-oauthlib==0.4.6 13 | googleapis-common-protos==1.54.0 14 | httplib2==0.20.2 15 | idna==3.7 16 | ipython==8.10.0 17 | jedi==0.18.1 18 | matplotlib-inline==0.1.3 19 | numpy>=1.26.0,<2.0.0 20 | oauthlib==3.2.2 21 | pandas==1.3.5 22 | parso==0.8.3 23 | pexpect==4.8.0 24 | pickleshare==0.7.5 25 | prompt-toolkit==3.0.29 26 | protobuf==4.25.8 27 | ptyprocess==0.7.0 28 | pure-eval==0.2.2 29 | pyasn1==0.4.8 30 | pyasn1-modules==0.2.8 31 | Pygments==2.15.0 32 | pyparsing==3.0.6 33 | python-dateutil==2.8.2 34 | pytz==2021.3 35 | PyYAML>=6.0.1 36 | requests==2.32.4 37 | requests-oauthlib==1.3.0 38 | rsa==4.8 39 | six==1.16.0 40 | stack-data==0.3.0 41 | tabulate==0.8.9 42 | traitlets==5.3.0 43 | uritemplate==4.1.1 44 | urllib3==2.6.0 45 | wcwidth==0.2.5 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Invictus Incident Response 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 |
--------------------------------------------------------------------------------
/License.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Invictus Incident Response
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/alfa/config/config.yml:
--------------------------------------------------------------------------------
1 | scopes:
2 |   - https://www.googleapis.com/auth/admin.reports.audit.readonly
3 |
4 | logs:
5 |   [
6 |     "chrome",
7 |     "admin",
8 |     "access_transparency",
9 |     "context_aware_access",
10 |     "gplus",
11 |     "data_studio",
12 |     "mobile",
13 |     "groups_enterprise",
14 |     "calendar",
15 |     "chat",
16 |     "gcp",
17 |     "drive",
18 |     "groups",
19 |     "keep",
20 |     "meet",
21 |     "jamboard",
22 |     "login",
23 |     "token",
24 |     "rules",
25 |     "saml",
26 |     "user_accounts",
27 |   ]
28 |
29 | activity_defaults:
30 |   columns:
31 |     [
32 |       "kind",
33 |       "etag",
34 |       "ipAddress",
35 |       "events",
36 |       "id.time",
37 |       "id.applicationName",
38 |       "id.customerId",
39 |       "actor.callerType",
40 |       "actor.email",
41 |       "actor.profileId",
42 |       "actor.key",
43 |     ]
44 |   index:
45 |     - id.uniqueQualifier
46 |
47 | kill_chain:
48 |   min_chain_length: 7 # for kill chain discovery (subchains). A subchain only exists if its length >= min_chain_length
49 |   min_chain_statistic: 0.6 # a subchain only exists if its kcs value is >= min_chain_statistic
50 |   index_reducer: max # max / min / mean. Some events have multiple categories. This is the reducer used to assign a single value to the event.
51 |   max_slack_width: 5 # how "long" the slack line can be
52 |   max_slack_depth: 7 # how "low" the slack line can hang
--------------------------------------------------------------------------------
/alfa/utils/event_mitre_remap.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | def event_to_mitre(root: str,_obj: object,event_dict: dict) -> dict:
4 |
5 |     '''
6 |     This is typically run as a standalone file, alongside mappings.yml.
7 |     Takes the MITRE mappings.yml file and inverts it,
8 |     such that events map to attacks: event => attack.category. This is saved to config/event_to_mitre.yml
9 |
10 |     event_to_mitre takes a root string (initialized as ''), an object to remap, and the event dictionary. It outputs the event dictionary, filled. 
11 |
12 |     if _obj is a list, then it must be a list of events
13 |     for each event:
14 |         if the event is not in the dictionary, initialize it as an empty list []
15 |         append the root to the dictionary
16 |
17 |     if _obj is NOT a list, then it must be a dictionary
18 |     for each key in the dictionary, prepend the key to the root:
19 |         new_root = key + '.' + old_root
20 |     then perform recursion, calling event_to_mitre with
21 |     new_root, _obj[key] as _obj and the event_dict
22 |
23 |     This is a recursive operation.
24 |     '''
25 |
26 |     if type(_obj) == list:
27 |         for event in _obj:
28 |             if event not in event_dict:
29 |                 event_dict[event] = []
30 |             event_dict[event].append(root[:-1])
31 |         return event_dict
32 |
33 |     for key in _obj:
34 |         event_to_mitre(f'{key}.{root}',_obj[key],event_dict)
35 |     return event_dict
36 |
37 |
38 | if __name__ == '__main__':
39 |     import yaml
40 |     from utils.path import *
41 |
42 |     input_file = rel_path(UTILS_DIR,'mappings.yml')
43 |     output_file = rel_path(CONFIG_DIR,'event_to_mitre.yml')
44 |
45 |     mappings = yaml.safe_load(open(input_file))
46 |     event_dict = dict()
47 |     event_to_mitre('',mappings,event_dict)
48 |
49 |     with open(output_file,'w') as f:
50 |         yaml.safe_dump(event_dict,f)
51 |
52 |     print('saved to', output_file)
--------------------------------------------------------------------------------
/alfa/project_creator/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os, os.path, shutil
3 | from ..utils.path import CONFIG_DIR
4 | from ..config.__internals__ import internals
5 |
6 | class Project:
7 |
8 |     """Creates and sets up a project; utilized in the "alfa init <path>" command."""
9 |
10 |     def __init__(self,path: str):
11 |         """
12 |         :path: root path of project. Can be relative. Can be "."
13 |
14 |         """
15 |         self._path = path
16 |         abs_path = os.path.abspath(path)
17 |         print('initializing project:',abs_path)
18 |         self.__main__()
19 |         print('complete')
20 |         print('---')
21 |         print('Please copy your credentials.json to config/credentials.json')
22 |         pass
23 |
24 |     def __check_can_overwrite(self,path: str):
25 |         ''' if path exists and is not empty, ask the user whether to overwrite. If it does not exist, it is created '''
26 |         if not os.path.exists(path):
27 |             os.mkdir(path)
28 |             return True
29 |
30 |         isempty = len(os.listdir(path)) == 0  # empty directories are safe to (re)use
31 |         if isempty:
32 |             return True
33 |
34 |         print(os.path.abspath(path),'is not empty. 
Are you sure you want to overwrite?')
35 |         choice = input('y/[n]: ')
36 |         if len(choice) and choice[0].lower() == 'y':
37 |             return True
38 |         return False
39 |
40 |     def create_folder_structure(self,conf_path: str, data_path: str):
41 |         ''' creates the folder structure in the root directory '''
42 |         safe_mk_root = self.__check_can_overwrite(self._path)
43 |         if not safe_mk_root:
44 |             return False, False, False
45 |
46 |         safe_mk_conf = self.__check_can_overwrite(conf_path)
47 |
48 |         safe_mk_data = self.__check_can_overwrite(data_path)
49 |
50 |         return safe_mk_root, safe_mk_conf, safe_mk_data
51 |
52 |     def copy_default_config(self,old_conf_path: str, new_conf_path: str):
53 |         if os.path.exists(new_conf_path):
54 |             return False
55 |         shutil.copy(old_conf_path, new_conf_path)
56 |         return True
57 |
58 |     def __main__(self):
59 |         root = self._path
60 |         old_conf_path = os.path.join(CONFIG_DIR,'config.yml')
61 |         conf_dir = os.path.join(root,internals['project']['dirs']['configs'])
62 |         new_conf_path = os.path.join(conf_dir,internals['project']['files']['config'])
63 |         data_path = os.path.join(root,internals['project']['dirs']['data'])
64 |         ok_root, ok_conf, ok_data = self.create_folder_structure(conf_dir,data_path)
65 |         if not all([ok_root, ok_conf, ok_data]):
66 |             print('warning: some directories were not created')
67 |         if os.path.exists(new_conf_path):
68 |             print('config already exists, skipping copying default config')
69 |             return True
70 |         self.copy_default_config(old_conf_path,new_conf_path)
71 |         return True
72 |
--------------------------------------------------------------------------------
/alfa/main/event.py:
--------------------------------------------------------------------------------
1 | from pandas.core.series import Series
2 | from pandas.core.frame import DataFrame
3 | from .activity import Activity
4 |
5 |
6 | class Events(DataFrame):
7 |     '''
8 |     Events is a dataframe containing events. It has a custom property, parent, which references its parent Alfa object.
9 |
10 |     Each Event *class* is dynamically generated from the current Events instance. This is because each instance of the class needs a reference
11 |     to its parent (Events).
12 |
13 |     Each event's Activity can be accessed through the .activity accessor, e.g. events.iloc[0].activity => Activity.
14 |     This is done by calling the Alfa.activity_by_id method.
15 |
16 |     When accessing an event's activity, the event passes the activity id up the chain, and then the Alfa object passes it down:
17 |
18 |     event -> Events -> Alfa -> activities -> activity
19 |     '''
20 |     @property
21 |     def _constructor(self):
22 |         return Events
23 |
24 |     @property
25 |     def _constructor_sliced(self):
26 |         return EventConstructor(self)
27 |
28 |     _metadata = ['parent']
29 |
30 |     def __init__(self, *args, **kwargs) -> None:
31 |         super().__init__(*args, **kwargs)
32 |
33 |     def activity(self, uid: str) -> Activity:
34 |         return self.parent.activity_by_id(uid)
35 |
36 |     def activities(self):
37 |         ids = self['activity_id'].unique()
38 |         activities = self.parent.activities.loc[ids]  # for some reason returns duplicate rows
39 |         return activities[~activities.index.duplicated()]
40 |
41 |
42 |     def get_event_slices(self, slices: list):
43 |         '''
44 |         slices: list of iterables with an internal shape of at least 2
45 |         only the first 2 items in the internal shape are regarded,
46 |         e.g. [ [0,5], [7,22], ...] 
47 |         returns a list of slices from the events dataframe
48 |         '''
49 |         out = []
50 |         for item in slices:
51 |             assert len(item) > 1
52 |             s = slice(item[0], item[1])
53 |             out.append(self[s])
54 |         return out
55 |
56 | def EventConstructor(parent=None):
57 |     class Event(Series):
58 |         @property
59 |         def _constructor(self):
60 |             return Event
61 |
62 |         @property
63 |         def _constructor_expanddim(self):
64 |             return Events
65 |         _metadata = ['name', 'parent', 'activity_id']
66 |
67 |         def __init__(self, *args, **kwargs):
68 |             super().__init__(*args, **kwargs)
69 |             self.parent = parent
70 |
71 |         @property
72 |         def activity(self):
73 |             return self.parent.activity(self['activity_id'])
74 |
75 |     return Event
76 |
--------------------------------------------------------------------------------
/CREDENTIALS.md:
--------------------------------------------------------------------------------
1 | # Getting Your Credentials
2 | This step-by-step guide takes you through the process of obtaining your API credentials.
3 | You can also watch [our YouTube video](https://youtu.be/Vk9ySK4E1S4?si=v6YfxnmsLL8hro21) where we show you the whole process.

4 |
5 | It is recommended that you register a new project for log collection, separate from other projects.
6 |
7 | ## Permissions
8 | First and foremost, the Admin SDK API used to collect the audit logs requires that the user has admin privileges. You can enable admin privileges through
9 | the following tutorial: https://support.google.com/a/answer/172176
10 |
11 | ## Creating an OAuth app
12 | 1. Go to https://console.cloud.google.com/cloud-resource-manager
13 | 2. Create a project, or use an existing project.
14 | 3. Go to https://console.developers.google.com/apis/dashboard
15 |    - Make sure your new project is selected: near the top-left of the website, close to the "Google Cloud Platform" logo, select your project
16 |    - In the sidebar, choose "Credentials"
17 | 4. Select "Create Credentials", choose "OAuth client ID"
18 | 5. At this point you may be prompted to "configure consent screen". This is dealt with in the next section. If you do not receive this prompt,
19 | skip over to the section "Create OAuth ID"
20 |
21 | ## Configuring The Consent Screen (Optional)
22 | 1. Select "configure consent screen"
23 | 2. Select the user type. If you do not know which to select, use the "Internal" type.
24 | 3. You will be prompted to fill in details about the "App"
25 |    - Give your app a unique name. It is recommended to use a descriptive name such as "Alfa Log Collecting"
26 |    - Fill out all the required fields. All other fields can be filled in at your discretion.
27 |
28 | ### Adding Scopes (Optional)
29 | You will now be prompted to "add or remove scopes". This step is not necessary, as it will not affect the script's operation.
30 | However, if you intend to grant access to third parties, you may wish to disclose the scopes here.
31 |
32 | ## Create OAuth ID
33 | 1. Select "web application" as the application type
34 | 2. Give the "web application" a descriptive name, e.g. "Alfa Log Collecting API"
35 | 3. Under "Authorised redirect URIs" add "http://localhost:8089/"
36 | 4. Click create. A popup will appear with your API credentials.
37 | 5. Download the JSON to ```config/credentials.json``` in your project's folder
38 |
39 | ## Enabling APIs
40 | You will need to enable access to the Admin SDK.
41 | 1. Go to https://console.cloud.google.com/apis/library/admin.googleapis.com
42 | 2. Enable the SDK
43 |
44 | ## Afterword
45 | Your credentials are now ready for use. When the first query is run, a browser window will open and you will be prompted to grant the appropriate permissions.
46 | After this, a new 'token.json' file will appear in the config/ directory.
47 |
48 | **NOTE**: A common error occurs regarding the "refresh token". This may happen if you delete the token.json file. This is due to a security feature of Google Cloud:
49 | when you first grant permission to the script, the resulting token.json file contains a "refresh-token" parameter. This only occurs **once**. Subsequent recreation
50 | of the token.json file will **not** include this token.
51 | To retrieve the token, you must delete the existing permissions and re-grant them. 
52 | The following explains how to remove third-party apps: https://support.google.com/accounts/answer/3466521
53 | After removal, simply run a query and re-grant the permissions.
54 |
--------------------------------------------------------------------------------
/alfa/main/analyser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import yaml
3 | import pandas as pd
4 |
5 | from ..utils.path import rel_path, CONFIG_DIR, DATA_DIR
6 | from ..config import config
7 | from .kill_chain import KillChain
8 |
9 | class Analyser:
10 |     '''
11 |     uses /config/event_to_mitre.yml to map events in the log to the MITRE ATT&CK framework.
12 |
13 |     analyse... takes a df_name (e.g. login) as input and returns a dataframe of all suspicious records. These are records where
14 |     at least one event exists within the event_to_mitre yml database.
15 |
16 |     Each event for each record is given new attributes 'attack.label', 'attack.category' and 'attack.index' for all associated MITRE attacks for that event.
17 |     '''
18 |     def __init__(self) -> None:
19 |         self.event_mapping = yaml.safe_load(open(rel_path(CONFIG_DIR,'event_to_mitre.yml')))
20 |
21 |     def analyse_all_files(self, email: list=None,filter=True, subdir=None) -> pd.DataFrame:
22 |         '''Takes all files, analyses and concats them into a single DataFrame'''
23 |         df = pd.DataFrame()
24 |         for log in config['logs']:
25 |             log_df = self.analyse_from_file(log,email,filter=filter, subdir=subdir)
26 |             df = df.append(log_df)
27 |         return df
28 |     def analyse_all(self,log_dict,email: list=None,filter=True) -> pd.DataFrame:
29 |         '''takes a dict of logs, and analyses and concats them into a single DataFrame'''
30 |         df = pd.DataFrame()
31 |         for log in log_dict:
32 |             log_df = self.analyse(log_dict[log],email,filter=filter)
33 |             df = df.append(log_df)
34 |         return df
35 |
36 |     def load_file(self,logtype,subdir=None):
37 |         '''Loads a file from the data/ directory. If a subdir is given, will load from data/<subdir>/'''
38 |         df_name = logtype+'.pkl'
39 |         if subdir:
40 |             filename = rel_path(DATA_DIR,subdir,df_name)
41 |         else:
42 |             filename = rel_path(DATA_DIR,df_name)
43 |         return pd.read_pickle(filename)
44 |
45 |     def analyse_from_file(self,logtype: str,email=None,filter=True, subdir=None):
46 |         '''load file and pass to analyse method'''
47 |         df = self.load_file(logtype, subdir=subdir)
48 |         return self.analyse(df,email,filter=filter)
49 |
50 |     def label_row(self,row: pd.Series,email: list=None) -> tuple:
51 |         '''labels a given row from config/event_to_mitre.yml. If email is passed, will filter on those email(s)'''
52 |         has_labels = False
53 |         if email:
54 |             if 'actor.email' not in row:
55 |                 return None, has_labels
56 |             if row['actor.email'] not in email:
57 |                 return None, has_labels
58 |         for event in row['events']:
59 |             if event['name'] in self.event_mapping:
60 |                 has_labels = True
61 |                 attack_label = self.event_mapping[event['name']]
62 |                 attack_category = [label.split('.')[-1] for label in attack_label]
63 |                 event['attack.label'] = attack_label
64 |                 event['attack.category'] = attack_category
65 |                 event['attack.index'] = KillChain.reduce_category_list(attack_category)
66 |         return row, has_labels
67 |
68 |     def analyse(self,df: pd.DataFrame,email: list=None, filter: bool=True) -> pd.DataFrame:
69 |         '''takes a DataFrame, outputs a labelled, *filtered* DataFrame. The filter will filter out benign events. 
If email is passed, will only contain events from that email address.''' 70 | mitre_df = [] 71 | for row in df.iloc: 72 | row, add_row = self.label_row(row,email) 73 | if (filter and add_row) or not filter: 74 | mitre_df.append(row) 75 | return pd.DataFrame(mitre_df) 76 | -------------------------------------------------------------------------------- /alfa/utils/mappings.yml: -------------------------------------------------------------------------------- 1 | # used by event_mitre_remap.py 2 | # remapped to /config/event_to_mitre.yml 3 | 4 | persistence: 5 | account_manipulation: 6 | [ 7 | 2sv_disable, 8 | 2sv_enroll, 9 | password_change, 10 | recovery_info_change, 11 | recovery_phone_edit, 12 | titanium_change, 13 | titanium_unenroll, 14 | GRANT_ADMIN_PRIVILEGE, 15 | ADD_RECOVERY_PHONE, 16 | REVOKE_ADMIN_PRIVILEGE, 17 | REVOKE_ASP, 18 | ENABLE_USER_IP_WHITELIST, 19 | CHANGE_USER_ORGANIZATION, 20 | CHANGE_RECOVERY_EMAIL, 21 | CHANGE_RECOVERY_PHONE, 22 | GRANT_DELEGATED_ADMIN_PRIVILEGES, 23 | CHANGE_PASSWORD, 24 | CHANGE_PASSWORD_ON_NEXT_LOGIN, 25 | REMOVE_RECOVERY_EMAIL, 26 | REMOVE_RECOVERY_PHONE, 27 | RESET_SIGNIN_COOKIES, 28 | REVOKE_SECURITY_KEY, 29 | UNBLOCK_USER_SESSION, 30 | UNENROLL_USER_FROM_TITANIUM, 31 | DELETE_USER, 32 | UNENROLL_USER_FROM_STRONG_AUTH, 33 | UNDELETE_USER, 34 | UNSUSPEND_USER, 35 | add_member, 36 | add_member_role, 37 | add_security_setting, 38 | ban_member_with_moderation, 39 | change_security_setting, 40 | delete_group, 41 | join, 42 | remove_membership_expiry, 43 | remove_info_setting, 44 | remove_member, 45 | remove_member_role, 46 | remove_security_setting, 47 | unban_member, 48 | password_change, 49 | account_disabled_generic, 50 | MOBILE_DEVICE_APPROVE, 51 | ] 52 | 53 | create_account: 54 | cloud_account: [CREATE_USER] 55 | 56 | valid_accounts: 57 | default_accounts: [] 58 | cloud_accounts: 59 | [ 60 | suspicious_login, 61 | suspicious_login_less_secure_app, 62 | suspicious_programmatic_login, 63 | login_failure, 64 | login_challenge, 65 | login_verification, 66 | login_success, 67 | ] 68 | 69 | privilege_escalation: 70 | valid_accounts: 71 | default_accounts: [] 72 | cloud_accounts: 73 | [ 74 | suspicious_login, 75 | suspicious_login_less_secure_app, 76 | suspicious_programmatic_login, 77 | login_challenge, 78 | login_verification, 79 | login_success, 80 | ADD_PRIVILEGE, 81 | ASSIGN_ROLE, 82 | GRANT_ADMIN_PRIVILEGE, 83 | change_user_access 84 | ] 85 | 86 | defense_evasion: 87 | use_alternate_authentication_material: 88 | application_access_token: 89 | [ 90 | ADD_TO_TRUSTED_OAUTH2_APPS, 91 | REMOVE_FROM_BLOCKED_OAUTH2_APPS, 92 | TRUST_DOMAIN_OWNED_OAUTH2_APPS, 93 | authorize, 94 | ] 95 | web_session_cookie: [] 96 | 97 | valid_accounts: 98 | default_accounts: [] 99 | cloud_accounts: 100 | [ 101 | suspicious_login, 102 | suspicious_login_less_secure_app, 103 | suspicious_programmatic_login, 104 | login_challenge, 105 | login_verification, 106 | login_success, 107 | ALERT_CENTER_BATCH_DELETE_ALERTS, 108 | ALERT_CENTER_DELETE_ALERT 109 | ] 110 | 111 | credential_access: 112 | brute_force: 113 | password_guessing: [login_failure, login_challenge, login_verification, login_success] 114 | password_spraying: [login_failure, login_challenge, login_verification, login_success] 115 | credential_stuffing: [login_failure, login_challenge, login_verification, login_success] 116 | 117 | forge_web_credentials: 118 | saml_tokens: [login_success] 119 | 120 | steal_application_access_token: [authorize] 121 | steal_web_session_cookie: [] 122 | unsecured_credentials: [] 123 
| 124 | discovery:
125 |   account_discovery:
126 |     email_account: [DOWNLOAD_UNMANAGED_USERS_LIST, DOWNLOAD_USERLIST_CSV]
127 |     cloud_account: [DOWNLOAD_UNMANAGED_USERS_LIST, DOWNLOAD_USERLIST_CSV]
128 |
129 |   cloud_service_dashboard: [ALERT_CENTER_VIEW]
130 |   cloud_service_discovery: []
131 |   permission_groups_discovery:
132 |     cloud_groups: [GROUP_LIST_DOWNLOAD, GROUP_MEMBERS_DOWNLOAD]
133 |   software_discovery:
134 |     security_software_discovery: [UNTRUST_DOMAIN_OWNED_OAUTH2_APPS, TRUST_DOMAIN_OWNED_OAUTH2_APPS, action_complete]
135 |
136 | lateral_movement:
137 |   internal_spearphishing: []
138 |   use_alternate_authentication_material:
139 |     application_access_token: [activity]
140 |     web_session_cookie: []
141 |
142 | collection:
143 |   data_from_information_repositories:
144 |     [download, add_subscription, IMPORT_SSH_PUBLIC_KEY, VIEW_SITE_DETAILS, DOWNLOAD_REPORT, EXPORT_DEVICES]
145 |   email_collection:
146 |     remote_email_collection: [EMAIL_LOG_SEARCH]
147 |     email_forwarding_rule:
148 |       [email_forwarding_change, email_forwarding_out_of_domain, CREATE_EMAIL_MONITOR, CREATE_GMAIL_SETTING]
149 |
--------------------------------------------------------------------------------
/alfa/cmdline.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | holds the parser configuration for the command line
4 | '''
5 | from argparse import ArgumentParser
6 | from .project_creator import Project
7 | from .main import *
8 | from IPython import embed
9 | import os.path, yaml
10 |
11 | from pprint import pprint
12 | from tabulate import tabulate
13 |
14 | from .utils.summary import summary
15 |
16 | banner = '''
17 | use 'A' to access the Alfa object. A? for more info
18 | '''
19 |
20 | class Parser:
21 |     def __init__(self):
22 |         self.parser = ArgumentParser()
23 |         self.subparsers = self.parser.add_subparsers(title='subcommands',required=True,dest='subcommand',
24 |             metavar='init, acquire, analyze, load')
25 |         self.parser_init = self.subparsers.add_parser('init',
26 |             help='initialize a project directory')
27 |         self.parser_acquire = self.subparsers.add_parser('acquire',aliases=['a','ac'],
28 |             help='acquire audit log data and save it to the data/ directory')
29 |         self.parser_analyze = self.subparsers.add_parser('analyze',aliases=['aa','an'],
30 |             help='acquire and analyze audit log data, dropping into an interactive shell')
31 |         self.parser_load = self.subparsers.add_parser('load',aliases=['l'],
32 |             help='load offline data, analyze and drop into a shell')
33 |
34 |         self.add_init_args()
35 |         self.add_load_args()
36 |         self.add_default_args(self.parser_acquire)
37 |         self.add_default_args(self.parser_analyze)
38 |         self.add_analyze_args()
39 |
40 |         self.parser_init.set_defaults(func=self.handle_init)
41 |         self.parser_acquire.set_defaults(func=self.handle_acquire)
42 |         self.parser_analyze.set_defaults(func=self.handle_analyze)
43 |         self.parser_load.set_defaults(func=self.handle_load)
44 |
45 |     def add_init_args(self):
46 |         self.parser_init.add_argument('path',type=str,
47 |             help='path to project directory')
48 |         pass
49 |
50 |     def add_load_args(self):
51 |         self.parser_load.add_argument('-l','--logtype',type=str,default='all',
52 |             help='log type to load e.g. "drive"')
53 |         self.parser_load.add_argument('-p','--path', type=str, required=True,
54 |             help='directory to load, e.g. 
--path data/foo') 55 | pass 56 | 57 | def add_analyze_args(self): 58 | self.parser_analyze.add_argument('-s','--save',action='store_true', 59 | help='save data to data/ to load later') 60 | pass 61 | 62 | def add_default_args(self, subparser): 63 | subparser.add_argument('-l','--logtype',type=str,default='all', 64 | help='log type to load e.g. "drive"') 65 | subparser.add_argument('-p','--path',type=str, 66 | help='save under path e.g. --path data/foobar') 67 | subparser.add_argument('--user', required=False, type=str, default='all') 68 | subparser.add_argument('--no-filter', required=False, action='store_false', dest='filter', 69 | help='disable filtering of benign activities from dataset') 70 | subparser.add_argument('--max-results',type=int,required=False,default=1000, 71 | help='max results per page. max value = 1000 (default)') 72 | subparser.add_argument('--max-pages',type=int,required=False,default=None, 73 | help='max number of pages to collect (default = as many as possible)') 74 | subparser.add_argument('-st','--start-time',type=str,required=False,default=None, 75 | help='start collecting from date (RFC3339 format)') 76 | subparser.add_argument('-et','--end-time',type=str,required=False,default=None, 77 | help='collect until date (RFC3339 format)') 78 | subparser.add_argument('-q','--query',type=str, 79 | help='supply a yaml file containing query information. e.g. logtype, save path etc.') 80 | subparser.add_argument('--nd',action='store_true',help='save data as newline delimited') 81 | 82 | def handle_init(self, args): 83 | project = Project(args.path) 84 | print('now run "alfa analyze"!') 85 | pass 86 | 87 | def handle_load(self, args): 88 | A = Alfa.load(args.logtype, path=args.path) 89 | # code.interact(banner=banner,local=locals()) 90 | print(banner) 91 | embed(display_banner=False) 92 | pass 93 | 94 | def handle_acquire(self, args): 95 | if args.query: 96 | query = self.load_query(args.query) 97 | query['save'] = True 98 | A = Alfa.acquire(**query) 99 | else: 100 | query = vars(args) 101 | query['save'] = True 102 | A = Alfa.acquire(**query) 103 | # should interactivity be a thing for acquiring? 
104 | # code.interact(banner=banner, local=locals()) 105 | pass 106 | 107 | def handle_analyze(self, args): 108 | if args.query: 109 | query = self.load_query(args.query) 110 | A = Alfa.query(**query) 111 | else: 112 | A = Alfa.query(**vars(args)) 113 | print(banner) 114 | embed(display_banner=False) 115 | pass 116 | 117 | def load_query(self,filename: str) -> dict: 118 | if not os.path.exists(filename): 119 | print('cannot find file:',filename) 120 | return dict() 121 | with open(filename) as f: 122 | query = yaml.safe_load(f) 123 | return query 124 | 125 | def do_summary(self,A: Alfa): 126 | print('\n\n---------- Events ---------------\n\n') 127 | pprint(A.events[['type','attack.category']].head()) 128 | print('\n\n') 129 | print('num_events:',A.events.shape[0]) 130 | print('num_activities:',A.activities.shape[0]) 131 | print('\n--------------------------------------\n\n') 132 | print('\n---------- Kill Chains ---------------\n\n') 133 | print('kill chain statistic: ', A.kcs()) 134 | print('subchains discovered: ') 135 | print(tabulate(A.subchains(),headers=['start','end','kcs'],tablefmt='fancy_grid')) 136 | print('\n--------------------------------------\n\n') 137 | pass 138 | -------------------------------------------------------------------------------- /alfa/config/event_to_mitre.yml: -------------------------------------------------------------------------------- 1 | 2sv_disable: 2 | - account_manipulation.persistence 3 | 2sv_enroll: 4 | - account_manipulation.persistence 5 | ADD_PRIVILEGE: 6 | - cloud_accounts.valid_accounts.privilege_escalation 7 | ADD_RECOVERY_PHONE: 8 | - account_manipulation.persistence 9 | ADD_TO_TRUSTED_OAUTH2_APPS: 10 | - application_access_token.use_alternate_authentication_material.defense_evasion 11 | ALERT_CENTER_BATCH_DELETE_ALERTS: 12 | - cloud_accounts.valid_accounts.defense_evasion 13 | ALERT_CENTER_DELETE_ALERT: 14 | - cloud_accounts.valid_accounts.defense_evasion 15 | ALERT_CENTER_VIEW: 16 | - cloud_service_dashboard.discovery 17 | ASSIGN_ROLE: 18 | - cloud_accounts.valid_accounts.privilege_escalation 19 | CHANGE_PASSWORD: 20 | - account_manipulation.persistence 21 | CHANGE_PASSWORD_ON_NEXT_LOGIN: 22 | - account_manipulation.persistence 23 | CHANGE_RECOVERY_EMAIL: 24 | - account_manipulation.persistence 25 | CHANGE_RECOVERY_PHONE: 26 | - account_manipulation.persistence 27 | CHANGE_USER_ORGANIZATION: 28 | - account_manipulation.persistence 29 | CREATE_EMAIL_MONITOR: 30 | - email_forwarding_rule.email_collection.collection 31 | CREATE_GMAIL_SETTING: 32 | - email_forwarding_rule.email_collection.collection 33 | CREATE_USER: 34 | - cloud_account.create_account.persistence 35 | DELETE_USER: 36 | - account_manipulation.persistence 37 | DOWNLOAD_REPORT: 38 | - data_from_information_repositories.collection 39 | DOWNLOAD_UNMANAGED_USERS_LIST: 40 | - email_account.account_discovery.discovery 41 | - cloud_account.account_discovery.discovery 42 | DOWNLOAD_USERLIST_CSV: 43 | - email_account.account_discovery.discovery 44 | - cloud_account.account_discovery.discovery 45 | EMAIL_LOG_SEARCH: 46 | - remote_email_collection.email_collection.collection 47 | ENABLE_USER_IP_WHITELIST: 48 | - account_manipulation.persistence 49 | EXPORT_DEVICES: 50 | - data_from_information_repositories.collection 51 | GRANT_ADMIN_PRIVILEGE: 52 | - account_manipulation.persistence 53 | - cloud_accounts.valid_accounts.privilege_escalation 54 | GRANT_DELEGATED_ADMIN_PRIVILEGES: 55 | - account_manipulation.persistence 56 | GROUP_LIST_DOWNLOAD: 57 | - 
cloud_groups.permission_groups_discovery.discovery 58 | GROUP_MEMBERS_DOWNLOAD: 59 | - cloud_groups.permission_groups_discovery.discovery 60 | IMPORT_SSH_PUBLIC_KEY: 61 | - data_from_information_repositories.collection 62 | MOBILE_DEVICE_APPROVE: 63 | - account_manipulation.persistence 64 | REMOVE_FROM_BLOCKED_OAUTH2_APPS: 65 | - application_access_token.use_alternate_authentication_material.defense_evasion 66 | REMOVE_RECOVERY_EMAIL: 67 | - account_manipulation.persistence 68 | REMOVE_RECOVERY_PHONE: 69 | - account_manipulation.persistence 70 | RESET_SIGNIN_COOKIES: 71 | - account_manipulation.persistence 72 | REVOKE_ADMIN_PRIVILEGE: 73 | - account_manipulation.persistence 74 | REVOKE_ASP: 75 | - account_manipulation.persistence 76 | REVOKE_SECURITY_KEY: 77 | - account_manipulation.persistence 78 | TRUST_DOMAIN_OWNED_OAUTH2_APPS: 79 | - application_access_token.use_alternate_authentication_material.defense_evasion 80 | - security_software_discovery.software_discovery.discovery 81 | UNBLOCK_USER_SESSION: 82 | - account_manipulation.persistence 83 | UNDELETE_USER: 84 | - account_manipulation.persistence 85 | UNENROLL_USER_FROM_STRONG_AUTH: 86 | - account_manipulation.persistence 87 | UNENROLL_USER_FROM_TITANIUM: 88 | - account_manipulation.persistence 89 | UNSUSPEND_USER: 90 | - account_manipulation.persistence 91 | UNTRUST_DOMAIN_OWNED_OAUTH2_APPS: 92 | - security_software_discovery.software_discovery.discovery 93 | VIEW_SITE_DETAILS: 94 | - data_from_information_repositories.collection 95 | account_disabled_generic: 96 | - account_manipulation.persistence 97 | action_complete: 98 | - security_software_discovery.software_discovery.discovery 99 | activity: 100 | - application_access_token.use_alternate_authentication_material.lateral_movement 101 | add_member: 102 | - account_manipulation.persistence 103 | add_member_role: 104 | - account_manipulation.persistence 105 | add_security_setting: 106 | - account_manipulation.persistence 107 | add_subscription: 108 | - data_from_information_repositories.collection 109 | authorize: 110 | - application_access_token.use_alternate_authentication_material.defense_evasion 111 | - steal_application_access_token.credential_access 112 | ban_member_with_moderation: 113 | - account_manipulation.persistence 114 | change_security_setting: 115 | - account_manipulation.persistence 116 | change_user_access: 117 | - cloud_accounts.valid_accounts.privilege_escalation 118 | delete_group: 119 | - account_manipulation.persistence 120 | download: 121 | - data_from_information_repositories.collection 122 | email_forwarding_change: 123 | - email_forwarding_rule.email_collection.collection 124 | email_forwarding_out_of_domain: 125 | - email_forwarding_rule.email_collection.collection 126 | join: 127 | - account_manipulation.persistence 128 | login_challenge: 129 | - cloud_accounts.valid_accounts.persistence 130 | - cloud_accounts.valid_accounts.privilege_escalation 131 | - cloud_accounts.valid_accounts.defense_evasion 132 | - password_guessing.brute_force.credential_access 133 | - password_spraying.brute_force.credential_access 134 | - credential_stuffing.brute_force.credential_access 135 | login_failure: 136 | - cloud_accounts.valid_accounts.persistence 137 | - password_guessing.brute_force.credential_access 138 | - password_spraying.brute_force.credential_access 139 | - credential_stuffing.brute_force.credential_access 140 | login_success: 141 | - cloud_accounts.valid_accounts.persistence 142 | - cloud_accounts.valid_accounts.privilege_escalation 143 | - 
cloud_accounts.valid_accounts.defense_evasion 144 | - password_guessing.brute_force.credential_access 145 | - password_spraying.brute_force.credential_access 146 | - credential_stuffing.brute_force.credential_access 147 | - saml_tokens.forge_web_credentials.credential_access 148 | login_verification: 149 | - cloud_accounts.valid_accounts.persistence 150 | - cloud_accounts.valid_accounts.privilege_escalation 151 | - cloud_accounts.valid_accounts.defense_evasion 152 | - password_guessing.brute_force.credential_access 153 | - password_spraying.brute_force.credential_access 154 | - credential_stuffing.brute_force.credential_access 155 | password_change: 156 | - account_manipulation.persistence 157 | - account_manipulation.persistence 158 | recovery_info_change: 159 | - account_manipulation.persistence 160 | recovery_phone_edit: 161 | - account_manipulation.persistence 162 | remove_info_setting: 163 | - account_manipulation.persistence 164 | remove_member: 165 | - account_manipulation.persistence 166 | remove_member_role: 167 | - account_manipulation.persistence 168 | remove_membership_expiry: 169 | - account_manipulation.persistence 170 | remove_security_setting: 171 | - account_manipulation.persistence 172 | suspicious_login: 173 | - cloud_accounts.valid_accounts.persistence 174 | - cloud_accounts.valid_accounts.privilege_escalation 175 | - cloud_accounts.valid_accounts.defense_evasion 176 | suspicious_login_less_secure_app: 177 | - cloud_accounts.valid_accounts.persistence 178 | - cloud_accounts.valid_accounts.privilege_escalation 179 | - cloud_accounts.valid_accounts.defense_evasion 180 | suspicious_programmatic_login: 181 | - cloud_accounts.valid_accounts.persistence 182 | - cloud_accounts.valid_accounts.privilege_escalation 183 | - cloud_accounts.valid_accounts.defense_evasion 184 | titanium_change: 185 | - account_manipulation.persistence 186 | titanium_unenroll: 187 | - account_manipulation.persistence 188 | unban_member: 189 | - account_manipulation.persistence 190 | -------------------------------------------------------------------------------- /alfa/main/alfa.py: -------------------------------------------------------------------------------- 1 | #!/bin/python3 2 | import os 3 | from .analyser import Analyser 4 | from .activity import Activities, Activity 5 | from .event import Events 6 | from pandas.core.series import Series 7 | from pandas import to_datetime 8 | from typing import Tuple, Union 9 | from functools import reduce 10 | 11 | from ..config import config 12 | from .kill_chain import KillChain 13 | from .collector import Collector 14 | 15 | class Alfa: 16 | '''Takes all suspicious activities and creates a separate "events" 17 | attribute that holds all events. 18 | Each event contains a reference to its corresponding activity. 19 | 20 | Can be initialized as empty, or with an Activities dataframe. 
21 |     Typically will be initialized through static methods:
22 |     Alfa.load, Alfa.load_unfiltered, or Alfa.query
23 |     '''
24 |     activities = Activities(**config['activity_defaults'])
25 |     events = Events()
26 |
27 |     def __init__(self, activity_list: list = None) -> None:
28 |         self.collector = Collector()
29 |         if activity_list is not None:
30 |             self.activities = Activities(activity_list)
31 |             self.events = self.initialize_events()
32 |             self.activities = self.activities.fillna('')
33 |         pass
34 |
35 |     def __get_events(self, activity: Series) -> list:
36 |         return activity['events']
37 |
38 |     def __get_all_events(self) -> list:
39 |         all_events = []
40 |         if self.activities.shape[0] == 0:
41 |             return []
42 |         for activity in self.activities.iloc:
43 |             activity_id = activity.name
44 |             new_events = self.__get_events(activity)
45 |             for event in new_events:
46 |                 event['activity_id'] = activity_id
47 |                 event['activity_time'] = to_datetime(activity['id.time'])
48 |             all_events = all_events + new_events
49 |
50 |         return all_events
51 |
52 |     def __create_events(self, all_events: list) -> Events:
53 |         E = Events(all_events)
54 |         if 'activity_time' not in E:
55 |             print('warning: no data in dataset!')
56 |             E.parent = self
57 |             return E
58 |         # sort_values throws an error if the dataframe is empty, hence the early return above
59 |         E = E.sort_values('activity_time', ignore_index=True)
60 |         E.parent = self
61 |         return E
62 |
63 |     def initialize_events(self) -> Events:
64 |         all_events = self.__get_all_events()
65 |         return self.__create_events(all_events)
66 |
67 |     def activity_by_id(self, uid: str) -> Activity:
68 |         return self.activities.loc[uid]
69 |
70 |     def filter(self, filter_array: Series) -> 'Alfa':
71 |         '''
72 |         Filters on *activities* and returns a new Alfa object.
73 |         Input should be a slice of an Activities dataframe,
74 |         e.g. A.activities[A.activities['actor.email'].str.startswith('attacker')]
75 |         will return activities whose email starts with 'attacker'
76 |         '''
77 |         filtered_activities = self.activities[filter_array]
78 |         return Alfa(filtered_activities)
79 |
80 |     def kcs(self, start_index: int = 0, end_index: int = None):
81 |         '''
82 |         return a kill_chain_statistic for the entire set of events,
83 |         if called with no params, else act on a slice.
84 |         '''
85 |         E = self.events['attack.index']
86 |         if end_index and end_index > start_index:
87 |             E = E.iloc[slice(start_index, end_index)]
88 |         return KillChain.generate_kill_chain_statistic(list(E))
89 |
90 |     def subchains(self, min_length=None, min_stat=None):
91 |         subchains = KillChain.discern_subchains(
92 |             self.events['attack.index'], min_length, min_stat)
93 |         return sorted(subchains, key=lambda x: x[2], reverse=True)
94 |
95 |     @staticmethod
96 |     def acquire(logtype: str, *args, **kwargs) -> Union[list, dict]:
97 |         '''
98 |         Collect records from the API; do not process them.
99 |         This is a wrapper around Collector, see Collector.query for details
100 |         '''
101 |         C = Collector()
102 |         res = C.query(logtype, *args, **kwargs)  # this returns a dataframe
103 |         return res
104 |
105 |     @staticmethod
106 |     def query(logtype: str, filter=True, *args, **kwargs):
107 |         '''
108 |         Query the API directly; returns an Alfa object. 
See Collector.query for details.
109 |         '''
110 |         C = Collector()
111 |         A = Analyser()
112 |         Q = C.query(logtype, *args, **kwargs)
113 |         records = A.analyse(Q, filter=filter)
114 |         return Alfa(Activities(records))
115 |
116 |     @staticmethod
117 |     def load(logtype: str, path: str = None, email: list = None, filter: bool = True) -> 'Alfa':
118 |         '''
119 |         load a log (or all logs) from the data/ folder, label and *filter* it, and
120 |         return an Alfa object. Optionally filter by email.
121 |         See Analyser for details
122 |         '''
123 |         A = Analyser()
124 |         C = Collector()
125 |         if logtype == 'all':
126 |             all_ = C.load_all(path)
127 |             records = A.analyse(all_, email=email, filter=filter)
128 |             return Alfa(Activities(records))
129 |         Q = C.load(os.path.join(path, logtype+'.json'))
130 |         records = A.analyse(Q, email=email, filter=filter)
131 |         return Alfa(Activities(records))
132 |
133 |     def __aoi(self, concat: bool = True):
134 |         '''
135 |         Activities of Interest
136 |         Automates the following:
137 |         1. get subchains
138 |         2. join subchains that are close by
139 |         3. grab event slices from those subchains
140 |         4. list out the unique activities associated with those subchains
141 |         concat: bool, if True (default) then append
142 |         the activity slices to one another
143 |         '''
144 |         subchains = self.subchains()
145 |         long_chains = KillChain.join_subchains_loop(subchains)
146 |         event_slices = self.events.get_event_slices(long_chains)
147 |         activity_slices = [e.activities() for e in event_slices]
148 |         if len(activity_slices) == 0:  # prevent a possible concat on an empty list
149 |             return activity_slices
150 |         if concat:
151 |             res = reduce(lambda a, b: a.append(b), activity_slices)
152 |             res = res[~res.index.duplicated()]
153 |             return res
154 |         return activity_slices
155 |
156 |     def aoi(self, export: str = None, nd: bool=False):
157 |         '''
158 |         wrapper around __aoi (above);
159 |         adds the export functionality and
160 |         exports data as a JSON file
161 |         '''
162 |         if len(self.events) != 0:
163 |             aoi = self.__aoi()
164 |             if export is not None:
165 |                 aoi['events'] = aoi['events'].apply(self.list_to_string)  # parse the list to a string so it doesn't crash when serialized to JSON
166 |                 if nd:
167 |                     with open(export, 'w') as f:
168 |                         for _, row in aoi.iterrows():
169 |                             f.write(row.to_json()+'\n')
170 |                 else:
171 |                     aoi.to_json(export, orient="records")
172 |                 print('saved to', export)
173 |                 return aoi
174 |             else:
175 |                 print("[!] Error - please provide an export file as follows: A.aoi(export='example.json')")
176 |         else:
177 |             print("[!] Error - no data to export")
178 |
179 |     def list_to_string(self, lst):
180 |         return ','.join(map(str, lst))
181 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ALFA
2 | ## Automated Audit Log Forensic Analysis for Google Workspace
3 | Copyright (c) 2025 Invictus Incident Response
4 | Original authors: [Greg Charitonos](https://www.linkedin.com/in/charitonos/) & [BertJanCyber](https://twitter.com/BertJanCyber); maintained by Invictus Incident Response
5 |
6 | # Before you start
7 | A note on supported operating systems: ALFA is tested on several Linux distributions (Debian and Ubuntu).
8 | While it might be possible to use it on Windows or macOS, results can vary, so proceed at your own risk.
9 |
10 | # Installation
11 | 1. Download this project
12 | 2. Navigate to the folder in your terminal and run ```sudo pip install -e .``` or ```sudo pip3 install -e .```
13 |
14 | ## Installation on macOS
15 | 1. **Install Python 3.9 using `pyenv`**
16 |    ALFA works best with Python 3.9. You can use `pyenv` to manage and install this version.
17 |    - If you haven’t installed `pyenv` yet, you can do so using Homebrew:
18 |      ```bash
19 |      brew install pyenv
20 |      ```
21 |    - Set up `pyenv` in your shell by adding the following to your shell configuration file (e.g., `~/.zshrc` or `~/.bashrc`):
22 |      ```bash
23 |      export PYENV_ROOT="$HOME/.pyenv"
24 |      export PATH="$PYENV_ROOT/bin:$PATH"
25 |      eval "$(pyenv init --path)"
26 |      ```
27 |      After updating the file, reload your shell:
28 |      ```bash
29 |      source ~/.zshrc  # or source ~/.bashrc
30 |      ```
31 |    - Install Python 3.9 using `pyenv`:
32 |      ```bash
33 |      pyenv install 3.9
34 |      ```
35 |    - Set Python 3.9 as the current version:
36 |      ```bash
37 |      pyenv global 3.9
38 |      ```
39 |      or just for the current session:
40 |      ```bash
41 |      pyenv shell 3.9
42 |      ```
43 | 2. **(Optional) Create a Virtual Environment**
44 |    It’s good practice to run projects in a virtual environment to keep dependencies isolated. Using the 3.9 version of Python you just set, create a virtual environment:
45 |    ```bash
46 |    python -m venv alfa
47 |    ```
48 |    - Activate the virtual environment:
49 |      ```bash
50 |      source alfa/bin/activate
51 |      ```
52 | 3. **Install ALFA**
53 |    Inside the folder where the ALFA project is located, install it using `pip`:
54 |    ```bash
55 |    pip install -e .
56 |    ```
57 |    If you’re not using a virtual environment, you may need `sudo`:
58 |    ```bash
59 |    sudo pip install -e .
60 |    ```
61 |
62 | 4. **Verify Installation**
63 |    After installation, you can verify that ALFA is installed by attempting to run its command-line tool `alfa`.
64 |
65 | ## Using ALFA
66 | NOTE: For retrieving credentials.json, please see ```CREDENTIALS.md```
67 |
68 | ## Setup
69 | 1. The first step is to initialize ALFA. Do this by running ```alfa init projectname```; this command will create a new directory to store your configuration and data. E.g. ```alfa init project_x```
70 | 2. A new project has now been generated called 'project_x'. Within that folder, copy your credentials.json into the config/ folder. **(See CREDENTIALS.md for instructions.)**
71 | 3. After you have copied over the credentials.json file you are ready to use ALFA.
72 |
73 | ALFA has 3 options, as explained below:
74 |
75 | ## 1. ALFA Acquire
76 | ## Acquire all Google Workspace Audit Logs
77 | 1. From inside "project_x" (or whatever name you chose before) run ```alfa acquire```
78 | 2. ALFA will now grab all logtypes for all users and save them to a subdirectory in the 'data' folder; a .json file will be generated per logtype
79 | 3. 
To see what other options you have, type ```alfa acquire -h```
80 |
81 | ## Advanced acquisitions with ALFA
82 | You can do all kinds of filtering to limit the data you are acquiring; some examples below:
83 | - Only grab the 'admin' logtype ```alfa acquire --logtype=admin```
84 | - Save the output to a specific folder ```alfa acquire -p /tmp/project_secret```
85 | - Only grab logs for a specific user ```alfa acquire --user=insert_username```
86 | - Grab logs within a defined time period ```alfa acquire --start-time=2022-07-10T10:00:00Z --end-time=2022-07-11T14:26:01Z```; the time format is RFC3339
87 |
88 | Now you know how to acquire data; time for some fancy stuff to unleash the power of ALFA.
89 |
90 | ## 2. ALFA Analyze
91 | The analyze function automatically analyzes all audit log data for a given Google Workspace to identify suspicious activity.
92 | ### How this works
93 | 1. Categorization
94 | Each individual event is categorized based on a mapping defined in alfa/config/event_to_mitre.yml. If an event matches that list, it is mapped to a technique that is part of the MITRE ATT&CK Cloud Framework (https://attack.mitre.org/matrices/enterprise/cloud/).
95 |
96 | 2. Scoring
97 | Next, ALFA will analyze all mapped events in chronological order to try to identify kill chains or logical attack paths.
98 | E.g. an event that is mapped to the Persistence phase followed by an event that is mapped to the Credential Access phase will result in a higher score.
99 |
100 | 3. Result
101 | Ultimately, ALFA will give the analyst a list of identified 'subchains' that can be further analyzed.
102 |
103 | ### How to use ALFA analyze?
104 | 1. First run ```alfa analyze```, which will automatically identify subchains (or report that none were found). It will also drop you into a shell where you can perform follow-up activities.
105 | 2. To get more information on a given subchain you can simply run ```A.subchains()```, which will show you the chain using the following format: (number_of_first_event_in_chain, number_of_last_event_in_chain, killchain_score), where a score of 1 means a perfect chain was identified, and the closer it gets to 0, the weaker the chain is.
106 | 3. In order to access the suspicious events that caused this chain, use ```A.aoi(export='activities.json')``` to export all identified subchains to a file that you can then use for further analysis.
107 |
108 |
109 | ## 3. ALFA Load
110 | ## Load data from local storage
111 | ### From Local Storage
112 | Use ```A = Alfa.load([logname])``` to load and analyse logs from local storage. Use ```A = Alfa.load('all')``` to load all logs. Alfa *filters* benign activities out by default. To load all activities and events, unfiltered, use ```Alfa.load([logname], filter=False)```.
113 |
114 |
115 | ## Making Changes
116 | ### Adding new event mappings
117 | It is possible to edit the config/event_to_mitre.yml file directly, but it is ill-advised: the layout of this file is unintuitive. Instead, consider making amendments to utils/mappings.yml, then repopulate config/event_to_mitre.yml by running utils/event_mitre_remap.py
118 |
119 | ### Amending Kill Chain Discovery methods
120 | The kill chain discovery function utilizes hard-coded constants. These can be found in config/config.yml.
121 | Forensic analysts are advised to review the values and make amendments for their workspace as necessary.
122 | These constants are responsible for both the kill chain statistic (kcs) and kill chain discovery (subchains); a tuning example is sketched below. 
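For instance, a minimal sketch of a tuned `kill_chain` block could look like this (the values are illustrative assumptions, not recommendations; compare them with the defaults in alfa/config/config.yml shown earlier):

```yaml
# config/config.yml -- illustrative tuning, adjust for your own workspace
kill_chain:
  min_chain_length: 5      # surface shorter subchains than the default of 7...
  min_chain_statistic: 0.8 # ...but require a stronger kill chain statistic (default 0.6)
  index_reducer: mean      # average multi-category events instead of taking the max
  max_slack_width: 5       # how "long" the slack line can be
  max_slack_depth: 7       # how "low" the slack line can hang
```

Lowering `min_chain_length` while raising `min_chain_statistic` trades chain length for ordering strictness; the right balance depends on how noisy the workspace's audit logs are.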
123 | 
124 | ## FAQ
125 | Want to know more about the statistics and algorithm used by ALFA? We wrote a blog post about it here: https://medium.com/@invictus-ir/automated-forensic-analysis-of-google-workspace-859ed50c5c92
126 | 
127 | ## Known Errors
128 | ### ValueError: missing config/credentials.json
129 | You have to add a credentials.json file to the 'config' subdirectory of the project folder. Instructions are in the 'CREDENTIALS.md' file.
130 | 
131 | ### OSError: [Errno 98] Address already in use
132 | This means that port 8089 is already in use by another application. This can happen if you have a webserver running on that port; note that Splunk also uses port 8089 by default. The solution is to (temporarily) stop whatever is using the port, as it must be available for the authentication flow.
133 | 
134 | ### ValueError: Authorized user info was not in the expected format, missing fields refresh_token.
135 | Sometimes the authorization info needs to be updated. The easiest way to do this is to remove 'token.json' from the project_name/config folder and rerun the command. If that still gives issues, remove both token.json and credentials.json and re-register the OAuth application as described in
136 | ```CREDENTIALS.md```
137 | 
138 | ### Access is blocked: This app's request is invalid // Error 400: redirect_uri_mismatch
139 | Make sure you have the trailing slash (``/``) in the ``Authorized redirect URIs`` entry ``http://localhost:8089/`` of your application's ``OAuth 2.0 Client IDs``.
140 | ![image](https://github.com/user-attachments/assets/22ef159c-9f94-43fa-804f-349a13227449)
141 | 
--------------------------------------------------------------------------------
/alfa/main/kill_chain.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 | 
3 | from pandas import isna
4 | from ..config import config
5 | kc_conf = config['kill_chain']
6 | 
7 | 
8 | class KillChain:
9 |     '''
10 |     KillChain takes a list of events and assigns a value from chain_dict, based on the event's category.
11 |     The KillChain Statistic is a value, from -1 to 1, that indicates how well the traditional kill-chain attack is followed.
12 | 
13 |     -1 indicates that the kill_chain was followed in reverse. 0 is totally random. 1 indicates a 100% follow-through.
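    Illustrative example, following the scoring in generate_kill_chain_statistic below
    (+1 per escalation, -1 per de-escalation, normalized by the number of changing
    transitions): the index sequence [1, 2, 4, 3] escalates twice and de-escalates once
    over 3 transitions, giving (1 + 1 - 1) / 3 ~ 0.33.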
14 | ''' 15 | 16 | reductive_methods = { 17 | 'mean': lambda x: sum(x)/len(x), 18 | 'min': min, 19 | 'max': max 20 | } 21 | 22 | chain_dict = { 23 | "persistence": 1, 24 | "privilege_escalation": 2, 25 | "defense_evasion": 3, 26 | "credential_access": 4, 27 | "discovery": 5, 28 | "lateral_movement": 6, 29 | "collection": 7 30 | } 31 | 32 | @staticmethod 33 | def generate_kill_chain_statistic(chain_index_list: list) -> float: 34 | ''' 35 | Input: list of attack_indexes 36 | Output: statistic 37 | 38 | sum -> 0, count-> 0 39 | For each index: 40 | if prev_index < index: 41 | sum += 1, count +=1 42 | if prev_index > index: 43 | sum -= 1, count += 1 44 | 45 | Output -> sum / count 46 | ''' 47 | result = 0 48 | chain_size = len(chain_index_list) 49 | total_unique_indexes = 0 50 | prev_value = chain_index_list[0] 51 | for i in range(1, chain_size): 52 | if isna(chain_index_list[i]): 53 | continue 54 | if chain_index_list[i] > prev_value: 55 | result += 1 56 | total_unique_indexes += 1 57 | prev_value = chain_index_list[i] 58 | 59 | elif chain_index_list[i] < prev_value: 60 | result -= 1 61 | total_unique_indexes += 1 62 | prev_value = chain_index_list[i] 63 | elif chain_index_list[i] == prev_value: 64 | result -= 1/len(chain_index_list) # decrement result slowly if previous value is the same as current value 65 | if total_unique_indexes == 0: 66 | return 0 67 | return result / total_unique_indexes 68 | 69 | @staticmethod 70 | def assign_index(category): 71 | return KillChain.chain_dict[category] 72 | 73 | @staticmethod 74 | def reduce_category_list(category_list): 75 | ''' 76 | Used to reduce attack indexes to a single value. It takes the categories (tactics) and maps them according to KillChain.chain_dict 77 | Then it uses the chosen reductive method (in config.yml), to reduce the value to a single number. Reductive methods available: 78 | mean, min, & max. 
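        Illustrative example: ['defense_evasion', 'credential_access'] maps to [3, 4]
        via chain_dict; 'mean' reduces this to 3.5, 'min' to 3, and 'max' to 4.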
79 | ''' 80 | category_as_indexes = [ 81 | KillChain.assign_index(c) for c in category_list] 82 | return KillChain.reductive_methods[kc_conf['index_reducer']](category_as_indexes) 83 | 84 | @staticmethod 85 | def __discern_single_subchain(chain_index_list: list, start_index: int, min_length: int, min_stat: float, max_slack_width: int=kc_conf['max_slack_width'], max_slack_depth: int= kc_conf['max_slack_depth']) -> list: 86 | ''' 87 | Output: [start_index, end_index, statistic] 88 | 89 | growing phase: Not shrinking phase 90 | while True 91 | growing phase: 92 | if better (or equal) stats: 93 | set candidate 94 | grow (end_index += 1) 95 | repeat 96 | else: 97 | set phase to shrinking 98 | 99 | if shrinking: 100 | shrink (start_index += 1) 101 | if better stats: 102 | set candidate 103 | if too small: 104 | end 105 | if run out of tries (max_shrink_no_change): 106 | end 107 | repeat 108 | 109 | check statistic for slice between start_index, end_index 110 | if slice better than prev_slice, set it as candidate 111 | if growing: 112 | increase end_index by 1 113 | if shrinking: 114 | increase start_index by 1 115 | 116 | start by growing, and change to shrinking when: 117 | end_index is length of chain 118 | stat no longer increases 119 | 120 | End loop when: 121 | in shrinking phase and: 122 | stat no longer increasing 123 | length of slice is min_length 124 | ''' 125 | end_index = min(start_index + min_length, len(chain_index_list)) 126 | 127 | prev_stat = 0 128 | candidate = None 129 | 130 | shrinking_phase = False 131 | max_shrink_no_change = min_length 132 | shrink_amount = 0 133 | slack_width = 0 134 | 135 | while True: 136 | if shrinking_phase: 137 | shrink_amount += 1 138 | SI = start_index + shrink_amount 139 | subchain = chain_index_list[SI:end_index] 140 | stat = KillChain.generate_kill_chain_statistic(subchain) 141 | 142 | if stat > min_stat: 143 | if not shrinking_phase: # First phase "Growing" phase 144 | SI = start_index 145 | # Greedy. Greater than OR equal to. Try and grow as much as possible. 146 | if stat >= prev_stat: 147 | candidate = [SI, end_index, stat] 148 | prev_stat = stat 149 | end_index += 1 150 | slack_width = 0 151 | if end_index >= len(chain_index_list): 152 | shrinking_phase = True 153 | end_index = len(chain_index_list) 154 | continue 155 | elif slack_width < max_slack_width: 156 | final_index = chain_index_list[end_index-1] 157 | index_before_slack = chain_index_list[end_index - (2+slack_width)] 158 | if index_before_slack - final_index < max_slack_depth: 159 | slack_width += 1 160 | else: 161 | shrinking_phase = True 162 | slack_width = 0 163 | end_index -= 1 164 | else: 165 | shrinking_phase = True 166 | slack_width = 0 167 | end_index -= 1+slack_width 168 | continue 169 | 170 | elif shrinking_phase: # Second phase 171 | if stat > prev_stat: # Lazy. Shrink as little as possible. 
Shrink ONLY IF GREATER THAN
172 |                         candidate = [SI, end_index, stat]
173 |                         prev_stat = stat
174 |             if (end_index - start_index <= min_length):  # shrunk too small
175 |                 break
176 |             elif shrink_amount < max_shrink_no_change:  # still opportunity to shrink
177 |                 continue
178 |             break
179 |         return candidate
180 | 
181 | 
182 |     @staticmethod
183 |     def join_close_subchains(subchain_list: list, min_chain_length: int = kc_conf['min_chain_length']) -> tuple:
184 |         subchain_list.sort(key=lambda x: x[0])
185 |         i = len(subchain_list) - 1
186 |         new_chains = []
187 |         change_count = 0
188 |         while i >= 0:
189 |             if i == 0:
190 |                 new_chains.insert(0, subchain_list[0][:2])
191 |                 break
192 | 
193 |             curr_chain = subchain_list[i]
194 |             prev_chain = subchain_list[i-1]
195 |             if curr_chain[0] - prev_chain[1] < min_chain_length:
196 |                 new_chains.insert(0, [prev_chain[0], curr_chain[1]])
197 |                 i -= 2
198 |                 change_count += 1
199 |             else:
200 |                 new_chains.insert(0, curr_chain[:2])
201 |                 i -= 1
202 |         return new_chains, change_count
203 | 
204 |     @staticmethod
205 |     def join_subchains_loop(subchain_list: list, min_chain_length: int = kc_conf['min_chain_length']) -> list:
206 |         jsc, count = KillChain.join_close_subchains(
207 |             subchain_list, min_chain_length)
208 |         while count:
209 |             jsc, count = KillChain.join_close_subchains(jsc, min_chain_length)
210 |         return jsc
211 | 
212 |     @staticmethod
213 |     def discern_subchains(chain_index_list: list, min_length: int = None, min_stat: float = None) -> list:
214 |         '''
215 |         Takes in a list of attack_indexes, outputs subchains within it. Output in the form -> [ [start_index, end_index, statistic], ...]
216 |         Discover subchains within a series. Uses the values in the config.yml file if not defined:
217 |         - min_chain_length
218 |         - min_chain_statistic
219 |         '''
220 |         if min_length is None:
221 |             min_length = kc_conf['min_chain_length']
222 |         if min_stat is None:
223 |             min_stat = kc_conf['min_chain_statistic']
224 | 
225 |         chain_index_list = list(chain_index_list)
226 |         subchains = []
227 |         start_index = 0
228 |         while start_index < (len(chain_index_list) - min_length):
229 |             candidate = KillChain.__discern_single_subchain(
230 |                 chain_index_list, start_index, min_length, min_stat)
231 |             if candidate:
232 |                 subchains.append(candidate)
233 |                 start_index = candidate[1]  # end_index
234 |             else:
235 |                 start_index += 1
236 |         return subchains
237 | 
--------------------------------------------------------------------------------
/alfa/main/collector.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 | 
3 | """
4 | https://developers.google.com/admin-sdk/reports/reference/rest/v1/activities/list
5 | 
6 | dates: https://www.ietf.org/rfc/rfc3339.txt
7 | 
8 | """
9 | import json
10 | from json.decoder import JSONDecodeError
11 | import os
12 | import os.path
13 | from datetime import datetime
14 | 
15 | import pandas as pd
16 | from google.auth.transport.requests import Request
17 | from google.oauth2.credentials import Credentials
18 | from google_auth_oauthlib.flow import InstalledAppFlow
19 | from googleapiclient.discovery import Resource, build
20 | 
21 | from ..config import config
22 | from ..config.__internals__ import internals
23 | from ..utils.path import *
24 | 
25 | PORT = 8089
26 | 
27 | creds_path = os.path.join(
28 |     internals["project"]["dirs"]["configs"], internals["project"]["files"]["creds"]
29 | )
30 | token_path = os.path.join(
31 |     internals["project"]["dirs"]["configs"], internals["project"]["files"]["token"]
32 | )
33 | 
34 | DATA_DIR = internals["project"]["dirs"]["data"]
35 | 
36 | creds_instructions = """
37 | === Missing "config/credentials.json" ===
38 | 1. Go to https://console.developers.google.com/cloud-resource-manager
39 | 2. Create a project
40 | 3. Go to https://console.developers.google.com/apis/dashboard and choose "Credentials", then "OAuth client ID"
41 | 4. Select "web application" as the application type
42 | 5. Copy the resulting credentials to config/credentials.json
43 | """
44 | 
45 | 
46 | class Collector:
47 |     """
48 |     Begins the authentication flow at init.
49 | 
50 |     The .query method collects logs, selected via the following
51 |     params:
52 |         logtype: either a string (single log, e.g. "admin"), a list ["admin", "drive"], or the string "all" to collect all logs
53 |         user: str='all' | userId or email of user. 'all' => all users
54 |         max_results: int=1000 | max results per page
55 |         max_pages: int = None | max number of pages (requests per log)
56 |         start_time: str=None | rfc3339 date string. must be less than end time
57 |         end_time: str=None | rfc3339 date string. must be greater than start time
58 |     """
59 | 
60 |     SCOPES = config["scopes"]
61 | 
62 |     def __init__(self) -> None:
63 |         self.api_ready = False
64 |         pass
65 | 
66 |     def __init_api_creds(self):
67 |         """
68 |         should be called before interacting with the api
69 |         """
70 |         self.creds = self.get_credentials()
71 |         self.service = self.connect_api()
72 |         self.request_count = 0
73 |         self.api_ready = True
74 |         pass
75 | 
76 |     def __create_path(self, path: str):
77 |         """create the path if non-existent"""
78 |         full_path = rel_path(path)
79 |         if os.path.exists(full_path):
80 |             return full_path
81 |         os.makedirs(full_path)
82 |         return full_path
83 | 
84 |     def get_credentials(self):
85 |         creds = False
86 |         if os.path.exists(token_path):
87 |             creds = Credentials.from_authorized_user_file(token_path)
88 | 
89 |         if not creds or not creds.valid:
90 |             if not os.path.exists(creds_path):
91 |                 print(creds_instructions)
92 |                 raise ValueError("missing config/credentials.json")
93 |             if creds and creds.refresh_token:
94 |                 creds.refresh(Request())
95 |             else:
96 |                 flow = InstalledAppFlow.from_client_secrets_file(
97 |                     creds_path, self.SCOPES
98 |                 )
99 |                 creds = flow.run_local_server(port=PORT)
100 |             with open(token_path, "w") as token:
101 |                 token.write(creds.to_json())
102 |         return creds
103 | 
104 |     def connect_api(self):
105 |         service = build("admin", "reports_v1", credentials=self.creds)
106 |         return service
107 | 
108 |     def query_one(
109 |         self,
110 |         save_path: str,
111 |         save,
112 |         logtype: str,
113 |         user: str = "all",
114 |         max_results: int = 1000,
115 |         max_pages: int = None,
116 |         start_time: str = None,
117 |         end_time: str = None,
118 |         **kwargs,
119 |     ) -> int:
120 |         """
121 |         used by the .query method
122 |         collects activities from a single logtype, writes them to disk, and returns the number of activities collected
123 |         """
124 |         activities = self.service.activities()
125 |         req = activities.list(
126 |             userKey=user,
127 |             applicationName=logtype,
128 |             maxResults=max_results,
129 |             startTime=start_time,
130 |             endTime=end_time,
131 |         )
132 |         page_index = 0
133 |         result = 0
134 | 
135 |         while req is not None:
136 |             if max_pages and page_index > max_pages:
137 |                 break
138 | 
139 |             self.request_count += 1
140 |             resp = req.execute()
141 |             my_activities = resp.get("items", [])
142 |             result += len(my_activities)
143 | 
144 |             if my_activities:  # Only open the file if there is data
145 |                 if not (save_path[0] == "/" or save_path.startswith("./")):
146 |                     save_path = "./" + save_path
147 |                 if not save_path.endswith("/"):
148 |                     save_path = save_path + "/"
149 |                 full_path = self.__create_path(save_path)
150 |                 with open(rel_path(save_path, logtype + ".json"), "a") as f:
151 |                     for activity in my_activities:
152 |                         f.write(json.dumps(activity) + "\n")
153 | 
154 |             req = activities.list_next(req, resp)
155 |             page_index += 1
156 |         return result
157 | 
158 |     def query(
159 |         self,
160 |         logtype: str,
161 |         user: str = "all",
162 |         max_results: int = 1000,
163 |         max_pages: int = None,
164 |         start_time: str = None,
165 |         end_time: str = None,
166 |         save=False,
167 |         nd=False,
168 |         path=None,
169 |         return_as_df=True,
170 |         **kwargs,
171 |     ) -> list:
172 |         """
173 |         Queries the API directly. Returns a DataFrame of all log files.
174 |         Args:
175 |             logtype: 'all' or a logtype such as 'admin' or 'login'.
176 |             user: 'all' (default) or a userId or user email address
177 |             max_results: maximum results per page (default 1000, which is also the API maximum)
178 |             max_pages: max number of pages (default: None, as many pages as available)
179 |             start_time: in rfc3339 format
180 |             end_time: in rfc3339 format
181 |             save: should this query be saved directly to storage
182 |             path: directory to save under
183 |         """
184 |         if not self.api_ready:  # first initialize the api
185 |             self.__init_api_creds()
186 | 
187 |         if logtype == "all":
188 |             logtype = config["logs"]  # all logs
189 |         elif type(logtype) == str:
190 |             logtype = [logtype]  # convert to a list of len 1
191 | 
192 |         results = {"activities": dict()}
193 |         total_activity_count = 0
194 |         save_path = self.__default_path_name()
195 |         if path:
196 |             save_path = path
197 |         for typ in logtype:
198 |             res = self.query_one(
199 |                 save_path, save, typ, user, max_results, max_pages, start_time, end_time
200 |             )
201 |             total_activity_count += res
202 |             print(f"{typ:>25}:", f"{res:>6}", "activities")
203 | 
204 |         print("\n", total_activity_count, "activities saved to:", save_path)
205 | 
206 |         if return_as_df:
207 |             return self.load_all(f"{save_path}")
208 |         return results
209 | 
210 |     def compute_df(self, activities_json: dict) -> pd.DataFrame:
211 |         return pd.json_normalize(activities_json)
212 | 
213 |     def get_activities_df(self, data: dict) -> pd.DataFrame:
214 |         """
215 |         expects a dict in the form: {query: {...}, activities: [...] | {...}}
216 |         extracts the activities, normalizes them, and returns them as a dataframe.
217 |         if activities is a dict, it assumes that the dict is in the form {'logtype': [...activities...]}, and
218 |         appends a new column, 'logtype', to the dataframe
219 |         """
220 |         activities = data["activities"]
221 |         if type(activities) == list:
222 |             return pd.json_normalize(activities)
223 |         if type(activities) == dict:
224 |             prev_df = pd.DataFrame()
225 |             for key in activities:
226 |                 df = pd.json_normalize(activities[key])
227 |                 df["logtype"] = key
228 |                 prev_df = pd.concat([prev_df, df], ignore_index=True)
229 |             return prev_df
230 |         return None
231 | 
232 |     def __default_path_name(self):
233 |         """the default naming convention for paths. This is produced as a datetime string corresponding approx.
to when the query was initiated"""
234 |         return os.path.join(DATA_DIR, datetime.utcnow().strftime("%y%m%d.%H%M%S"))
235 | 
236 |     def save(self, data: dict, save_path: str, filename: str, nd: bool):
237 |         """saves the raw JSON along with its metadata"""
238 |         if not (save_path[0] == "/" or save_path.startswith("./")):
239 |             save_path = "./" + save_path
240 |         if not save_path.endswith("/"):
241 |             save_path = save_path + "/"
242 |         full_path = self.__create_path(save_path)
243 | 
244 |         if "activities" in data and type(data["activities"]) == list and nd:
245 |             with open(rel_path(full_path, filename), "w") as f:
246 |                 for record in data["activities"]:
247 |                     f.write(json.dumps(record) + "\n")
248 |         else:
249 |             with open(rel_path(full_path, filename), "w") as f:
250 |                 json.dump(data, f)
251 |         return data
252 | 
253 |     def load(self, json_file: str, as_activities_df: bool = True):
254 |         """
255 |         loads a dataset from a json file. Expects regular JSON.
256 |         If it encounters a JSONDecodeError, it assumes the file is in NDJSON format (newline delimited)
257 |         and attempts to parse each record separately.
258 |         """
259 |         with open(json_file) as f:
260 |             try:
261 |                 data = json.load(f)
262 |             except JSONDecodeError:
263 |                 f.seek(0)
264 |                 activities = []
265 |                 for line in f:
266 |                     activities.append(json.loads(line))
267 |                 data = {"activities": activities}
268 |         if as_activities_df:
269 |             return self.get_activities_df(data)
270 |         return data
271 | 
272 |     def load_all(self, data_folder: str, as_activities_df: bool = True):
273 |         all_files = os.listdir(data_folder)
274 |         all_files = [os.path.join(data_folder, x) for x in all_files]
275 |         only_json = filter(
276 |             lambda x: os.path.isfile(x) and x.endswith(".json"), all_files
277 |         )
278 |         result = {"activities": {}}
279 |         for f in only_json:
280 |             data = self.load(f, as_activities_df=False)
281 |             logtype = os.path.basename(f).split(".json")[0]
282 |             activities = data["activities"]
283 |             result["activities"][logtype] = activities
284 |         if as_activities_df:
285 |             return self.get_activities_df(result)
286 |         return result
287 | 
--------------------------------------------------------------------------------
/tutorial/tutorial.md:
--------------------------------------------------------------------------------
1 | # ALFA Tutorial
2 | 
3 | ## Setup
4 | Having installed ALFA (see the [README](../README.md)), begin by initializing a new project directory.
5 | 
6 | ```alfa init tutorial```
7 | 
8 | You should now have a new directory called "tutorial". ```cd``` into it.
9 | Inside, you'll find a structure similar to the following:
10 | ```
11 | .
12 | ├── config
13 | │   └── config.yml
14 | └── data
15 | ```
16 | 
17 | Ordinarily, you would place a ```credentials.json``` file into the "config" directory. This won't be necessary for the tutorial.
18 | Instead, a pre-made dataset will be used.
19 | ```cd``` into the "data" directory, and
20 | clone the sample dataset
21 | [here](https://github.com/invictus-ir/gws_dataset.git).
22 | ```
23 | git clone https://github.com/invictus-ir/gws_dataset.git
24 | ```
25 | 
26 | Your directory structure should now look like:
27 | ```
28 | .
29 | ├── config
30 | │   └── config.yml
31 | └── data
32 |     └── gws_dataset
33 |         ├── LICENSE
34 |         ├── README.md
35 |         ├── admin.json
36 |         ├── calendar.json
37 |         ├── drive.json
38 |         ├── groups_enterprise.json
39 |         ├── login.json
40 |         ├── token.json
41 |         └── user_accounts.json
42 | ```
43 | 
44 | You are now ready to run ALFA against the dataset.
45 | 
46 | ## Running ALFA
47 | ```cd``` back into the root of your project folder.
48 | Load the entire gws_dataset directory using the following command:
49 | 
50 | ```
51 | alfa load -p data/gws_dataset
52 | ```
53 | ALFA will automatically load every json file in the directory into its dataset.
54 | 
55 | You will be dropped into a Python shell with access to an ALFA object, in the variable ```A```.
56 | ```A``` has 2 important attributes: ```A.events``` and ```A.activities```.
57 | 
58 | These are datasets that represent the events and activities present in the logs.
59 | In Google Audit Logs, every action is represented by an "activity". Each "activity" contains a list of "events". These events are essentially the building blocks of activities. When loaded, ALFA will automatically analyze the events and classify specific events in accordance with the "[MITRE ATT&CK Cloud Matrix Framework](https://attack.mitre.org/matrices/enterprise/cloud/)".
60 | 
61 | Events and activities carry a lot of data. Let's explore the dataset to get an understanding of what it contains.
62 | 
63 | ```
64 | A.events.shape # (rows, columns)
65 | A.activities.shape # (rows, columns)
66 | ```
67 | We have 9825 events and 9789 activities: far too many to list.
68 | Let's select a random sample of 10 from each, and produce a summary, to get an idea of what kind of data we're looking at.
69 | Starting with events:
70 | ```
71 | summary(A.events.sample(10))
72 | 
73 |       name       activity_time                     activity_id
74 | ----  ---------  --------------------------------  --------------------
75 | 6417  authorize  2022-07-19 12:21:35.002000+00:00  -6241941505348084839
76 | 6295  authorize  2022-07-19 12:18:15.883000+00:00  4289095237957192984
77 | 6257  authorize  2022-07-19 12:17:15.685000+00:00  -3533409653085737212
78 | 5378  authorize  2022-07-19 10:51:12.786000+00:00  -3453280989539057832
79 | 6786  authorize  2022-07-19 12:31:54.947000+00:00  4287956763103796901
80 | 8773  authorize  2022-07-19 13:53:10.243000+00:00  4880599880504672506
81 | 7812  authorize  2022-07-19 13:00:25.098000+00:00  -872164451340203101
82 | 2310  authorize  2022-07-19 09:10:31.579000+00:00  7105495284863502655
83 | 3080  authorize  2022-07-19 09:31:51.872000+00:00  4333762167492228608
84 | 3902  authorize  2022-07-19 10:09:28.796000+00:00  -4366756655797941829
85 | ```
86 | 
87 | and moving onto activities:
88 | ```
89 | summary(A.activities.sample(10))
90 | 
91 |                       id.time                   kind                    actor.email               id.applicationName
92 | --------------------  ------------------------  ----------------------  ------------------------  --------------------
93 | -6010887833366957832  2022-07-19T08:00:47.766Z  admin#reports#activity  admin@cloud-response.com  token
94 | 7764407026099878510   2022-07-19T08:53:11.775Z  admin#reports#activity  admin@cloud-response.com  token
95 | 2407139138637242658   2022-07-19T08:26:52.110Z  admin#reports#activity  admin@cloud-response.com  token
96 | -6978932130952386443  2022-07-19T09:12:01.777Z  admin#reports#activity  admin@cloud-response.com  token
97 | 2956946567129110438   2022-07-19T10:35:40.534Z  admin#reports#activity  admin@cloud-response.com  token
98 | -5470615884955105544  2022-07-19T14:02:30.155Z  admin#reports#activity  admin@cloud-response.com  token
99 | 4429007405214477146   2022-07-19T10:22:22.940Z  admin#reports#activity  admin@cloud-response.com  token
100 | 3378976036216458085   2022-07-19T08:30:11.927Z  admin#reports#activity  admin@cloud-response.com  token
101 | 6860468462391716631   2022-08-02T13:41:54.960Z  admin#reports#activity  admin@cloud-response.com  token
102 | 1895767229440860272   2022-07-19T10:48:12.979Z  admin#reports#activity  admin@cloud-response.com  token
103 | ```
104 | 
105 | From the summary, note the following:
106 | - Each event has a name, and belongs to an activity
107 | - Each activity has a kind. Some have emails belonging to the user that initiated that activity.
108 | 
109 | Suppose you wanted to take a deeper look at the events belonging to one of these activities.
110 | We'll select the activity_id of the first activity, "-6010887833366957832", for this example.
111 | 
112 | ```
113 | A.activities.loc['-6010887833366957832'].events
114 | 
115 | [{'name': 'authorize',
116 |   'parameters': [{'name': 'client_id', 'value': '106850843410684334493'},
117 |    {'name': 'app_name', 'value': '106850843410684334493'},
118 |    {'name': 'client_type', 'value': 'WEB'},
119 |    {'name': 'scope_data',
120 |     'multiMessageValue': [{'parameter': [{'name': 'scope_name',
121 |        'value': 'https://www.googleapis.com/auth/admin.reports.audit.readonly'},
122 |       {'name': 'product_bucket', 'multiValue': ['GSUITE_ADMIN']}]}]},
123 |    {'name': 'scope',
124 |     'multiValue': ['https://www.googleapis.com/auth/admin.reports.audit.readonly']}],
125 |   'attack.label': ['application_access_token.use_alternate_authentication_material.defense_evasion',
126 |    'steal_application_access_token.credential_access'],
127 |   'attack.category': ['defense_evasion', 'credential_access'],
128 |   'attack.index': 4,
129 |   'activity_id': '-6010887833366957832',
130 |   'activity_time': Timestamp('2022-07-19 08:00:47.766000+0000', tz='UTC')}]
131 | 
132 | ```
133 | 
134 | Reading through the mess of data, we see it's an authorize event (like the events in our sample). Aside from the data that Google provides, the events are also marked with an "attack.label", "attack.category" and "attack.index".
135 | 
136 | These columns are added by ALFA during analysis. "attack.label" contains a list of the *full* MITRE ATT&CK paths. "attack.category" is the last portion of each path, and "attack.index" is a value that corresponds to the label: the higher the "attack.index", the further along the event is in the MITRE ATT&CK Cloud Matrix Framework. This is useful for calculating "Kill Chains", as will be explored in the following section.
137 | 
138 | ## Kill Chain Analysis
139 | Every ALFA object (```A```) is a collection of activities and events. The events can be analysed to assess how closely they fit a "kill chain", using the "kill chain statistic".
140 | The "kill chain statistic", or kcs, is defined as the tendency for a set of chronologically ordered events to escalate up the MITRE ATT&CK Cloud Matrix Framework. In other words: "how well does my dataset fit a kill chain?". It is a floating point score between -1 and 1. A score of 1 indicates a perfect kill chain, -1 indicates moving in the complete opposite direction, and a score close to 0 indicates undirected events (no pattern).
141 | 
142 | Let's grab the kcs for the entire dataset:
143 | ```
144 | A.kcs()
145 | 
146 | 0.09208973121583104
147 | ```
148 | 
149 | We have a score just shy of 0.1. This low score is to be expected for the entire dataset. However, there may exist kill chains _within_ the dataset. To discern these, ALFA has a "subchains" method. Before running it, the short sketch below illustrates how the statistic behaves.
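The following is a simplified re-implementation of the logic in alfa/main/kill_chain.py, for intuition only. It omits the real implementation's NaN handling and its small penalty for repeated indexes, and the function name ```kcs_sketch``` is hypothetical, not part of ALFA's API.

```
# Simplified kill chain statistic: +1 per escalation, -1 per de-escalation,
# normalized by the number of changing transitions.
def kcs_sketch(indexes):
    result, transitions = 0, 0
    prev = indexes[0]
    for value in indexes[1:]:
        if value != prev:
            result += 1 if value > prev else -1
            transitions += 1
            prev = value
    return result / transitions if transitions else 0

print(kcs_sketch([1, 2, 4, 3]))  # (+1 +1 -1) / 3 ~ 0.33
print(kcs_sketch([1, 3, 5, 7]))  # 1.0: a perfect escalation
print(kcs_sketch([7, 5, 3, 1]))  # -1.0: a perfect reverse chain
```

With that intuition in hand, let's find some subchains.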
150 | 
151 | ```
152 | summary( A.subchains() )
153 | 
154 | ----  ----  --------
155 | 9428  9435  0.928571
156 | 9680  9687  0.857143
157 |   12    19  0.714286
158 |   26    33  0.714286
159 |   33    40  0.714286
160 |  151   158  0.714286
161 | 9366  9373  0.714286
162 | 9814  9821  0.714286
163 |   76    83  0.714286
164 | 9500  9507  0.714286
165 | ----  ----  --------
166 | 
167 | ```
168 | 
169 | Here we have a list of kill chains, each with 3 values. The first value is the start index of the kill chain: the index of the first event in the chain, e.g.
170 | ```A.events.loc[9428]```. The second value is the end index of the kill chain. Lastly, there is the kcs for the given kill chain. Note that the subchains are ordered highest-kcs first.
171 | 
172 | 0.93 is a very high score! Let's take a closer look at those events:
173 | ```
174 | summary(A.events[9428:9435])
175 | 
176 |       name                activity_time                     activity_id
177 | ----  ------------------  --------------------------------  --------------------
178 | 9428  change_user_access  2022-08-02 07:12:37.638000+00:00  -8593694044162584673
179 | 9429  create              2022-08-02 07:12:37.638000+00:00  -8593694044162584673
180 | 9430  change_acl_editors  2022-08-02 07:12:37.638000+00:00  -8593694044162584673
181 | 9431  add_to_folder       2022-08-02 07:12:37.638000+00:00  -8593694044162584673
182 | 9432  login_verification  2022-08-02 07:13:37.996000+00:00  258855114937
183 | 9433  login_success       2022-08-02 07:13:37.996000+00:00  258855114937
184 | 9434  download            2022-08-02 07:13:57.079000+00:00  3276112931527544503
185 | ```
186 | 
187 | The event names give an overview of what occurred at this moment. By displaying the activity_time, they can also direct you to particular points in the log which may be of interest.
188 | 
189 | It may also help to look at the activities these events belong to. Note that there are only 3 activities associated with these 7 events.
190 | 
191 | ```
192 | summary( A.events[9428:9435].activities() )
193 |                       id.time                   kind                    actor.email                   id.applicationName
194 | --------------------  ------------------------  ----------------------  ----------------------------  --------------------
195 | -8593694044162584673  2022-08-02T07:12:37.638Z  admin#reports#activity  workspace@cloud-response.com  drive
196 | 258855114937          2022-08-02T09:32:37.406Z  admin#reports#activity  workspace@cloud-response.com  login
197 | 3276112931527544503   2022-08-02T07:13:57.079Z  admin#reports#activity  workspace@cloud-response.com  drive
198 | 
199 | ```
200 | 
201 | Here we can see which account is associated with the behavior, and where it originated from.
202 | 
203 | ## Activities of Interest
204 | 
205 | As mentioned above, finding interesting activities can aid the discovery of interesting portions of the dataset.
206 | To automate this, one can use the "activities of interest" (```aoi```) method:
207 | 
208 | ```
209 | In [26]: summary( A.aoi() )
210 |                       id.time                   kind                    actor.email                    id.applicationName
211 | --------------------  ------------------------  ----------------------  -----------------------------  --------------------
212 | -8637423948085216889  2022-03-14T18:07:54.887Z  admin#reports#activity  admin@cloud-response.com       admin
213 | 768087181562          2022-03-19T15:24:52.241Z  admin#reports#activity  greg@cloud-response.com        login
214 | -7684398170435703864  2022-03-14T20:36:48.966Z  admin#reports#activity  greg@cloud-response.com        calendar
215 | -5686198897511485377  2022-03-19T19:38:47.642Z  admin#reports#activity  admin@cloud-response.com       groups_enterprise
216 | 4000677510509368906   2022-03-19T21:13:29.295Z  admin#reports#activity  greg@cloud-response.com        token
217 | -4185571506150141986  2022-03-19T21:31:14.993Z  admin#reports#activity  greg@cloud-response.com        token
218 | 8275857749769031410   2022-03-19T21:35:04.656Z  admin#reports#activity  greg@cloud-response.com        token
219 | -4582372916506076442  2022-03-19T22:17:46.663Z  admin#reports#activity  admin@cloud-response.com       token
220 | 722534617001          2022-08-15T16:24:08.797Z  admin#reports#activity  admin@cloud-response.com       login
221 | ...
222 | ```
223 | 
224 | The ```aoi``` method will return a list of all activities whose events appeared in a subchain. As such, it's a quick shortcut for finding interesting sections of the logs.
225 | 
226 | These activities can be exported to a json file, to be fed into a tool of your choosing:
227 | ```
228 | A.aoi(export='wow.json')
229 | ```
230 | 
231 | 
--------------------------------------------------------------------------------