├── alfa
│   ├── __init__.py
│   ├── main
│   │   ├── __init__.py
│   │   ├── activity.py
│   │   ├── event.py
│   │   ├── analyser.py
│   │   ├── alfa.py
│   │   ├── kill_chain.py
│   │   └── collector.py
│   ├── utils
│   │   ├── UTILS.md
│   │   ├── path.py
│   │   ├── summary.py
│   │   ├── event_mitre_remap.py
│   │   └── mappings.yml
│   ├── config
│   │   ├── internals.yml
│   │   ├── __internals__.py
│   │   ├── __init__.py
│   │   ├── logo
│   │   ├── config.yml
│   │   └── event_to_mitre.yml
│   ├── __main__.py
│   ├── project_creator
│   │   └── __init__.py
│   └── cmdline.py
├── .gitignore
├── .github
│   └── dependabot.yaml
├── setup.py
├── requirements.txt
├── LICENSE
├── License.md
├── CREDENTIALS.md
├── README.md
└── tutorial
    └── tutorial.md
/alfa/__init__.py:
--------------------------------------------------------------------------------
1 | from .main import Alfa
2 | __version__ = '0.1.0'
3 |
--------------------------------------------------------------------------------
/alfa/main/__init__.py:
--------------------------------------------------------------------------------
1 | from .alfa import Alfa, Analyser, KillChain
2 | from .collector import Collector
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .venv/*
2 | .vscode/*
3 | test/*
4 | __pycache__
5 | *.json
6 | ALFA.egg-info/
7 | **/config.yml
--------------------------------------------------------------------------------
/alfa/utils/UTILS.md:
--------------------------------------------------------------------------------
1 | Scripts that help perform specific functions. Not necessarily part of the audit process.
2 | Use event_mitre_remap.py to add new mappings to config/event_to_mitre.yml.
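
For a quick sense of what the remap does (a sketch, assuming the `alfa` package is importable), the script inverts the nested mapping so that each event name points at its dot-joined attack categories:

```python
from alfa.utils.event_mitre_remap import event_to_mitre

# a tiny slice of mappings.yml, expressed as Python data
mappings = {'persistence': {'account_manipulation': ['2sv_disable']}}

# invert it: event name => list of 'technique.tactic' labels
print(event_to_mitre('', mappings, {}))
# {'2sv_disable': ['account_manipulation.persistence']}
```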
--------------------------------------------------------------------------------
/.github/dependabot.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "pip"
4 | directory: "/"
5 | schedule:
6 | interval: "weekly"
7 | day: "monday"
8 | time: "09:00"
--------------------------------------------------------------------------------
/alfa/config/internals.yml:
--------------------------------------------------------------------------------
1 | # Used internally for storing constants
2 |
3 | project:
4 | dirs:
5 | configs: config
6 | data: data
7 | files:
8 | config: config.yml
9 | creds: credentials.json
10 | token: token.json
11 |
--------------------------------------------------------------------------------
/alfa/config/__internals__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import yaml
4 | from types import SimpleNamespace
5 | import os.path
6 | from ..utils.path import CONFIG_DIR
7 |
8 | with open(os.path.join(CONFIG_DIR, 'internals.yml')) as f:
9 |     internals = yaml.safe_load(f)
12 |
--------------------------------------------------------------------------------
/alfa/config/__init__.py:
--------------------------------------------------------------------------------
1 | from ..utils.path import rel_path, CONFIG_DIR
2 | import yaml, os.path
3 | 
4 | relative_config = './config/config.yml'  # used when inside of a project directory
5 | if os.path.exists(relative_config):
6 |     with open(relative_config) as f:
7 |         config = yaml.safe_load(f)
8 | else:
9 |     with open(rel_path(CONFIG_DIR, 'config.yml')) as f:
10 |         config = yaml.safe_load(f)
11 | 
12 |
--------------------------------------------------------------------------------
/alfa/__main__.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from .cmdline import Parser
3 | from .config import CONFIG_DIR
4 |
5 | pd.set_option('display.max_colwidth', None)
6 |
7 | with open(f'{CONFIG_DIR}/logo') as f:
8 | logo = f.read()
9 |
10 | def main():
11 | print(logo)
12 | parser = Parser()
13 | args = parser.parser.parse_args()
14 | args.func(args)
15 | pass
16 |
17 | if __name__ == '__main__':
18 | main()
19 |
--------------------------------------------------------------------------------
/alfa/utils/path.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 | '''
3 | helper script to aid in relative imports
4 | '''
5 | import os.path
6 |
7 | def rel_path(*args: str) -> str:
8 | if len(args) == 1: #typically __file__
9 | return os.path.dirname(args[0])
10 | return os.path.realpath(
11 | os.path.join(*args)
12 | )
13 |
14 | UTILS_DIR = rel_path(__file__)
15 | ROOT_DIR = rel_path(UTILS_DIR,'..')
16 | CONFIG_DIR = rel_path(ROOT_DIR,'config')
17 | DATA_DIR = rel_path(ROOT_DIR,'data')
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | VERSION = '0.1.0'
3 | from setuptools import setup
4 | import os.path
5 | mydir = os.path.dirname(__file__)
6 | req_path = os.path.join(mydir,'requirements.txt')
7 | with open(req_path) as f:
8 | requirements = f.readlines()
9 | setup(
10 | name='ALFA',
11 | description='',
12 | author='',
13 | packages=['alfa'],
14 | install_requires=requirements,
15 | entry_points={
16 | 'console_scripts': [
17 | 'alfa = alfa.__main__:main'
18 | ]
19 | },
20 | version=VERSION
21 | )
22 |
--------------------------------------------------------------------------------
/alfa/config/logo:
--------------------------------------------------------------------------------
1 |
2 | _ ______
3 | /\ | | | ____| /\
4 | / \ | | | |__ / \
5 | / /\ \ | | | __| / /\ \
6 | / ____ \ | |____ | | / ____ \
7 | /_/ \_\ |______| |_| /_/ \_\
8 |
9 |
10 |
11 | Google Workspace Audit Log Forensic Analysis
12 | Copyright (c) 2025 Invictus Incident Response
13 | Original authors (Greg Charitonos & @BertJanCyber) maintained by Invictus Incident Response
14 |
15 |
--------------------------------------------------------------------------------
/alfa/utils/summary.py:
--------------------------------------------------------------------------------
1 | from tabulate import tabulate
2 | from ..main.event import Events
3 | from ..main.activity import Activities
4 |
5 | event_columns = ['name','activity_time','activity_id']
6 |
7 | activity_columns = ['id.time','kind','actor.email','id.applicationName']
8 |
9 | def summary(data):
10 | '''
11 | wraps around tabulate
12 | prints a summary of data in a tabled format
13 | '''
14 | if type(data) == Events:
15 | print(tabulate(data[event_columns],headers=event_columns))
16 | elif type(data) == Activities:
17 | print(tabulate(data[activity_columns],headers=activity_columns))
18 | else:
19 | print(tabulate(data))
20 |
21 |
--------------------------------------------------------------------------------
/alfa/main/activity.py:
--------------------------------------------------------------------------------
1 | from pandas.core.series import Series
2 | from pandas.core.frame import DataFrame
3 |
4 |
5 | class Activity(Series):
6 |     '''
7 |     A single activity; a Series-like row of an Activities dataframe
8 |     '''
9 | @property
10 | def _constructor(self):
11 | return Activity
12 |
13 | @property
14 | def _constructor_expanddim(self):
15 | return Activities
16 |
17 | def __init__(self, *args, **kwargs):
18 | super().__init__(*args, **kwargs)
19 |
20 | @staticmethod
21 | def convert(series: Series) -> 'Activity':
22 | return Activity(series._data)
23 |
24 |
25 | class Activities(DataFrame):
26 | @property
27 | def _constructor(self):
28 | return Activities
29 |
30 | @property
31 | def _constructor_sliced(self):
32 | return Activity
33 |
34 | def __init__(self, *args, **kwargs) -> None:
35 | super().__init__(*args, **kwargs)
36 | if 'id.uniqueQualifier' in self.columns:
37 | self.set_index('id.uniqueQualifier',inplace=True)
38 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | asttokens==2.0.5
2 | backcall==0.2.0
3 | cachetools==4.2.4
4 | certifi==2024.7.4
5 | charset-normalizer==2.0.10
6 | decorator==5.1.1
7 | executing==0.8.3
8 | google-api-core==2.4.0
9 | google-api-python-client==2.35.0
10 | google-auth==2.3.3
11 | google-auth-httplib2==0.1.0
12 | google-auth-oauthlib==0.4.6
13 | googleapis-common-protos==1.54.0
14 | httplib2==0.20.2
15 | idna==3.7
16 | ipython==8.10.0
17 | jedi==0.18.1
18 | matplotlib-inline==0.1.3
19 | numpy>=1.26.0,<2.0.0
20 | oauthlib==3.2.2
21 | pandas==1.3.5
22 | parso==0.8.3
23 | pexpect==4.8.0
24 | pickleshare==0.7.5
25 | prompt-toolkit==3.0.29
26 | protobuf==4.25.8
27 | ptyprocess==0.7.0
28 | pure-eval==0.2.2
29 | pyasn1==0.4.8
30 | pyasn1-modules==0.2.8
31 | Pygments==2.15.0
32 | pyparsing==3.0.6
33 | python-dateutil==2.8.2
34 | pytz==2021.3
35 | PyYAML>=6.0.1
36 | requests==2.32.4
37 | requests-oauthlib==1.3.0
38 | rsa==4.8
39 | six==1.16.0
40 | stack-data==0.3.0
41 | tabulate==0.8.9
42 | traitlets==5.3.0
43 | uritemplate==4.1.1
44 | urllib3==2.6.0
45 | wcwidth==0.2.5
46 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Invictus Incident Response
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/License.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Invictus Incident Response
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/alfa/config/config.yml:
--------------------------------------------------------------------------------
1 | scopes:
2 | - https://www.googleapis.com/auth/admin.reports.audit.readonly
3 |
4 | logs:
5 | [
6 | "chrome",
7 | "admin",
8 | "access_transparency",
9 | "context_aware_access",
10 | "gplus",
11 | "data_studio",
12 | "mobile",
13 | "groups_enterprise",
14 | "calendar",
15 | "chat",
16 | "gcp",
17 | "drive",
18 | "groups",
19 | "keep",
20 | "meet",
21 | "jamboard",
22 | "login",
23 | "token",
24 | "rules",
25 | "saml",
26 | "user_accounts",
27 | ]
28 |
29 | activity_defaults:
30 | columns:
31 | [
32 | "kind",
33 | "etag",
34 | "ipAddress",
35 | "events",
36 | "id.time",
37 | "id.applicationName",
38 | "id.customerId",
39 | "actor.callerType",
40 | "actor.email",
41 | "actor.profileId",
42 | "actor.key",
43 | ]
44 | index:
45 | - id.uniqueQualifier
46 |
47 | kill_chain:
48 | min_chain_length: 7 # for kill chain discovery (subchains). A subchain only exists if its length >= min_chain_length
49 |   min_chain_statistic: 0.6 # a subchain only exists if its kcs value is >= min_chain_statistic
50 | index_reducer: max # max / min / mean. Some events have multiple categories. This is the reducer used to assign a single value to the event.
51 | max_slack_width: 5 # how "long" the slack line can be
52 | max_slack_depth: 7 # how "low" the slack line can hang
53 |
--------------------------------------------------------------------------------
/alfa/utils/event_mitre_remap.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 |
3 | def event_to_mitre(root: str,_obj: object,event_dict: dict) -> dict:
4 |
5 | '''
6 | This is typically run as a standalone file, alongside mappings.yml.
7 | Takes the mitre mappings.yml file and inverts it,
8 | such that events map to attacks. event => attack.category. this is saved to config/event_to_mitre.yml
9 |
10 | event_to_mitre takes a root string (initialized as ''), an object to remap, and the event dictionary. It outputs the event dictionary, filled.
11 |
12 | if _obj is a list, then it must be a list of events
13 | for each event:
14 | if the event is not in the dictionary, initialize as empty list []
15 | append the root to the dictionary
16 |
17 | if _obj is NOT a list, then it must be a dictionary
18 | for each key in the dictionary, prepend the key to the root:
19 | new_root = key + '.' + old_root
20 | then perform recursion, calling event_to_mitre with
21 | new_root, _obj[key] as _obj and the event_dict
22 |
23 | This is a recursive operation.
24 | '''
25 |
26 | if type(_obj) == list:
27 | for event in _obj:
28 | if not event in event_dict:
29 | event_dict[event] = []
30 | event_dict[event].append(root[:-1])
31 | return event_dict
32 |
33 | for key in _obj:
34 | event_to_mitre(f'{key}.{root}',_obj[key],event_dict)
35 | return event_dict
36 |
37 |
38 | if __name__ == '__main__':
39 | import yaml
40 | from utils.path import *
41 |
42 | input_file = rel_path(UTILS_DIR,'mappings.yml')
43 | output_file = rel_path(CONFIG_DIR,'event_to_mitre.yml')
44 |
45 | mappings = yaml.safe_load(open(input_file))
46 | event_dict = dict()
47 | event_to_mitre('',mappings,event_dict)
48 |
49 | with open(output_file,'w') as f:
50 | yaml.safe_dump(event_dict,f)
51 |
52 | print('saved to', output_file)
--------------------------------------------------------------------------------
/alfa/project_creator/__init__.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | import os, os.path, shutil
3 | from ..utils.path import CONFIG_DIR
4 | from ..config.__internals__ import internals
5 |
6 | class Project:
7 |
8 |     """Creates and sets up a project, utilized in the "alfa init" command."""
9 |
10 | def __init__(self,path: str):
11 | """
12 | :path: root path of project. Can be relative. Can be "."
13 |
14 | """
15 | self._path = path
16 | abs_path = os.path.abspath(path)
17 | print('initializing project:',abs_path)
18 | self.__main__()
19 | print('complete')
20 | print('---')
21 | print('Please copy your credentials.json to config/credentials.json')
22 | pass
23 |
24 |     def __check_can_overwrite(self, path: str):
25 |         ''' if path exists and is not empty, ask the user before overwriting. If it does not exist, it is created '''
26 |         if not os.path.exists(path):
27 |             os.mkdir(path)
28 |             return True
29 | 
30 |         is_empty = len(os.listdir(path)) == 0
31 |         if is_empty:
32 |             return True
33 | 
34 |         print(os.path.abspath(path), 'is not empty. Are you sure you want to overwrite?')
35 |         choice = input('y/[n]: ')
36 |         if len(choice) and choice[0].lower() == 'y':
37 |             return True
38 |         return False
39 |
40 | def create_folder_structure(self,conf_path: str, data_path: str):
41 | ''' creates the folder structure in the root directory '''
42 | safe_mk_root = self.__check_can_overwrite(self._path)
43 | if not safe_mk_root:
44 | return False, False, False
45 |
46 | safe_mk_conf = self.__check_can_overwrite(conf_path)
47 |
48 | safe_mk_data = self.__check_can_overwrite(data_path)
49 |
50 | return safe_mk_root, safe_mk_conf, safe_mk_data
51 |
52 | def copy_default_config(self,old_conf_path: str, new_conf_path: str):
53 | if os.path.exists(new_conf_path):
54 | return False
55 | shutil.copy(old_conf_path, new_conf_path)
56 | return True
57 |
58 | def __main__(self):
59 | root = self._path
60 | old_conf_path = os.path.join(CONFIG_DIR,'config.yml')
61 | conf_dir = os.path.join(root,internals['project']['dirs']['configs'])
62 | new_conf_path = os.path.join(conf_dir,internals['project']['files']['config'])
63 | data_path = os.path.join(root,internals['project']['dirs']['data'])
64 | ok_root, ok_conf, ok_data = self.create_folder_structure(conf_dir,data_path)
65 | if not all([ok_root, ok_conf, ok_data]):
66 | print('some files may have been overwritten')
67 | if os.path.exists(new_conf_path):
68 | print('config already exists, skipping copying default config')
69 | return True
70 | self.copy_default_config(old_conf_path,new_conf_path)
71 | return True
72 |
--------------------------------------------------------------------------------
/alfa/main/event.py:
--------------------------------------------------------------------------------
1 | from pandas.core.series import Series
2 | from pandas.core.frame import DataFrame
3 | from .activity import Activity
4 |
5 |
6 | class Events(DataFrame):
7 | '''
8 |     Events is a dataframe containing events. It has a custom property, parent, which references its parent Alfa object.
9 | 
10 |     Each Event *class* is dynamically generated from the current Events instance. This is because each instance of the class needs a reference
11 |     to its parent (Events).
12 | 
13 |     Each event's Activity can be accessed through the .activity accessor, e.g. events.iloc[0].activity => Activity.
14 |     This is done by calling the Alfa.activity_by_id method.
15 | 
16 |     When accessing an event's activity, the event passes the activity id up the chain, and then the Alfa object passes it down:
17 | 
18 |     event /> Events /> Alfa \> activities \> activity
19 | '''
20 | @property
21 | def _constructor(self):
22 | return Events
23 |
24 | @property
25 | def _constructor_sliced(self):
26 | return EventConstructor(self)
27 |
28 | _metadata = ['parent']
29 |
30 | def __init__(self, *args, **kwargs) -> None:
31 | super().__init__(*args, **kwargs)
32 |
33 | def activity(self, uid: str) -> Activity:
34 | return self.parent.activity_by_id(uid)
35 |
36 | def activities(self):
37 | ids = self['activity_id'].unique()
38 |         activities = self.parent.activities.loc[ids]  # .loc can return duplicate rows when the index is not unique
39 | return activities[~activities.index.duplicated()]
40 |
41 |
42 | def get_event_slices(self, slices: list):
43 | '''
44 | slices: list of iterables with an internal shape of at least 2
45 | only the first 2 items in the internal shape are regarded.
46 | e.g [ [0,5], [7,22], ...]
47 | returns a list of slices from the events dataframe
48 | '''
49 | out = []
50 | for item in slices:
51 | assert len(item) > 1
52 | s = slice(item[0], item[1])
53 | out.append(self[s])
54 | return out
55 |
56 | def EventConstructor(parent=None):
57 | class Event(Series):
58 | @property
59 | def _constructor(self):
60 | return Event
61 |
62 | @property
63 | def _constructor_expanddim(self):
64 | return Events
65 | _metadata = ['name', 'parent', 'activity_id']
66 |
67 | def __init__(self, *args, **kwargs):
68 | super().__init__(*args, **kwargs)
69 | self.parent = parent
70 |
71 | @property
72 | def activity(self):
73 | return self.parent.activity(self['activity_id'])
74 |
75 | return Event
76 |
--------------------------------------------------------------------------------
/CREDENTIALS.md:
--------------------------------------------------------------------------------
1 | # Getting Your Credentials
2 | This step by step guide takes you through the process of obtaining your API credentials.
3 | You can also watch [our YouTube video](https://youtu.be/Vk9ySK4E1S4?si=v6YfxnmsLL8hro21) where we show you the whole process.
4 |
5 | It is recommended that you register a new project for log collection, separate from other projects.
6 |
7 | ## Permissions
8 | First and foremost, the Admin SDK API used to collect the audit logs requires that the user has admin privileges. You can enable admin privileges through
9 | the following tutorial: https://support.google.com/a/answer/172176
10 |
11 | ## Creating an OAuth app
12 | 1. Go to https://console.cloud.google.com/cloud-resource-manager
13 | 2. Create a project, or use an existing project.
14 | 3. Go to https://console.developers.google.com/apis/dashboard
15 |    - Make sure your new project is selected: near the top-left of the page, close to the "Google Cloud Platform" logo, select your project
16 |    - In the sidebar, choose "Credentials", then "OAuth client ID"
17 | 4. Select "Create Credentials", then choose "OAuth client ID"
18 | 5. At this point you may be prompted to "configure consent screen". This is dealt with in the next section. If you do not receive this prompt,
19 |    skip ahead to the section "Create OAuth ID"
20 |
21 | ## Configuring The Consent Screen (Optional)
22 | 1. Select "configure consent screen"
23 | 2. Select the user type. If you do not know which to select, use the "Internal" type.
24 | 3. You will be prompted to fill in details about the "App"
25 |    - Give your app any unique name. It is recommended to use a descriptive name such as "Alfa Log Collecting"
26 |    - Fill out all the required fields. All other fields can be filled at your discretion.
27 |
28 | ### Adding Scopes (Optional)
29 | You will now be prompted to "add or remove scopes". This step is not necessary, as it will not affect the outcome of the script.
30 | However, if you intend to grant access to third parties, you may wish to disclose the scopes here.
31 |
32 | ## Create OAuth ID
33 | 1. Select "web application" as the application type
34 | 2. Give the "web application" a descriptive name, e.g. "Alfa Log Collecting API"
35 | 3. Under "Authorised redirect URIs" add "http://localhost:8089/"
36 | 4. Click create. A popup will appear with your API credentials.
37 | 5. Download the JSON to ```config/credentials.json``` in your project's folder
38 |
39 | ## Enabling APIs
40 | You will need to enable access to the Admin SDK.
41 | 1. Go to https://console.cloud.google.com/apis/library/admin.googleapis.com
42 | 2. Enable the SDK
43 |
44 | ## Afterword
45 | Your credentials are now ready for use. When the first query is run, a browser window will open and you will be prompted to grant the appropriate permissions.
46 | After this, a new 'token.json' file will appear in the config/ directory.
47 |
48 | **NOTE**: A common error occurs regarding the "refresh token". This may happen if you delete the token.json file. This is due to a security feature of Google Cloud:
49 | When you first grant permission to the script, the resulting token.json file contains a "refresh-token" parameter. This only occurs **once**. Subsequent recreation
50 | of the token.json file will **not** include this token.
51 | To retrieve a new refresh token, you must delete the existing permissions and re-grant them.
52 | The following explains how to remove third-party apps: https://support.google.com/accounts/answer/3466521
53 | After removal, simply run a query and re-grant the permissions.
54 |
--------------------------------------------------------------------------------
/alfa/main/analyser.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 | import yaml
3 | import pandas as pd
4 |
5 | from ..utils.path import rel_path, CONFIG_DIR, DATA_DIR
6 | from ..config import config
7 | from .kill_chain import KillChain
8 |
9 | class Analyser:
10 | '''
11 | uses /config/event_to_mitre.yml to map events in the log to the mitre attack framework.
12 |
13 | analyse... takes a df_name (e.g. login) as input and returns a dataframe of all suspicious records. These are records where
14 | at least 1 event exists within the event_to_mitre yml database.
15 |
16 | Each event for each record is given new attributes 'attack.label' 'attack.category' and 'attack.index' for all associated mitre attacks for that event.
17 | '''
18 | def __init__(self) -> None:
19 | self.event_mapping = yaml.safe_load(open(rel_path(CONFIG_DIR,'event_to_mitre.yml')))
20 |
21 | def analyse_all_files(self, email: list=None,filter=True, subdir=None) -> pd.DataFrame:
22 | '''Takes all files, analyses and concats into a single DataFrame'''
23 | df = pd.DataFrame()
24 | for log in config['logs']:
25 | log_df = self.analyse_from_file(log,email,filter=filter, subdir=subdir)
26 | df = df.append(log_df)
27 | return df
28 | def analyse_all(self,log_dict,email: list=None,filter=True) -> pd.DataFrame:
29 | '''takes dict of logs, and analyses and concats them into a single DataFrame'''
30 | df = pd.DataFrame()
31 | for log in log_dict:
32 | log_df = self.analyse(log_dict[log],email,filter=filter)
33 | df = df.append(log_df)
34 | return df
35 |
36 | def load_file(self,logtype,subdir=None):
37 |         '''Loads a file from the data/ directory. If subdir is given, loads from data/subdir/ instead.'''
38 | df_name = logtype+'.pkl'
39 | if subdir:
40 | filename = rel_path(DATA_DIR,subdir,df_name)
41 | else:
42 | filename = rel_path(DATA_DIR,df_name)
43 | return pd.read_pickle(filename)
44 |
45 | def analyse_from_file(self,logtype: str,email=None,filter=True, subdir=None):
46 | '''load file and pass to analyse method'''
47 | df = self.load_file(logtype, subdir=subdir)
48 | return self.analyse(df,email,filter=filter)
49 |
50 |     def label_row(self, row: pd.Series, email: list=None) -> tuple:
51 | '''labels given row from config/event_to_mitre.yml. If email is passed, will filter on those email/s'''
52 | has_labels = False
53 | if email:
54 | if 'actor.email' not in row:
55 | return None, has_labels
56 | if row['actor.email'] not in email:
57 | return None, has_labels
58 | for event in row['events']:
59 | if event['name'] in self.event_mapping:
60 | has_labels = True
61 | attack_label = self.event_mapping[event['name']]
62 | attack_category = [label.split('.')[-1] for label in attack_label]
63 | event['attack.label'] = attack_label
64 | event['attack.category'] = attack_category
65 | event['attack.index'] = KillChain.reduce_category_list(attack_category)
66 | return row, has_labels
67 |
68 | def analyse(self,df: pd.DataFrame,email: list=None, filter: bool=True) -> pd.DataFrame:
69 | '''takes a DataFrame, outputs labelled, *filtered, DataFrame. Filter will filter out benign events. If email is passed, will only contain events from that email address.'''
70 | mitre_df = []
71 | for row in df.iloc:
72 | row, add_row = self.label_row(row,email)
73 | if (filter and add_row) or not filter:
74 | mitre_df.append(row)
75 | return pd.DataFrame(mitre_df)
76 |
--------------------------------------------------------------------------------
/alfa/utils/mappings.yml:
--------------------------------------------------------------------------------
1 | # used by event_mitre_remap.py
2 | # remapped to /config/event_to_mitre.yml
3 |
4 | persistence:
5 | account_manipulation:
6 | [
7 | 2sv_disable,
8 | 2sv_enroll,
9 | password_change,
10 | recovery_info_change,
11 | recovery_phone_edit,
12 | titanium_change,
13 | titanium_unenroll,
14 | GRANT_ADMIN_PRIVILEGE,
15 | ADD_RECOVERY_PHONE,
16 | REVOKE_ADMIN_PRIVILEGE,
17 | REVOKE_ASP,
18 | ENABLE_USER_IP_WHITELIST,
19 | CHANGE_USER_ORGANIZATION,
20 | CHANGE_RECOVERY_EMAIL,
21 | CHANGE_RECOVERY_PHONE,
22 | GRANT_DELEGATED_ADMIN_PRIVILEGES,
23 | CHANGE_PASSWORD,
24 | CHANGE_PASSWORD_ON_NEXT_LOGIN,
25 | REMOVE_RECOVERY_EMAIL,
26 | REMOVE_RECOVERY_PHONE,
27 | RESET_SIGNIN_COOKIES,
28 | REVOKE_SECURITY_KEY,
29 | UNBLOCK_USER_SESSION,
30 | UNENROLL_USER_FROM_TITANIUM,
31 | DELETE_USER,
32 | UNENROLL_USER_FROM_STRONG_AUTH,
33 | UNDELETE_USER,
34 | UNSUSPEND_USER,
35 | add_member,
36 | add_member_role,
37 | add_security_setting,
38 | ban_member_with_moderation,
39 | change_security_setting,
40 | delete_group,
41 | join,
42 | remove_membership_expiry,
43 | remove_info_setting,
44 | remove_member,
45 | remove_member_role,
46 | remove_security_setting,
47 | unban_member,
49 | account_disabled_generic,
50 | MOBILE_DEVICE_APPROVE,
51 | ]
52 |
53 | create_account:
54 | cloud_account: [CREATE_USER]
55 |
56 | valid_accounts:
57 | default_accounts: []
58 | cloud_accounts:
59 | [
60 | suspicious_login,
61 | suspicious_login_less_secure_app,
62 | suspicious_programmatic_login,
63 | login_failure,
64 | login_challenge,
65 | login_verification,
66 | login_success,
67 | ]
68 |
69 | privilege_escalation:
70 | valid_accounts:
71 | default_accounts: []
72 | cloud_accounts:
73 | [
74 | suspicious_login,
75 | suspicious_login_less_secure_app,
76 | suspicious_programmatic_login,
77 | login_challenge,
78 | login_verification,
79 | login_success,
80 | ADD_PRIVILEGE,
81 | ASSIGN_ROLE,
82 | GRANT_ADMIN_PRIVILEGE,
83 | change_user_access
84 | ]
85 |
86 | defense_evasion:
87 | use_alternate_authentication_material:
88 | application_access_token:
89 | [
90 | ADD_TO_TRUSTED_OAUTH2_APPS,
91 | REMOVE_FROM_BLOCKED_OAUTH2_APPS,
92 | TRUST_DOMAIN_OWNED_OAUTH2_APPS,
93 | authorize,
94 | ]
95 | web_session_cookie: []
96 |
97 | valid_accounts:
98 | default_accounts: []
99 | cloud_accounts:
100 | [
101 | suspicious_login,
102 | suspicious_login_less_secure_app,
103 | suspicious_programmatic_login,
104 | login_challenge,
105 | login_verification,
106 | login_success,
107 | ALERT_CENTER_BATCH_DELETE_ALERTS,
108 | ALERT_CENTER_DELETE_ALERT
109 | ]
110 |
111 | credential_access:
112 | brute_force:
113 | password_guessing: [login_failure, login_challenge, login_verification, login_success]
114 | password_spraying: [login_failure, login_challenge, login_verification, login_success]
115 | credential_stuffing: [login_failure, login_challenge, login_verification, login_success]
116 |
117 | forge_web_credentials:
118 | saml_tokens: [login_success]
119 |
120 | steal_application_access_token: [authorize]
121 | steal_web_session_cookie: []
122 | unsecured_credentials: []
123 |
124 | discovery:
125 | account_discovery:
126 | email_account: [DOWNLOAD_UNMANAGED_USERS_LIST, DOWNLOAD_USERLIST_CSV]
127 | cloud_account: [DOWNLOAD_UNMANAGED_USERS_LIST, DOWNLOAD_USERLIST_CSV]
128 |
129 | cloud_service_dashboard: [ALERT_CENTER_VIEW]
130 | cloud_service_discovery: []
131 | permission_groups_discovery:
132 | cloud_groups: [GROUP_LIST_DOWNLOAD, GROUP_MEMBERS_DOWNLOAD]
133 | software_discovery:
134 | security_software_discovery: [UNTRUST_DOMAIN_OWNED_OAUTH2_APPS, TRUST_DOMAIN_OWNED_OAUTH2_APPS, action_complete]
135 |
136 | lateral_movement:
137 | internal_spearphishing: []
138 | use_alternate_authentication_material:
139 | application_access_token: [activity]
140 | web_session_cookie: []
141 |
142 | collection:
143 | data_from_information_repositories:
144 | [download, add_subscription, IMPORT_SSH_PUBLIC_KEY, VIEW_SITE_DETAILS, DOWNLOAD_REPORT, EXPORT_DEVICES]
145 | email_collection:
146 | remote_email_collection: [EMAIL_LOG_SEARCH]
147 | email_forwarding_rule:
148 | [email_forwarding_change, email_forwarding_out_of_domain, CREATE_EMAIL_MONITOR, CREATE_GMAIL_SETTING]
149 |
--------------------------------------------------------------------------------
/alfa/cmdline.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 | '''
3 | holds the parser configuration for the command line
4 | '''
5 | from argparse import ArgumentParser
6 | from .project_creator import Project
7 | from .main import *
8 | from IPython import embed
9 | import os.path, yaml
10 |
11 | from pprint import pprint
12 | from tabulate import tabulate
13 |
14 | from .utils.summary import summary
15 |
16 | banner = '''
17 | use 'A' to access the Alfa object. A? for more info
18 | '''
19 |
20 | class Parser:
21 | def __init__(self):
22 | self.parser = ArgumentParser()
23 | self.subparsers = self.parser.add_subparsers(title='subcommands',required=True,dest='subcommand',
24 | metavar='init, acquire, analyze, load')
25 | self.parser_init = self.subparsers.add_parser('init',
26 |             help='initialize a project directory')
27 | self.parser_acquire = self.subparsers.add_parser('acquire',aliases=['a','ac'],
28 | help='acquire audit log data and save to the data/ directory')
29 | self.parser_analyze = self.subparsers.add_parser('analyze',aliases=['aa','an'],
30 | help='acquire and analyze audit log data, dropping into an interactive shell')
31 | self.parser_load = self.subparsers.add_parser('load',aliases=['l'],
32 | help='load offline data, analyze and drop into a shell')
33 |
34 | self.add_init_args()
35 | self.add_load_args()
36 | self.add_default_args(self.parser_acquire)
37 | self.add_default_args(self.parser_analyze)
38 | self.add_analyze_args()
39 |
40 | self.parser_init.set_defaults(func=self.handle_init)
41 | self.parser_acquire.set_defaults(func=self.handle_acquire)
42 | self.parser_analyze.set_defaults(func=self.handle_analyze)
43 | self.parser_load.set_defaults(func=self.handle_load)
44 |
45 | def add_init_args(self):
46 | self.parser_init.add_argument('path',type=str,
47 | help='path to project directory')
48 | pass
49 |
50 | def add_load_args(self):
51 | self.parser_load.add_argument('-l','--logtype',type=str,default='all',
52 | help='log type to load e.g. "drive"')
53 | self.parser_load.add_argument('-p','--path', type=str, required=True,
54 | help='directory to load, e.g. --path data/foo')
55 | pass
56 |
57 | def add_analyze_args(self):
58 | self.parser_analyze.add_argument('-s','--save',action='store_true',
59 | help='save data to data/ to load later')
60 | pass
61 |
62 | def add_default_args(self, subparser):
63 | subparser.add_argument('-l','--logtype',type=str,default='all',
64 | help='log type to load e.g. "drive"')
65 | subparser.add_argument('-p','--path',type=str,
66 | help='save under path e.g. --path data/foobar')
67 | subparser.add_argument('--user', required=False, type=str, default='all')
68 | subparser.add_argument('--no-filter', required=False, action='store_false', dest='filter',
69 | help='disable filtering of benign activities from dataset')
70 | subparser.add_argument('--max-results',type=int,required=False,default=1000,
71 | help='max results per page. max value = 1000 (default)')
72 | subparser.add_argument('--max-pages',type=int,required=False,default=None,
73 | help='max number of pages to collect (default = as many as possible)')
74 | subparser.add_argument('-st','--start-time',type=str,required=False,default=None,
75 | help='start collecting from date (RFC3339 format)')
76 | subparser.add_argument('-et','--end-time',type=str,required=False,default=None,
77 | help='collect until date (RFC3339 format)')
78 | subparser.add_argument('-q','--query',type=str,
79 | help='supply a yaml file containing query information. e.g. logtype, save path etc.')
80 | subparser.add_argument('--nd',action='store_true',help='save data as newline delimited')
81 |
82 | def handle_init(self, args):
83 | project = Project(args.path)
84 | print('now run "alfa analyze"!')
85 | pass
86 |
87 | def handle_load(self, args):
88 | A = Alfa.load(args.logtype, path=args.path)
89 | # code.interact(banner=banner,local=locals())
90 | print(banner)
91 | embed(display_banner=False)
92 | pass
93 |
94 | def handle_acquire(self, args):
95 | if args.query:
96 | query = self.load_query(args.query)
97 | query['save'] = True
98 | A = Alfa.acquire(**query)
99 | else:
100 | query = vars(args)
101 | query['save'] = True
102 | A = Alfa.acquire(**query)
103 | # should interactivity be a thing for acquiring?
104 | # code.interact(banner=banner, local=locals())
105 | pass
106 |
107 | def handle_analyze(self, args):
108 | if args.query:
109 | query = self.load_query(args.query)
110 | A = Alfa.query(**query)
111 | else:
112 | A = Alfa.query(**vars(args))
113 | print(banner)
114 | embed(display_banner=False)
115 | pass
116 |
117 | def load_query(self,filename: str) -> dict:
118 | if not os.path.exists(filename):
119 | print('cannot find file:',filename)
120 | return dict()
121 | with open(filename) as f:
122 | query = yaml.safe_load(f)
123 | return query
124 |
125 | def do_summary(self,A: Alfa):
126 | print('\n\n---------- Events ---------------\n\n')
127 | pprint(A.events[['type','attack.category']].head())
128 | print('\n\n')
129 | print('num_events:',A.events.shape[0])
130 | print('num_activities:',A.activities.shape[0])
131 | print('\n--------------------------------------\n\n')
132 | print('\n---------- Kill Chains ---------------\n\n')
133 | print('kill chain statistic: ', A.kcs())
134 | print('subchains discovered: ')
135 | print(tabulate(A.subchains(),headers=['start','end','kcs'],tablefmt='fancy_grid'))
136 | print('\n--------------------------------------\n\n')
137 | pass
138 |
--------------------------------------------------------------------------------
/alfa/config/event_to_mitre.yml:
--------------------------------------------------------------------------------
1 | 2sv_disable:
2 | - account_manipulation.persistence
3 | 2sv_enroll:
4 | - account_manipulation.persistence
5 | ADD_PRIVILEGE:
6 | - cloud_accounts.valid_accounts.privilege_escalation
7 | ADD_RECOVERY_PHONE:
8 | - account_manipulation.persistence
9 | ADD_TO_TRUSTED_OAUTH2_APPS:
10 | - application_access_token.use_alternate_authentication_material.defense_evasion
11 | ALERT_CENTER_BATCH_DELETE_ALERTS:
12 | - cloud_accounts.valid_accounts.defense_evasion
13 | ALERT_CENTER_DELETE_ALERT:
14 | - cloud_accounts.valid_accounts.defense_evasion
15 | ALERT_CENTER_VIEW:
16 | - cloud_service_dashboard.discovery
17 | ASSIGN_ROLE:
18 | - cloud_accounts.valid_accounts.privilege_escalation
19 | CHANGE_PASSWORD:
20 | - account_manipulation.persistence
21 | CHANGE_PASSWORD_ON_NEXT_LOGIN:
22 | - account_manipulation.persistence
23 | CHANGE_RECOVERY_EMAIL:
24 | - account_manipulation.persistence
25 | CHANGE_RECOVERY_PHONE:
26 | - account_manipulation.persistence
27 | CHANGE_USER_ORGANIZATION:
28 | - account_manipulation.persistence
29 | CREATE_EMAIL_MONITOR:
30 | - email_forwarding_rule.email_collection.collection
31 | CREATE_GMAIL_SETTING:
32 | - email_forwarding_rule.email_collection.collection
33 | CREATE_USER:
34 | - cloud_account.create_account.persistence
35 | DELETE_USER:
36 | - account_manipulation.persistence
37 | DOWNLOAD_REPORT:
38 | - data_from_information_repositories.collection
39 | DOWNLOAD_UNMANAGED_USERS_LIST:
40 | - email_account.account_discovery.discovery
41 | - cloud_account.account_discovery.discovery
42 | DOWNLOAD_USERLIST_CSV:
43 | - email_account.account_discovery.discovery
44 | - cloud_account.account_discovery.discovery
45 | EMAIL_LOG_SEARCH:
46 | - remote_email_collection.email_collection.collection
47 | ENABLE_USER_IP_WHITELIST:
48 | - account_manipulation.persistence
49 | EXPORT_DEVICES:
50 | - data_from_information_repositories.collection
51 | GRANT_ADMIN_PRIVILEGE:
52 | - account_manipulation.persistence
53 | - cloud_accounts.valid_accounts.privilege_escalation
54 | GRANT_DELEGATED_ADMIN_PRIVILEGES:
55 | - account_manipulation.persistence
56 | GROUP_LIST_DOWNLOAD:
57 | - cloud_groups.permission_groups_discovery.discovery
58 | GROUP_MEMBERS_DOWNLOAD:
59 | - cloud_groups.permission_groups_discovery.discovery
60 | IMPORT_SSH_PUBLIC_KEY:
61 | - data_from_information_repositories.collection
62 | MOBILE_DEVICE_APPROVE:
63 | - account_manipulation.persistence
64 | REMOVE_FROM_BLOCKED_OAUTH2_APPS:
65 | - application_access_token.use_alternate_authentication_material.defense_evasion
66 | REMOVE_RECOVERY_EMAIL:
67 | - account_manipulation.persistence
68 | REMOVE_RECOVERY_PHONE:
69 | - account_manipulation.persistence
70 | RESET_SIGNIN_COOKIES:
71 | - account_manipulation.persistence
72 | REVOKE_ADMIN_PRIVILEGE:
73 | - account_manipulation.persistence
74 | REVOKE_ASP:
75 | - account_manipulation.persistence
76 | REVOKE_SECURITY_KEY:
77 | - account_manipulation.persistence
78 | TRUST_DOMAIN_OWNED_OAUTH2_APPS:
79 | - application_access_token.use_alternate_authentication_material.defense_evasion
80 | - security_software_discovery.software_discovery.discovery
81 | UNBLOCK_USER_SESSION:
82 | - account_manipulation.persistence
83 | UNDELETE_USER:
84 | - account_manipulation.persistence
85 | UNENROLL_USER_FROM_STRONG_AUTH:
86 | - account_manipulation.persistence
87 | UNENROLL_USER_FROM_TITANIUM:
88 | - account_manipulation.persistence
89 | UNSUSPEND_USER:
90 | - account_manipulation.persistence
91 | UNTRUST_DOMAIN_OWNED_OAUTH2_APPS:
92 | - security_software_discovery.software_discovery.discovery
93 | VIEW_SITE_DETAILS:
94 | - data_from_information_repositories.collection
95 | account_disabled_generic:
96 | - account_manipulation.persistence
97 | action_complete:
98 | - security_software_discovery.software_discovery.discovery
99 | activity:
100 | - application_access_token.use_alternate_authentication_material.lateral_movement
101 | add_member:
102 | - account_manipulation.persistence
103 | add_member_role:
104 | - account_manipulation.persistence
105 | add_security_setting:
106 | - account_manipulation.persistence
107 | add_subscription:
108 | - data_from_information_repositories.collection
109 | authorize:
110 | - application_access_token.use_alternate_authentication_material.defense_evasion
111 | - steal_application_access_token.credential_access
112 | ban_member_with_moderation:
113 | - account_manipulation.persistence
114 | change_security_setting:
115 | - account_manipulation.persistence
116 | change_user_access:
117 | - cloud_accounts.valid_accounts.privilege_escalation
118 | delete_group:
119 | - account_manipulation.persistence
120 | download:
121 | - data_from_information_repositories.collection
122 | email_forwarding_change:
123 | - email_forwarding_rule.email_collection.collection
124 | email_forwarding_out_of_domain:
125 | - email_forwarding_rule.email_collection.collection
126 | join:
127 | - account_manipulation.persistence
128 | login_challenge:
129 | - cloud_accounts.valid_accounts.persistence
130 | - cloud_accounts.valid_accounts.privilege_escalation
131 | - cloud_accounts.valid_accounts.defense_evasion
132 | - password_guessing.brute_force.credential_access
133 | - password_spraying.brute_force.credential_access
134 | - credential_stuffing.brute_force.credential_access
135 | login_failure:
136 | - cloud_accounts.valid_accounts.persistence
137 | - password_guessing.brute_force.credential_access
138 | - password_spraying.brute_force.credential_access
139 | - credential_stuffing.brute_force.credential_access
140 | login_success:
141 | - cloud_accounts.valid_accounts.persistence
142 | - cloud_accounts.valid_accounts.privilege_escalation
143 | - cloud_accounts.valid_accounts.defense_evasion
144 | - password_guessing.brute_force.credential_access
145 | - password_spraying.brute_force.credential_access
146 | - credential_stuffing.brute_force.credential_access
147 | - saml_tokens.forge_web_credentials.credential_access
148 | login_verification:
149 | - cloud_accounts.valid_accounts.persistence
150 | - cloud_accounts.valid_accounts.privilege_escalation
151 | - cloud_accounts.valid_accounts.defense_evasion
152 | - password_guessing.brute_force.credential_access
153 | - password_spraying.brute_force.credential_access
154 | - credential_stuffing.brute_force.credential_access
155 | password_change:
156 | - account_manipulation.persistence
158 | recovery_info_change:
159 | - account_manipulation.persistence
160 | recovery_phone_edit:
161 | - account_manipulation.persistence
162 | remove_info_setting:
163 | - account_manipulation.persistence
164 | remove_member:
165 | - account_manipulation.persistence
166 | remove_member_role:
167 | - account_manipulation.persistence
168 | remove_membership_expiry:
169 | - account_manipulation.persistence
170 | remove_security_setting:
171 | - account_manipulation.persistence
172 | suspicious_login:
173 | - cloud_accounts.valid_accounts.persistence
174 | - cloud_accounts.valid_accounts.privilege_escalation
175 | - cloud_accounts.valid_accounts.defense_evasion
176 | suspicious_login_less_secure_app:
177 | - cloud_accounts.valid_accounts.persistence
178 | - cloud_accounts.valid_accounts.privilege_escalation
179 | - cloud_accounts.valid_accounts.defense_evasion
180 | suspicious_programmatic_login:
181 | - cloud_accounts.valid_accounts.persistence
182 | - cloud_accounts.valid_accounts.privilege_escalation
183 | - cloud_accounts.valid_accounts.defense_evasion
184 | titanium_change:
185 | - account_manipulation.persistence
186 | titanium_unenroll:
187 | - account_manipulation.persistence
188 | unban_member:
189 | - account_manipulation.persistence
190 |
--------------------------------------------------------------------------------
/alfa/main/alfa.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 | import os
3 | from .analyser import Analyser
4 | from .activity import Activities, Activity
5 | from .event import Events
6 | from pandas.core.series import Series
7 | from pandas import to_datetime
8 | from typing import Tuple, Union
9 | from functools import reduce
10 |
11 | from ..config import config
12 | from .kill_chain import KillChain
13 | from .collector import Collector
14 |
15 | class Alfa:
16 | '''Takes all suspicious activities and creates a separate "events"
17 | attribute that holds all events.
18 | Each event contains a reference to its corresponding activity.
19 |
20 | Can be initialized as empty, or with an Activities dataframe.
21 | Typically will be initialized through static methods:
22 | Alfa.load, Alfa.load_unfiltered, or Alfa.query
23 | '''
24 | activities = Activities(**config['activity_defaults'])
25 | events = Events()
26 |
27 | def __init__(self, activity_list: list = None) -> None:
28 | self.collector = Collector()
29 | if activity_list is not None:
30 | self.activities = Activities(activity_list)
31 | self.events = self.initialize_events()
32 | self.activities = self.activities.fillna('')
33 | pass
34 |
35 | def __get_events(self, activity: Series) -> list:
36 | return activity['events']
37 |
38 |     def __get_all_events(self) -> list:
39 | all_events = []
40 | if self.activities.shape[0] == 0:
41 | return []
42 | for activity in self.activities.iloc:
43 | activity_id = activity.name
44 | new_events = self.__get_events(activity)
45 | for event in new_events:
46 | event['activity_id'] = activity_id
47 | event['activity_time'] = to_datetime(activity['id.time'])
48 | all_events = all_events + new_events
49 |
50 | return all_events
51 |
52 | def __create_events(self, all_events: list) -> Events:
53 | E = Events(all_events)
54 | if 'activity_time' not in E:
55 | print('warning: no data in dataset!')
56 | E.parent = self
57 | return E
58 |         # sort_values throws an error if the dataframe is empty, hence the early return above
59 | E = E.sort_values('activity_time', ignore_index=True)
60 | E.parent = self
61 | return E
62 |
63 | def initialize_events(self) -> Events:
64 | all_events = self.__get_all_events()
65 | return self.__create_events(all_events)
66 |
67 | def activity_by_id(self, uid: str) -> Activity:
68 | return self.activities.loc[uid]
69 |
70 | def filter(self, filter_array: Series) -> 'Alfa':
71 | '''
72 | Filters on *activities* and returns a new Alfa object.
73 |         Input should be a boolean mask over the Activities dataframe.
74 |         e.g. A.filter(A.activities['actor.email'].str.startswith('attacker'))
75 |         will return a new Alfa containing only activities whose actor email starts with 'attacker'
76 | '''
77 | filtered_activities = self.activities[filter_array]
78 | return Alfa(filtered_activities)
79 |
80 | def kcs(self, start_index: int = 0, end_index: int = None):
81 | '''
82 | return a kill_chain_statistic for the entire set of events,
83 | if called with no params, else acts on a slice.
84 | '''
85 | E = self.events['attack.index']
86 | if end_index and end_index > start_index:
87 | E = E.iloc[slice(start_index, end_index)]
88 | return KillChain.generate_kill_chain_statistic(list(E))
89 |
90 | def subchains(self, min_length=None, min_stat=None):
91 | subchains = KillChain.discern_subchains(
92 | self.events['attack.index'], min_length, min_stat)
93 | return sorted(subchains, key=lambda x: x[2], reverse=True)
94 |
95 | @staticmethod
96 | def acquire(logtype: str, *args, **kwargs) -> Union[list, dict]:
97 | '''
98 | Collect records from API, do not process them
99 |         This is a wrapper around Collector, see Collector.query for details
100 | '''
101 | C = Collector()
102 |         res = C.query(logtype, *args, **kwargs)  # this returns a dataframe
103 | return res
104 |
105 | @staticmethod
106 | def query(logtype: str, filter=True, *args, **kwargs):
107 | '''
108 | Query API directly, returns an Alfa object. See collector
109 | '''
110 | C = Collector()
111 | A = Analyser()
112 | Q = C.query(logtype, *args, **kwargs)
113 | records = A.analyse(Q, filter=filter)
114 | return Alfa(Activities(records))
115 |
116 | @staticmethod
117 |     def load(logtype: str, path: str = None, email: list = None, filter: bool = True) -> 'Alfa':
118 | '''
119 |         load a log (or all logs) from the given data/ folder, *filter* it, and
120 |         return an Alfa object. Optionally filter by email.
121 | See analyser for details
122 | '''
123 | A = Analyser()
124 | C = Collector()
125 | if logtype == 'all':
126 | all_ = C.load_all(path)
127 | records = A.analyse(all_, email=None, filter=filter)
128 | return Alfa(Activities(records))
129 | Q = C.load(os.path.join(path, logtype+'.json'))
130 | records = A.analyse(Q, email=None, filter=filter)
131 | return Alfa(Activities(records))
132 |
133 | def __aoi(self, concat: bool = True):
134 | '''
135 | Activities of Interest
136 | Automates the following:
137 | 1. get subchains
138 | 2. join subchains that are close by
139 | 3. grab event slices from those subchains
140 | 4. list out the unique activities associated with those subchains
141 | concat: bool, if True (default) then append
142 | the activity slices to one another
143 | '''
144 | subchains = self.subchains()
145 | long_chains = KillChain.join_subchains_loop(subchains)
146 | event_slices = self.events.get_event_slices(long_chains)
147 | activity_slices = [e.activities() for e in event_slices]
148 | if len(activity_slices) == 0: # prevent possible concat on empty list
149 | return activity_slices
150 | if concat:
151 | res = reduce(lambda a, b: a.append(b), activity_slices)
152 | res = res[~res.index.duplicated()]
153 | return res
154 | return activity_slices
155 |
156 | def aoi(self, export: str = None, nd: bool=False):
157 | '''
158 | wrapper around __aoi (above)
159 | adds the export functionality
160 |         exports data as a JSON file
161 | '''
162 | if len(self.events) != 0:
163 | aoi = self.__aoi()
164 | if export is not None:
165 |             aoi['events'] = aoi['events'].apply(self.list_to_string)  # serialize the list to a string so the JSON export doesn't fail
166 | if nd:
167 | with open(export, 'w') as f:
168 | for _, row in aoi.iterrows():
169 | f.write(row.to_json()+'\n')
170 | else:
171 | aoi.to_json(export, orient="records")
172 | print('saved to', export)
173 | return aoi
174 | else:
175 |             print("[!] Error - please provide an export file as follows: A.aoi(export='example.json')")
176 | else:
177 | print("[!] Error - no data to export")
178 |
179 | def list_to_string(self, lst):
180 | return ','.join(map(str, lst))
181 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ALFA
2 | ## Automated Audit Log Forensic Analysis for Google Workspace
3 | Copyright (c) 2025 Invictus Incident Response
4 | Original authors [Greg Charitonos](https://www.linkedin.com/in/charitonos/) & [BertJanCyber](https://twitter.com/BertJanCyber) maintained by Invictus Incident Response
5 |
6 | # Before you start
7 | A note on supported operating systems: ALFA is tested on several Linux distributions (Debian and Ubuntu).
8 | While it might be possible to use it on Windows or macOS, results can vary, so proceed at your own risk.
9 |
10 | # Installation
11 | 1. Download this project
12 | 2. Navigate to the folder in your terminal and run ```sudo pip install -e .``` or ```sudo pip3 install -e .```
13 |
14 | ## Installation on macOS
15 | 1. **Install Python 3.9 using `pyenv`**
16 | ALFA works best with Python 3.9. You can use `pyenv` to manage and install this version.
17 | - If you haven’t installed `pyenv` yet, you can do so using Homebrew:
18 | ```bash
19 | brew install pyenv
20 | ```
21 | - Set up `pyenv` in your shell by adding the following to your shell configuration file (e.g., `~/.zshrc` or `~/.bashrc`):
22 | ```bash
23 | export PYENV_ROOT="$HOME/.pyenv"
24 | export PATH="$PYENV_ROOT/bin:$PATH"
25 | eval "$(pyenv init --path)"
26 | ```
27 | After updating the file, reload your shell:
28 | ```bash
29 | source ~/.zshrc # or source ~/.bashrc
30 | ```
31 | - Install Python 3.9 using `pyenv`:
32 | ```bash
33 | pyenv install 3.9
34 | ```
35 | - Set Python 3.9 as the current version:
36 | ```bash
37 | pyenv global 3.9
38 | ```
39 | or just for the current session:
40 | ```bash
41 | pyenv shell 3.9
42 | ```
43 | 2. **(Optional) Create a Virtual Environment**
44 | It’s a good practice to run projects in a virtual environment to keep dependencies isolated. Using the 3.9 version of Python you just set, create a virtual environment:
45 | ```bash
46 | python -m venv alfa
47 | ```
48 | - Activate the virtual environment:
49 | ```bash
50 | source alfa/bin/activate
51 | ```
52 | 3. **Install ALFA**
53 | Inside the folder where the ALFA project is located, install it using `pip`:
54 | ```bash
55 | pip install -e .
56 | ```
57 | If you’re not using a virtual environment, you may need `sudo`:
58 | ```bash
59 | sudo pip install -e .
60 | ```
61 |
62 | 4. **Verify Installation**
63 | After installation, you can verify that ALFA is installed by attempting to run its command-line tool `alfa`.
64 |
65 | ## Using ALFA
66 | NOTE: For retrieving credentials.json, please see ```CREDENTIALS.md```
67 |
68 | ## Setup
69 | 1. The first step is to initialize ALFA. Do this by running ```alfa init projectname```; this command will create a new directory to store your configuration and data, e.g. ```alfa init project_x```
70 | 2. A new project has now been generated called 'project_x'. Within that folder, copy your credentials.json into the config/ folder. **(See CREDENTIALS.md for instructions.)**
71 | 3. After you have copied over the credentials.json file you are ready to use ALFA.
72 |
73 | ALFA has 3 options as explained below:
74 |
75 | ## 1. ALFA Acquire
76 | ## Acquire all Google Workspace Audit Logs
77 | 1. From inside "project_x" (or whatever name you chose before) run ```alfa acquire```
78 | 2. ALFA will now grab all logtypes for all users and save them to a subdirectory in the 'data' folder; a .json file will be generated per logtype
79 | 3. To see what other options you have type ```alfa acquire -h```
80 |
81 | ## Advanced acquisitions with ALFA
82 | You can do all kinds of filtering to limit the data you are acquiring; some examples below:
83 | - Only grab the 'admin' logtype ```alfa acquire --logtype=admin```
84 | - Save the output to a specific folder ```alfa acquire -p /tmp/project_secret```
85 | - Only grab logs for a specific user ```alfa acquire --user=insert_username```
86 | - Grab logs within a defined time period ```alfa acquire --start-time=2022-07-10T10:00:00Z --end-time=2022-07-11T14:26:01Z``` (the time format is RFC3339)
87 |
88 | Now that you know how to acquire data, it's time for some fancy stuff: unleash the power of ALFA.
89 |
90 | ## 2. ALFA Analyze
91 | The analyze function automatically analyzes all audit log data for a given Google Workspace to identify suspicious activity.
92 | ### How this works
93 | 1. Categorization
94 | Each individual event is categorized based on the mapping in alfa/config/event_to_mitre.yml. If an event matches that list, it is mapped to a technique that is part of the MITRE ATT&CK Cloud Framework (https://attack.mitre.org/matrices/enterprise/cloud/); a short sketch of this step follows this list.
95 |
96 | 2. Scoring
97 | Next ALFA will analyze all mapped events in chronological order to try to identify kill chains or logical attack paths.
98 | E.g. an event that is mapped to the Persistence phase followed by an event that is mapped to the Credential Access phase will result in a higher score.
99 |
100 | 3. Result
101 | Ultimately ALFA will give the analyst a list of identified 'subchains' that can be further analyzed.
102 |
103 | ### How to use ALFA analyze?
104 | 1. First run ```alfa analyze```, which will automatically identify subchains (or report that none were found). It will also drop you into a shell where you can perform follow-up activities.
105 | 2. To get more information on a given subchain, run ```A.subchains()```, which will show each chain in the format (number_of_first_event_in_chain, number_of_last_event_in_chain, killchain_score), where a score of 1 means a perfect chain was identified, and the closer the score gets to 0, the weaker the chain.
106 | 3. To access the suspicious events behind a chain, use ```A.aoi(export='activities.json')``` to export all identified subchains to a file for further analysis (see the sketch below).
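Putting these steps together, a typical session looks roughly like this (the export filename is just an example):
```
alfa analyze
# ALFA reports any identified subchains and drops you into a shell:
A.subchains()                      # inspect the identified subchains
A.aoi(export='activities.json')    # export the associated activities for further analysis
```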
107 |
108 |
109 | ## 3. ALFA Load
110 | ## Load data from local storage
111 | ### From Local Storage
112 | Use ```A = Alfa.load([logname])``` to load and analyse logs from local storage. Use ```A = Alfa.load('all')``` to load all logs. Alfa *filters* benign activities out by default. To load all activities and events, unfiltered, use ```Alfa.load([logname], filter=False)```.
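For example (log names correspond to the JSON files in your data directory, e.g. 'admin'):
```
A = Alfa.load(['admin'])                 # load a single logtype, benign activity filtered out
A = Alfa.load('all')                     # load every logtype
A = Alfa.load(['admin'], filter=False)   # keep all activities and events, unfiltered
```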
113 |
114 |
115 | ## Making Changes
116 | ### Adding new event mappings.
117 | It is possible to edit the config/event_to_mitre.yml file directly, but it is ill-advised: the layout of this file is unintuitive. Instead, consider making amendments to utils/mappings.yml, then repopulate config/event_to_mitre.yml by running utils/event_mitre_remap.py.
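A sketch of that workflow, assuming the helper script is run from the repository root:
```bash
# 1. add the new mappings to alfa/utils/mappings.yml
# 2. regenerate the mapping file consumed by ALFA:
python alfa/utils/event_mitre_remap.py
```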
118 |
119 | ### Amending Kill Chain Discovery methods
120 | The kill chain discovery function utilizes hard-coded constants. These can be found in config/config.yml.
121 | Forensic analysts are advised to review the values and make amendments for their workspace as necessary.
122 | These constants are responsible for both the kill chain statistic (kcs) and kill chain discovery (subchains).
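The keys read by the kill chain code (see alfa/main/kill_chain.py) look roughly like the following; the values shown here are purely illustrative:
```yaml
kill_chain:
  index_reducer: mean        # how multi-category events are reduced to one index: mean, min, or max
  min_chain_length: 8        # minimum number of events in a subchain (illustrative value)
  min_chain_statistic: 0.7   # minimum kcs for a subchain to be reported (illustrative value)
  max_slack_width: 2         # tolerated dip width while growing a subchain (illustrative value)
  max_slack_depth: 3         # tolerated dip depth while growing a subchain (illustrative value)
```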
123 |
124 | ## FAQ
125 | Want to know more about the statistics and algorithm used by ALFA? We wrote a blog post about it [here](https://medium.com/@invictus-ir/automated-forensic-analysis-of-google-workspace-859ed50c5c92).
126 |
127 | ## Known Errors
128 | ### ValueError: missing config/credentials.json
129 | You have to add a credentials.json file to the project folder, in the 'config' subdirectory. Instructions are in the 'CREDENTIALS.md' file.
130 |
131 | ### OSError: [Errno 98] Address already in use
132 | This means that port 8089 is already in use by another application. This could happen if you have a web server running on this port; note that Splunk also uses port 8089 by default. The solution is to (temporarily) stop whatever is using the port, as the authentication flow requires it to be available.
133 |
134 | ### ValueError: Authorized user info was not in the expected format, missing fields refresh_token.
135 | Sometimes the authorization info needs to be updated. The easiest way to do this is to remove 'token.json' from the project_name/config folder and rerun the command. If that still gives issues, remove both token.json and credentials.json and re-register the OAuth application as described in ```CREDENTIALS.md```.
137 |
138 | ### Access is blocked: This app's request is invalid // Error 400: redirect_uri_mismatch
139 | Make sure you have the trailing slash (``/``) in the ``Authorized redirect URIs`` entry (``http://localhost:8089/``) of your application's ``OAuth 2.0 Client IDs``.
141 |
--------------------------------------------------------------------------------
/alfa/main/kill_chain.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 |
3 | from pandas import isna
4 | from ..config import config
5 | kc_conf = config['kill_chain']
6 |
7 |
8 | class KillChain:
9 | '''
10 |     KillChain takes a list of events and assigns a value from chain_dict, based on the event's category.
11 | The KillChain Statistic is a value, from -1 to 1, that indicates how well the traditional kill-chain attack is followed.
12 |
13 | -1 indicates that the kill_chain was followed in reverse. 0 is totally random. 1 indicates a 100% follow-through.
14 | '''
15 |
16 | reductive_methods = {
17 | 'mean': lambda x: sum(x)/len(x),
18 | 'min': min,
19 | 'max': max
20 | }
21 |
22 | chain_dict = {
23 | "persistence": 1,
24 | "privilege_escalation": 2,
25 | "defense_evasion": 3,
26 | "credential_access": 4,
27 | "discovery": 5,
28 | "lateral_movement": 6,
29 | "collection": 7
30 | }
31 |
32 | @staticmethod
33 | def generate_kill_chain_statistic(chain_index_list: list) -> float:
34 | '''
35 | Input: list of attack_indexes
36 | Output: statistic
37 |
38 | sum -> 0, count-> 0
39 | For each index:
40 | if prev_index < index:
41 | sum += 1, count +=1
42 | if prev_index > index:
43 | sum -= 1, count += 1
44 |
45 | Output -> sum / count
46 | '''
47 | result = 0
48 | chain_size = len(chain_index_list)
49 | total_unique_indexes = 0
50 | prev_value = chain_index_list[0]
51 | for i in range(1, chain_size):
52 | if isna(chain_index_list[i]):
53 | continue
54 | if chain_index_list[i] > prev_value:
55 | result += 1
56 | total_unique_indexes += 1
57 | prev_value = chain_index_list[i]
58 |
59 | elif chain_index_list[i] < prev_value:
60 | result -= 1
61 | total_unique_indexes += 1
62 | prev_value = chain_index_list[i]
63 | elif chain_index_list[i] == prev_value:
64 | result -= 1/len(chain_index_list) # decrement result slowly if previous value is the same as current value
65 | if total_unique_indexes == 0:
66 | return 0
67 | return result / total_unique_indexes
68 |
69 | @staticmethod
70 | def assign_index(category):
71 | return KillChain.chain_dict[category]
72 |
73 | @staticmethod
74 | def reduce_category_list(category_list):
75 | '''
76 | Used to reduce attack indexes to a single value. It takes the categories (tactics) and maps them according to KillChain.chain_dict
77 | Then it uses the chosen reductive method (in config.yml), to reduce the value to a single number. Reductive methods available:
78 | mean, min, & max.
79 | '''
80 | category_as_indexes = [
81 | KillChain.assign_index(c) for c in category_list]
82 | return KillChain.reductive_methods[kc_conf['index_reducer']](category_as_indexes)
83 |
84 | @staticmethod
85 | def __discern_single_subchain(chain_index_list: list, start_index: int, min_length: int, min_stat: float, max_slack_width: int=kc_conf['max_slack_width'], max_slack_depth: int= kc_conf['max_slack_depth']) -> list:
86 | '''
87 | Output: [start_index, end_index, statistic]
88 |
89 | growing phase: Not shrinking phase
90 | while True
91 | growing phase:
92 | if better (or equal) stats:
93 | set candidate
94 | grow (end_index += 1)
95 | repeat
96 | else:
97 | set phase to shrinking
98 |
99 | if shrinking:
100 | shrink (start_index += 1)
101 | if better stats:
102 | set candidate
103 | if too small:
104 | end
105 | if run out of tries (max_shrink_no_change):
106 | end
107 | repeat
108 |
109 | check statistic for slice between start_index, end_index
110 | if slice better than prev_slice, set it as candidate
111 | if growing:
112 | increase end_index by 1
113 | if shrinking:
114 | increase start_index by 1
115 |
116 | start by growing, and change to shrinking when:
117 | end_index is length of chain
118 | stat no longer increases
119 |
120 | End loop when:
121 | in shrinking phase and:
122 | stat no longer increasing
123 | length of slice is min_length
124 | '''
125 | end_index = min(start_index + min_length, len(chain_index_list))
126 |
127 | prev_stat = 0
128 | candidate = None
129 |
130 | shrinking_phase = False
131 | max_shrink_no_change = min_length
132 | shrink_amount = 0
133 | slack_width = 0
134 |
135 | while True:
136 |             if shrinking_phase:
137 |                 shrink_amount += 1
138 |             SI = start_index + shrink_amount  # slice start; equals start_index during the growing phase
139 |             subchain = chain_index_list[SI:end_index]
140 | stat = KillChain.generate_kill_chain_statistic(subchain)
141 |
142 | if stat > min_stat:
143 | if not shrinking_phase: # First phase "Growing" phase
144 | SI = start_index
145 | # Greedy. Greater than OR equal to. Try and grow as much as possible.
146 | if stat >= prev_stat:
147 | candidate = [SI, end_index, stat]
148 | prev_stat = stat
149 | end_index += 1
150 | slack_width = 0
151 | if end_index >= len(chain_index_list):
152 | shrinking_phase = True
153 | end_index = len(chain_index_list)
154 | continue
155 | elif slack_width < max_slack_width:
156 | final_index = chain_index_list[end_index-1]
157 | index_before_slack = chain_index_list[end_index - (2+slack_width)]
158 | if index_before_slack - final_index < max_slack_depth:
159 | slack_width += 1
160 | else:
161 | shrinking_phase = True
162 | slack_width = 0
163 | end_index -= 1
164 | else:
165 | shrinking_phase = True
166 | slack_width = 0
167 | end_index -= 1+slack_width
168 | continue
169 |
170 | elif shrinking_phase: # Second phase
171 | if stat > prev_stat: # Lazy. Shrink as little as possible. Shrink ONLY IF GREATER THAN
172 | candidate = [SI, end_index, stat]
173 | prev_stat = stat
174 | if (end_index - start_index <= min_length): # shrunk too small
175 | break
176 | elif shrink_amount < max_shrink_no_change: # still opportunity to shrink
177 | continue
178 |                 break
            else:
                # growing phase but statistic is below min_stat: no viable subchain from this start_index
                break
179 | return candidate
180 |
181 |
182 | @staticmethod
183 |     def join_close_subchains(subchain_list: list, min_chain_length: int = kc_conf['min_chain_length']) -> tuple:
184 | subchain_list.sort(key=lambda x: x[0])
185 | i = len(subchain_list) - 1
186 | new_chains = []
187 | change_count = 0
188 | while i >= 0:
189 | if i == 0:
190 | new_chains.insert(0, subchain_list[0][:2])
191 | break
192 |
193 | curr_chain = subchain_list[i]
194 | prev_chain = subchain_list[i-1]
195 | if curr_chain[0] - prev_chain[1] < min_chain_length:
196 | new_chains.insert(0,[prev_chain[0], curr_chain[1]])
197 | i -= 2
198 | change_count += 1
199 | else:
200 | new_chains.insert(0,curr_chain[:2])
201 | i -= 1
202 | return new_chains, change_count
203 |
204 | @staticmethod
205 | def join_subchains_loop(chain_index_list: list, min_chain_length: int = kc_conf['min_chain_length']) -> list:
206 | jsc, count = KillChain.join_close_subchains(
207 | chain_index_list, min_chain_length)
208 | while count:
209 | jsc, count = KillChain.join_close_subchains(jsc, min_chain_length)
210 | return jsc
211 |
212 | @staticmethod
213 |     def discern_subchains(chain_index_list: list, min_length: int = None, min_stat: float = None) -> list:
214 |         '''
215 |         Takes in a list of attack_indexes, outputs subchains within it. Output in the form -> [ [start_index, end_index, statistic], ...]
216 |         Discover subchains within a series. Uses the configs in the config.yml file if not defined:
217 |         - min_chain_length
218 |         - min_chain_statistic
219 |         '''
220 |         if min_length is None:
221 |             min_length = kc_conf['min_chain_length']
222 |         if min_stat is None:
223 |             min_stat = kc_conf['min_chain_statistic']
224 |
225 | chain_index_list = list(chain_index_list)
226 | subchains = []
227 | start_index = 0
228 | while start_index < (len(chain_index_list) - min_length):
229 | candidate = KillChain.__discern_single_subchain(
230 | chain_index_list, start_index, min_length, min_stat)
231 | if candidate:
232 | subchains.append(candidate)
233 | start_index = candidate[1] # end_index
234 | else:
235 | start_index += 1
236 | return subchains
237 |
--------------------------------------------------------------------------------
/alfa/main/collector.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 |
3 | """
4 | https://developers.google.com/admin-sdk/reports/reference/rest/v1/activities/list
5 |
6 | dates: https://www.ietf.org/rfc/rfc3339.txt
7 |
8 | """
9 | import json
10 | from json.decoder import JSONDecodeError
11 | import os
12 | import os.path
13 | from datetime import datetime
14 |
15 | import pandas as pd
16 | from google.auth.transport.requests import Request
17 | from google.oauth2.credentials import Credentials
18 | from google_auth_oauthlib.flow import InstalledAppFlow
19 | from googleapiclient.discovery import Resource, build
20 |
21 | from ..config import config
22 | from ..config.__internals__ import internals
23 | from ..utils.path import *
24 |
25 | PORT = 8089  # local port for the OAuth redirect; must match the app's Authorized redirect URI
26 |
27 | creds_path = os.path.join(
28 | internals["project"]["dirs"]["configs"], internals["project"]["files"]["creds"]
29 | )
30 | token_path = os.path.join(
31 | internals["project"]["dirs"]["configs"], internals["project"]["files"]["token"]
32 | )
33 |
34 | DATA_DIR = internals["project"]["dirs"]["data"]
35 |
36 | creds_instructions = """
37 | === Missing "config/credentials.json" ===
38 | 1. Go to https://console.developers.google.com/cloud-resource-manager
39 | 2. Create a project
40 | 3. Go to https://console.developers.google.com/apis/dashboard and choose "Credentials", then "OAuth client ID"
41 | 4. Select "web application" as application type
42 | 5. Copy the resulting credentials to config/credentials.json
43 | """
44 |
45 |
46 | class Collector:
47 | """
48 | Begins authentication flow at init.
49 |
50 |     the .query method collects logs, given the following
51 |     params:
52 | logtype: either as a string (single log, e.g. "admin"), a list ["admin", "drive"], or the string "all" to collect all logs
53 | user: str='all' | userId or email of user. 'all' => all users
54 | max_results: int=1000 | max results per page
55 | max_pages: int = None | max number of pages (requests per log)
56 | start_time: str=None | rfc3339 date string. must be less than end time
57 | end_time: str=None | rfc3339 date string. must be greater than start time
58 | """
59 |
60 | SCOPES = config["scopes"]
61 |
62 | def __init__(self) -> None:
63 | self.api_ready = False
65 |
66 | def __init_api_creds(self):
67 | """
68 | should be called before interacting with api
69 | """
70 | self.creds = self.get_credentials()
71 | self.service = self.connect_api()
72 | self.request_count = 0
73 | self.api_ready = True
75 |
76 | def __create_path(self, path: str):
77 | """create path if non-existent"""
78 | full_path = rel_path(path)
79 | if os.path.exists(full_path):
80 | return full_path
81 | os.makedirs(full_path)
82 | return full_path
83 |
84 | def get_credentials(self):
85 | creds = False
86 | if os.path.exists(token_path):
87 | creds = Credentials.from_authorized_user_file(token_path)
88 |
89 | if not creds or not creds.valid:
90 | if not os.path.exists(creds_path):
91 | print(creds_instructions)
92 | raise ValueError("missing config/credentials.json")
93 | if creds and creds.refresh_token:
94 | creds.refresh(Request())
95 | else:
96 | flow = InstalledAppFlow.from_client_secrets_file(
97 | creds_path, self.SCOPES
98 | )
99 | creds = flow.run_local_server(port=PORT)
100 | with open(token_path, "w") as token:
101 | token.write(creds.to_json())
102 | return creds
103 |
104 | def connect_api(self):
105 | service = build("admin", "reports_v1", credentials=self.creds)
106 | return service
107 |
108 | def query_one(
109 | self,
110 | save_path: str,
111 | save,
112 | logtype: str,
113 | user: str = "all",
114 | max_results: int = 1000,
115 | max_pages: int = None,
116 | start_time: str = None,
117 | end_time: str = None,
118 | **kwargs,
119 | ) -> list:
120 | """
121 | used by the .query method
122 | collects activities from a single logtype, and returns them as a list
123 | """
124 | activities = self.service.activities()
125 | req = activities.list(
126 | userKey=user,
127 | applicationName=logtype,
128 | maxResults=max_results,
129 | startTime=start_time,
130 | endTime=end_time,
131 | )
132 | page_index = 0
133 | result = 0
134 |
135 | while req is not None:
136 |             if max_pages and page_index >= max_pages:  # stop once max_pages pages have been fetched
137 | break
138 |
139 | self.request_count += 1
140 | resp = req.execute()
141 | my_activities = resp.get("items", [])
142 | result += len(my_activities)
143 |
144 | if my_activities: # Only open file if there is data
145 | if not (save_path[0] == "/" or save_path.startswith("./")):
146 | save_path = "./" + save_path
147 | if not save_path.endswith("/"):
148 | save_path = save_path + "/"
149 | full_path = self.__create_path(save_path)
150 | with open(rel_path(save_path, logtype + ".json"), "a") as f:
151 | for activity in my_activities:
152 | f.write(json.dumps(activity) + "\n")
153 |
154 | req = activities.list_next(req, resp)
155 | page_index += 1
156 | return result
157 |
158 | def query(
159 | self,
160 | logtype: str,
161 | user: str = "all",
162 | max_results: int = 1000,
163 | max_pages: int = None,
164 | start_time: str = None,
165 | end_time: str = None,
166 | save=False,
167 | nd=False,
168 | path=None,
169 | return_as_df=True,
170 | **kwargs,
171 | ) -> list:
172 | """
173 | Queries the API directly. Returns a DataFrame of all log files.
174 | Args:
175 | logtype: 'all' or a logtype such as 'admin' or 'login'.
176 | user: 'all' (default) or a userId or user email address
177 | max_results: maximum results per page (default 1000, max)
178 | max_pages: max number of pages (default: None, as many pages as available)
179 | start_time: in rfc3339 format
180 | end_time: in rfc3339 format
181 | save: should this query be saved directly to storage
182 | path: directory to save under
183 | """
184 | if not self.api_ready: # first initialize the api
185 | self.__init_api_creds()
186 |
187 | if logtype == "all":
188 | logtype = config["logs"] # all logs
189 |         elif isinstance(logtype, str):
190 | logtype = [logtype] # convert to list of len 1
191 |
192 | results = {"activities": dict()}
193 | total_activity_count = 0
194 | save_path = self.__default_path_name()
195 | if path:
196 | save_path = path
197 | for typ in logtype:
198 | res = self.query_one(
199 | save_path, save, typ, user, max_results, max_pages, start_time, end_time
200 | )
201 | total_activity_count += res
202 | print(f"{typ:>25}:", f"{res:>6}", "activities")
203 |
204 | print("\n", total_activity_count, "activities saved to:", save_path)
205 |
206 | if return_as_df:
207 | return self.load_all(f"{save_path}")
208 | return results
209 |
210 | def compute_df(self, activities_json: dict) -> pd.DataFrame:
211 | return pd.json_normalize(activities_json)
212 |
213 | def get_activities_df(self, data: dict) -> pd.DataFrame:
214 | """
215 | expects a json file in the form: {query: {...}, activities: [...] | {...}
216 | extracts the activities, normalizes, and returns as a dataframe.
217 | if activities is a dict, it assumes that the dict is in the form {'logtype': [...activities...]}, and
218 | appends a new column, 'logtype' to the dataframe
219 | """
220 | activities = data["activities"]
221 |         if isinstance(activities, list):
222 |             return pd.json_normalize(activities)
223 |         if isinstance(activities, dict):
224 | prev_df = pd.DataFrame()
225 | for key in activities:
226 | df = pd.json_normalize(activities[key])
227 | df["logtype"] = key
228 | prev_df = pd.concat([prev_df, df], ignore_index=True)
229 | return prev_df
230 | return None
231 |
232 | def __default_path_name(self):
233 | """the default naming convention for paths. This is produced as a datetime string corresponding approx. to when the query was initiated"""
234 | return os.path.join(DATA_DIR, datetime.utcnow().strftime("%y%m%d.%H%M%S"))
235 |
236 | def save(self, data: dict, save_path: str, filename: str, nd: bool):
237 | """saves the raw JSON along with metadata"""
238 | if not (save_path[0] == "/" or save_path.startswith("./")):
239 | save_path = "./" + save_path
240 | if not save_path.endswith("/"):
241 | save_path = save_path + "/"
242 | full_path = self.__create_path(save_path)
243 |
244 |         if "activities" in data and isinstance(data["activities"], list) and nd:
245 | with open(rel_path(full_path, filename), "w") as f:
246 | for record in data["activities"]:
247 | f.write(json.dumps(record) + "\n")
248 | else:
249 | with open(rel_path(full_path, filename), "w") as f:
250 | json.dump(data, f)
251 | return data
252 |
253 | def load(self, json_file: str, as_activities_df: bool = True):
254 | """
255 | loads a dataset from a json file. Expects to be normal JSON.
256 | If it encounters a JSONDecodeError, assumes it is in NDJSON format (newline delimited)
257 | and attempts to access each record separately.
258 | """
259 | with open(json_file) as f:
260 | try:
261 | data = json.load(f)
262 | except JSONDecodeError:
263 | f.seek(0)
264 | activities = []
265 | for line in f:
266 | activities.append(json.loads(line))
267 | data = {"activities": activities}
268 | if as_activities_df:
269 | return self.get_activities_df(data)
270 | return data
271 |
272 | def load_all(self, data_folder: str, as_activities_df: bool = True):
273 | all_files = os.listdir(data_folder)
274 | all_files = [os.path.join(data_folder, x) for x in all_files]
275 | only_json = filter(
276 | lambda x: os.path.isfile(x) and x.endswith(".json"), all_files
277 | )
278 | result = {"activities": {}}
279 | for f in only_json:
280 | data = self.load(f, as_activities_df=False)
281 | logtype = os.path.basename(f).split(".json")[0]
282 | activities = data["activities"]
283 | result["activities"][logtype] = activities
284 | if as_activities_df:
285 | return self.get_activities_df(result)
286 | return result
287 |
--------------------------------------------------------------------------------
/tutorial/tutorial.md:
--------------------------------------------------------------------------------
1 | # ALFA Tutorial
2 |
3 | ## Setup
4 | Having installed ALFA (see the [README](../README.md)), begin by initializing a new project directory.
5 |
6 | ```alfa init tutorial```
7 |
8 | You should now have a new directory called "tutorial". ```cd``` into it.
9 | Inside, you'll find a structure similar to the following:
10 | ```
11 | .
12 | ├── config
13 | │ └── config.yml
14 | └── data
15 | ```
16 |
17 | Ordinarily, you would place a ```credentials.json``` file into the "config" directory. This won't be necessary for the tutorial.
18 | Instead, a pre-made dataset will be used.
19 | ```cd``` into the "data" directory, and
20 | clone the sample dataset
21 | [here](https://github.com/invictus-ir/gws_dataset.git).
22 | ```
23 | git clone https://github.com/invictus-ir/gws_dataset.git
24 | ```
25 |
26 | Your directory structure should now look like:
27 | ```
28 | .
29 | ├── config
30 | │ └── config.yml
31 | └── data
32 | └── gws_dataset
33 | ├── LICENSE
34 | ├── README.md
35 | ├── admin.json
36 | ├── calendar.json
37 | ├── drive.json
38 | ├── groups_enterprise.json
39 | ├── login.json
40 | ├── token.json
41 | └── user_accounts.json
42 | ```
43 |
44 | You are now ready to run ALFA against the dataset.
45 |
46 | ## Running ALFA
47 | ```cd``` back into the root of your project folder.
48 | Load the entire gws_dataset directory using the following command:
49 |
50 | ```
51 | alfa load -p data/gws_dataset
52 | ```
53 | ALFA will automatically load every json file in the directory into its dataset.
54 |
55 | You have now been dropped into a Python shell with access to an ALFA object, stored in the variable ```A```.
56 | ```A``` has 2 important attributes: ```A.events```, and ```A.activities```.
57 |
58 | These are datasets that represent the events and activities present in the logs.
59 | In Google Audit Logs, every action is represented by an "activity". Each "activity" contains a list of "events". These events are essentially building blocks for activities. When loaded, ALFA will automatically analyze the events and classify specific events in accordance with the "[MITRE ATT&CK Cloud Matrix Framework](https://attack.mitre.org/matrices/enterprise/cloud/)".
60 |
61 | Events and activities carry a lot of data. Let's explore the dataset to get an understanding of what it contains.
62 |
63 | ```
64 | A.events.shape # (rows, columns)
65 | A.activities.shape # (rows, columns)
66 | ```
67 | We have 9825 events and 9789 activities. Too many to list off.
68 | Let's select a random sample of 10 from each, and produce a summary, to get an idea of what kind of data we're looking at.
69 | Starting with events:
70 | ```
71 | summary(A.events.sample(10))
72 |
73 | name activity_time activity_id
74 | ---- --------- -------------------------------- --------------------
75 | 6417 authorize 2022-07-19 12:21:35.002000+00:00 -6241941505348084839
76 | 6295 authorize 2022-07-19 12:18:15.883000+00:00 4289095237957192984
77 | 6257 authorize 2022-07-19 12:17:15.685000+00:00 -3533409653085737212
78 | 5378 authorize 2022-07-19 10:51:12.786000+00:00 -3453280989539057832
79 | 6786 authorize 2022-07-19 12:31:54.947000+00:00 4287956763103796901
80 | 8773 authorize 2022-07-19 13:53:10.243000+00:00 4880599880504672506
81 | 7812 authorize 2022-07-19 13:00:25.098000+00:00 -872164451340203101
82 | 2310 authorize 2022-07-19 09:10:31.579000+00:00 7105495284863502655
83 | 3080 authorize 2022-07-19 09:31:51.872000+00:00 4333762167492228608
84 | 3902 authorize 2022-07-19 10:09:28.796000+00:00 -4366756655797941829
85 | ```
86 |
87 | and moving onto activities:
88 | ```
89 | summary(A.activities.sample(10))
90 |
91 | id.time kind actor.email id.applicationName
92 | -------------------- ------------------------ ---------------------- ------------------------ --------------------
93 | -6010887833366957832 2022-07-19T08:00:47.766Z admin#reports#activity admin@cloud-response.com token
94 | 7764407026099878510 2022-07-19T08:53:11.775Z admin#reports#activity admin@cloud-response.com token
95 | 2407139138637242658 2022-07-19T08:26:52.110Z admin#reports#activity admin@cloud-response.com token
96 | -6978932130952386443 2022-07-19T09:12:01.777Z admin#reports#activity admin@cloud-response.com token
97 | 2956946567129110438 2022-07-19T10:35:40.534Z admin#reports#activity admin@cloud-response.com token
98 | -5470615884955105544 2022-07-19T14:02:30.155Z admin#reports#activity admin@cloud-response.com token
99 | 4429007405214477146 2022-07-19T10:22:22.940Z admin#reports#activity admin@cloud-response.com token
100 | 3378976036216458085 2022-07-19T08:30:11.927Z admin#reports#activity admin@cloud-response.com token
101 | 6860468462391716631 2022-08-02T13:41:54.960Z admin#reports#activity admin@cloud-response.com token
102 | 1895767229440860272 2022-07-19T10:48:12.979Z admin#reports#activity admin@cloud-response.com token
103 | ```
104 |
105 | From the summary, note the following:
106 | - Each event has a name, and belongs to an activity
107 | - Each activity has a kind. Some have emails belonging to the user that initiated that activity.
108 |
109 | Suppose you wanted to take a deeper look at the events belonging to one of these activities.
110 | We'll select the activity_id of the first activity, "-6010887833366957832", for this example.
111 |
112 | ```
113 | A.activities.loc['-6010887833366957832'].events
114 |
115 | [{'name': 'authorize',
116 | 'parameters': [{'name': 'client_id', 'value': '106850843410684334493'},
117 | {'name': 'app_name', 'value': '106850843410684334493'},
118 | {'name': 'client_type', 'value': 'WEB'},
119 | {'name': 'scope_data',
120 | 'multiMessageValue': [{'parameter': [{'name': 'scope_name',
121 | 'value': 'https://www.googleapis.com/auth/admin.reports.audit.readonly'},
122 | {'name': 'product_bucket', 'multiValue': ['GSUITE_ADMIN']}]}]},
123 | {'name': 'scope',
124 | 'multiValue': ['https://www.googleapis.com/auth/admin.reports.audit.readonly']}],
125 | 'attack.label': ['application_access_token.use_alternate_authentication_material.defense_evasion',
126 | 'steal_application_access_token.credential_access'],
127 | 'attack.category': ['defense_evasion', 'credential_access'],
128 | 'attack.index': 4,
129 | 'activity_id': '-6010887833366957832',
130 | 'activity_time': Timestamp('2022-07-19 08:00:47.766000+0000', tz='UTC')}]
131 |
132 | ```
133 |
134 | Reading through the mess of data, we see it's an authorize event (like the events in our sample). Aside from the data that Google provides, the events are also marked with an "attack.label", "attack.category" and "attack.index".
135 |
136 | These columns are added by ALFA during analysis. "attack.label" contains a list of the *full* MITRE ATT&CK path. "attack.category" is the last portion of the path, and the index is a value that corresponds to the label. The higher the "attack.index", the further along the event is in the MITRE ATT&CK Cloud Matrix Framework. This is useful for calculating "Kill Chains", as will be explored in the following section.
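For instance, to look only at the events that received a classification, you could filter on one of these added columns (a sketch; unclassified events are assumed to carry a null ```attack.index```, which is how ALFA's kill chain code treats them):
```
A.events[A.events['attack.index'].notna()]
```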
137 |
138 | ## Kill Chain Analysis
139 | Every ALFA object (```A```) is a collection of activities and events. The events can be analysed to assess how closely they fit a "kill chain", using the "kill chain statistic".
140 | The "kill chain statistic", or kcs, is defined as the "tendency for a set of chronologically ordered events to escalate up the MITRE ATT&CK Cloud Matrix Framework". In other words, "how well does my dataset fit a kill chain?". It is a floating point score between -1 and 1. 1 indicates a perfect kill chain, -1 indicates moving in the complete opposite direction. A score close to 0 indicates undirected events (no pattern).
141 |
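To get a feel for the statistic, here is a quick sketch using the KillChain helper from alfa/main/kill_chain.py directly; the attack-index lists are made up:
```
from alfa.main.kill_chain import KillChain

# strictly ascending attack indexes follow the kill chain perfectly
KillChain.generate_kill_chain_statistic([1, 2, 4, 5, 7])   # 1.0

# strictly descending indexes move in the opposite direction
KillChain.generate_kill_chain_statistic([7, 5, 4, 2, 1])   # -1.0
```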
142 | Let's grab the kcs for the entire dataset:
143 | ```
144 | A.kcs()
145 |
146 | 0.09208973121583104
147 | ```
148 |
149 | We have a score just shy of 0.1. This low score is to be expected for the entire dataset. However, there may exist kill chains _within_ the dataset. To discern these, ALFA has a "subchains" method. Let's find some subchains.
150 |
151 | ```
152 | summary( A.subchains() )
153 |
154 | ---- ---- --------
155 | 9428 9435 0.928571
156 | 9680 9687 0.857143
157 | 12 19 0.714286
158 | 26 33 0.714286
159 | 33 40 0.714286
160 | 151 158 0.714286
161 | 9366 9373 0.714286
162 | 9814 9821 0.714286
163 | 76 83 0.714286
164 | 9500 9507 0.714286
165 | ---- ---- --------
166 |
167 | ```
168 |
169 | Here we have a list of kill chains, each with 3 values. The first value is the start index of the kill chain, i.e. the index of the first event in the chain, e.g.
170 | ```A.events.loc[9428]```. The second value is the end index of the kill chain. Lastly, there is the kcs for the given kill chain. Note that the subchains are ordered by kcs, highest first.
171 |
172 | 0.93 is a very high score! Let's take a closer look at those events:
173 | ```
174 | summary(A.events[9428:9435])
175 |
176 | name activity_time activity_id
177 | ---- ------------------ -------------------------------- --------------------
178 | 9428 change_user_access 2022-08-02 07:12:37.638000+00:00 -8593694044162584673
179 | 9429 create 2022-08-02 07:12:37.638000+00:00 -8593694044162584673
180 | 9430 change_acl_editors 2022-08-02 07:12:37.638000+00:00 -8593694044162584673
181 | 9431 add_to_folder 2022-08-02 07:12:37.638000+00:00 -8593694044162584673
182 | 9432 login_verification 2022-08-02 07:13:37.996000+00:00 258855114937
183 | 9433 login_success 2022-08-02 07:13:37.996000+00:00 258855114937
184 | 9434 download 2022-08-02 07:13:57.079000+00:00 3276112931527544503
185 | ```
186 |
187 | The event names give an overview of what occurred at that moment. Together with the activity_time, they can also direct you to particular points in the log which may be of interest.
188 |
189 | Perhaps looking at the activities these events belong to is helpful. Note that there are only 3 activities associated with these 7 events.
190 |
191 | ```
192 | summary( A.events[9428:9435].activities() )
193 | id.time kind actor.email id.applicationName
194 | -------------------- ------------------------ ---------------------- ---------------------------- --------------------
195 | -8593694044162584673 2022-08-02T07:12:37.638Z admin#reports#activity workspace@cloud-response.com drive
196 | 258855114937 2022-08-02T09:32:37.406Z admin#reports#activity workspace@cloud-response.com login
197 | 3276112931527544503 2022-08-02T07:13:57.079Z admin#reports#activity workspace@cloud-response.com drive
198 |
199 | ```
200 |
201 | Here we can see which account is associated with the behavior, and where it originated from.
202 |
203 | ## Activities of Interest
204 |
205 | As mentioned above, finding interesting activities can aid the discovery of interesting portions of the dataset.
206 | To automate this, one can utilise the "activities of interest" (aoi) method:
207 |
208 | ```
209 | summary( A.aoi() )
210 | id.time kind actor.email id.applicationName
211 | -------------------- ------------------------ ---------------------- ----------------------------- --------------------
212 | -8637423948085216889 2022-03-14T18:07:54.887Z admin#reports#activity admin@cloud-response.com admin
213 | 768087181562 2022-03-19T15:24:52.241Z admin#reports#activity greg@cloud-response.com login
214 | -7684398170435703864 2022-03-14T20:36:48.966Z admin#reports#activity greg@cloud-response.com calendar
215 | -5686198897511485377 2022-03-19T19:38:47.642Z admin#reports#activity admin@cloud-response.com groups_enterprise
216 | 4000677510509368906 2022-03-19T21:13:29.295Z admin#reports#activity greg@cloud-response.com token
217 | -4185571506150141986 2022-03-19T21:31:14.993Z admin#reports#activity greg@cloud-response.com token
218 | 8275857749769031410 2022-03-19T21:35:04.656Z admin#reports#activity greg@cloud-response.com token
219 | -4582372916506076442 2022-03-19T22:17:46.663Z admin#reports#activity admin@cloud-response.com token
220 | 722534617001 2022-08-15T16:24:08.797Z admin#reports#activity admin@cloud-response.com login
221 | ...
222 | ```
223 |
224 | The ```aoi``` method will return a list of all activities whose events appeared in a subchain. As such, it's a quick shortcut for finding interesting sections of the logs.
225 |
226 | These activities can be exported to a json file, to be fed into a tool of your choosing:
227 | ```
228 | A.aoi(export='wow.json')
229 | ```
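The exported file is plain JSON, so it can be picked up by whatever tooling you prefer. As a sketch (assuming the export is a regular JSON document; pass lines=True if it turns out to be newline-delimited):
```
import pandas as pd

df = pd.read_json('wow.json')
```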
230 |
231 |
232 |
--------------------------------------------------------------------------------