├── runtime.txt
├── src
└── ds4n6_lib
│ ├── __init__.py
│ ├── ml_models
│ ├── __init__.py
│ ├── seq2seq_lstm.py
│ └── transformer.py
│ ├── tools.py
│ ├── d4.py
│ ├── pf.py
│ ├── amcache.py
│ ├── winreg.py
│ ├── svclist.py
│ ├── unx.py
│ ├── autoruns.py
│ ├── kansa.py
│ ├── volatility.py
│ ├── mactime.py
│ ├── fstl.py
│ ├── pslist.py
│ ├── macrobber.py
│ ├── tshark.py
│ ├── flist.py
│ ├── utils.py
│ ├── kape.py
│ └── mlgraph.py
├── setup.cfg
├── pyproject.toml
├── MANIFEST.in
├── setup.py
├── requirements.txt
├── README.md
└── CONTRIBUTING.md
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.10.12
--------------------------------------------------------------------------------
/src/ds4n6_lib/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/ds4n6_lib/ml_models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools>=42",
4 | "wheel"
5 | ]
6 | build-backend = "setuptools.build_meta"
7 |
--------------------------------------------------------------------------------
/src/ds4n6_lib/tools.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
def explore(df, col, max_rows=None, max_columns=None):
    """Print the value histogram of ``df[col]``.

    Shows the number of distinct values followed by the full
    ``value_counts()`` breakdown, with pandas display limits temporarily
    set to *max_rows* / *max_columns* (``None`` means unlimited).

    Parameters:
        df (pd.DataFrame): Input data.
        col (str): Column whose distribution is explored.
        max_rows: Temporary value for pandas 'display.max_rows'.
        max_columns: Temporary value for pandas 'display.max_columns'.
    """
    counts = df[col].value_counts()
    display_opts = ('display.max_rows', max_rows,
                    'display.max_columns', max_columns)
    with pd.option_context(*display_opts):
        print("#Count:", len(counts))
        print(counts)
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include requirements.txt
4 |
5 | include src/ds4n6_lib/isim/*.yml
6 |
7 | # Patterns to exclude from any directory
8 | global-exclude *~
9 | global-exclude *.pyc
10 | global-exclude *.pyo
11 | global-exclude .git
12 | global-exclude .ipynb_checkpoints
13 |
--------------------------------------------------------------------------------
/src/ds4n6_lib/d4.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # INFO
3 | ###############################################################################
4 | # Recommended "import as": d4
5 |
6 | ###############################################################################
7 | # VARIABLES
8 | ###############################################################################
# Debug Level (0: min - 5:max) ------------------------------------------------
# 0: Disabled
# 1: TBD
# 2: Executed functions
# 3: Low detail on executed functions
# 4: Medium detail on executed functions
# 5: High detail on executed functions
debug = 0

# Other -----------------------------------------------------------------------
# Module-level output holder; starts empty.
out = None
# IPv4 dotted-quad matcher.  Raw string so that "\." is a regex escape rather
# than an invalid Python string escape (SyntaxWarning on Python >= 3.12).
# The runtime value is identical to the previous non-raw literal.
ipregex = r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$"
21 |
###############################################################################
# DECLARE VARS
# not_well-formed
# Find/replace rules applied to not-well-formed event data before parsing.
# Each entry is a dict with 'find' and 'replace' keys; an optional
# 'type': 're' marks the pair as a regex substitution (entries without it
# are literal replacements).
# NOTE(review): the first entry repeats the 'replace' key; in a dict literal
# the last duplicate silently wins, so it is equivalent to
# {'find': '<\x04Data', 'replace': ''} — confirm that was the intent.
main_nwf=[
{'find':'<\x04Data', 'replace':'', 'replace':''},
{'find':' Data ', 'replace':' '},
{'find':' <([a-zA-Z0-9_-]*)> ', 'replace':' \\1 ', 'type':'re'},
{'find':'::<([a-zA-Z0-9_-]*)>::', 'replace':'::\\1::', 'type':'re'},
]
44 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import setuptools

# The PyPI long description is taken verbatim from the README.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="ds4n6_lib",
    version="0.8.3",
    author="Jess Garcia",
    author_email="ds4n6@one-esecurity.com",
    description="Bringing Data Science & Artificial Intelligence to the fingertips of the average Forensicator, and promote advances in the field",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/ds4n6/ds4n6_lib",
    project_urls={
        "Bug Tracker" : "https://github.com/ds4n6/ds4n6_lib/issues",
        "Website" : "http://www.ds4n6.io/"
    },
    keywords = ['dfir', 'datascience', 'forensics'],
    # NOTE: 'numpy' and 'pandas' were each listed twice; duplicates removed.
    # 'sklearn' was replaced by 'scikit-learn': the 'sklearn' PyPI name is a
    # deprecated stub whose installation now errors out (scikit-learn
    # "brownout"), while the import name in code remains 'sklearn'.
    install_requires=[
        'requests',
        'numpy',
        'pandas',
        'Evtx',
        'python-evtx',
        'ipyaggrid',
        'IPython',
        'ipywidgets',
        'keras',
        'matplotlib',
        'nbformat',
        'pyparsing',
        'qgrid',
        'ruamel.yaml',
        'scikit-learn',
        'tensorflow',
        'tqdm',
        'traitlets',
        'xmltodict',
        'networkx',
        'gensim',
    ],
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Intended Audience :: Information Technology",
        "Framework :: Jupyter",
        "Topic :: Security",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: OS Independent",
    ],
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
    python_requires=">=3.10",
)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==1.4.0
2 | argon2-cffi==20.1.0
3 | astunparse==1.6.3
4 | async-generator==1.10
5 | attrs==20.3.0
6 | backcall==0.2.0
7 | bleach==3.3.0
8 | cachetools==4.2.1
9 | certifi==2020.12.5
10 | cffi==1.14.5
11 | chardet==4.0.0
12 | configparser==4.0.2
13 | cycler==0.10.0
14 | decorator==5.0.5
15 | defusedxml==0.7.1
16 | entrypoints==0.3
17 | evtx==0.8.2
18 | flatbuffers==23.1.21
19 | gast==0.3.3
20 | gensim==4.3.2
21 | google-auth==2.16.0
22 | google-auth-oauthlib==1.0.0
23 | google-pasta==0.2.0
24 | grpcio==1.51.1
25 | h5py==3.8.0
26 | hexdump==3.3
27 | idna==2.10
28 | importlib-metadata==3.10.0
29 | ipyaggrid==0.2.1
30 | ipykernel==5.5.3
31 | ipython==7.22.0
32 | ipython-genutils==0.2.0
33 | ipywidgets==7.6.3
34 | jedi==0.18.0
35 | Jinja2==2.11.3
36 | joblib==1.2.0
37 | jsonschema==3.2.0
38 | jupyter-client==8.0.3
39 | jupyter-core==5.2.0
40 | jupyterlab-pygments==0.2.2
41 | jupyterlab-widgets==3.0.5
42 | Keras==2.13.1
43 | Keras-Preprocessing==1.1.2
44 | kiwisolver==1.3.1
45 | Markdown==3.3.4
46 | MarkupSafe==1.1.1
47 | matplotlib==3.7.0
48 | mistune==0.8.4
49 | more-itertools==5.0.0
50 | nbclient==0.5.3
51 | nbconvert==6.0.7
52 | nbformat==5.1.3
53 | nest-asyncio==1.5.1
54 | networkx==2.5
55 | notebook==6.3.0
56 | numpy==1.23.5
57 | oauthlib==3.1.0
58 | opt-einsum==3.3.0
59 | packaging==20.9
60 | pandas==2.1.4
61 | pandocfilters==1.4.3
62 | parso==0.8.2
63 | pexpect==4.8.0
64 | pickleshare==0.7.5
65 | Pillow==8.2.0
66 | prometheus-client==0.10.0
67 | prompt-toolkit==3.0.18
68 | protobuf==4.24.1
69 | ptyprocess==0.7.0
70 | pyasn1==0.4.8
71 | pyasn1-modules==0.2.8
72 | pycparser==2.20
73 | Pygments==2.8.1
74 | pyparsing==2.4.7
75 | pyrsistent==0.17.3
76 | python-dateutil==2.8.2
77 | python-evtx==0.7.4
78 | pytz==2021.1
79 | PyYAML==6.0.1
80 | pyzmq==25.0.2
81 | qgrid==1.3.1
82 | requests==2.25.1
83 | requests-oauthlib==1.3.0
84 | rsa==4.7.2
85 | ruamel.yaml==0.17.21
86 | ruamel.yaml.clib==0.2.7
87 | scikit-learn==1.2.1
88 | scipy==1.10.0
89 | Send2Trash==1.5.0
90 | simplejson==3.17.2
91 | six==1.15.0
92 | sklearn==0.0
93 | tensorboard==2.13.0
94 | tensorflow==2.13.0
95 | tensorflow-estimator==2.13.0
96 | termcolor==1.1.0
97 | terminado==0.9.4
98 | testpath==0.4.4
99 | threadpoolctl==2.1.0
100 | tornado==6.2
101 | tqdm==4.59.0
102 | traitlets==5.9.0
103 | typing-extensions==3.7.4.3
104 | urllib3==1.26.4
105 | wcwidth==0.2.5
106 | webencodings==0.5.1
107 | Werkzeug==1.0.1
108 | widgetsnbextension==3.5.1
109 | wrapt==1.12.1
110 | xmltodict==0.12.0
111 | zipp==1.0.0
--------------------------------------------------------------------------------
/src/ds4n6_lib/pf.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | from tensorflow.keras import layers
4 | from tensorflow.keras.models import Model
5 | import tensorflow as tf
6 | from tensorflow.keras import losses
7 | import matplotlib.pyplot as plt
8 | import pandas as pd
9 |
10 |
def convert_prefetch_ham_to_hml(df):
    """Split prefetch 'file_referenced' paths into volume / directory / leaf.

    Each path is split on '\\'; the leading empty component (before the
    first backslash) is discarded.  The first remaining component becomes
    column 'A', the joined middle components column 'B', and the final
    component column 'C'; 'machine_id' is carried over unchanged.

    Parameters:
        df (pd.DataFrame): Must contain 'file_referenced' and 'machine_id'.

    Returns:
        pd.DataFrame: Columns ['A', 'B', 'C', 'machine_id'].
    """
    parts = df['file_referenced'].str.split("\\", expand=True)
    parts = parts.drop(columns=[0]).fillna(value='')

    volumes = parts.iloc[:, 0]
    middles = []
    leaves = []
    for _, row in parts.iterrows():
        comps = [c for c in row.iloc[1:] if c != '']
        middles.append('\\'.join(comps[:-1]))
        # comps[-1:] instead of comps[-1]: stays safe when comps is empty
        leaves.append('\\'.join(comps[-1:]))

    records = list(zip(volumes, middles, leaves, df['machine_id']))
    return pd.DataFrame(records, columns=['A', 'B', 'C', 'machine_id'])
26 |
27 |
def ml_prefetch_anomalies(df, odalg="simple_autoencoder", latent_dim = 128, epochs = 10, learning_rate = 1e-3):
    """ Rank prefetch file references by autoencoder reconstruction error.

        Parameters:
        df (pd.DataFrame): Input data.  The first three columns are joined
            with '\\' to rebuild the referenced file path, and a
            'machine_id' column is read per row.
        odalg (str): Outlier-detection algorithm selector.
            NOTE(review): currently unused — only the autoencoder path exists.
        latent_dim (int): Size of the encoder's dense bottleneck layer.
        epochs (int): Training epochs for the autoencoder.
        learning_rate (float): Adam optimizer learning rate.

        Returns:
        pd.DataFrame: Columns ['file referenced', 'machine_id'], sorted from
            highest to lowest reconstruction loss.  Only rows whose rebuilt
            path ends in '.DLL' are included.

        NOTE(review): results are non-deterministic — weights are randomly
        initialized and no seed is fixed.
    """
    # Deep Learning
    # One-hot encode every column; the autoencoder learns to reconstruct
    # this binary matrix.
    x_train = pd.get_dummies(df).to_numpy()

    class Autoencoder(Model):
        # Single-bottleneck autoencoder: input_dim -> latent_dim -> input_dim.
        def __init__(self, input_dim, latent_dim):
            super(Autoencoder, self).__init__()
            self.input_dim = input_dim
            self.latent_dim = latent_dim
            self.encoder = layers.Dense(latent_dim, activation='relu')
            self.decoder = layers.Dense(input_dim, activation='sigmoid')

        def call(self, x):
            encoded = self.encoder(x)
            decoded = self.decoder(encoded)
            return decoded

    def train_autoencoder(latent_dim, epochs, learning_rate):
        # Train to reproduce x_train from itself (reconstruction objective).
        autoencoder = Autoencoder(input_dim=x_train.shape[1], latent_dim=latent_dim)
        opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        autoencoder.compile(optimizer=opt, loss=losses.MeanSquaredError())
        history = autoencoder.fit(x_train, x_train, epochs=epochs, shuffle=True, verbose=0)
        return autoencoder, history

    model, history = train_autoencoder(latent_dim=latent_dim,
                                       epochs=epochs,
                                       learning_rate=learning_rate)

    # Per-row reconstruction error: rows the model reproduces poorly are
    # treated as anomalous.
    preds = model.predict(x_train)
    inference_losses = tf.keras.metrics.mean_squared_error(preds, x_train.astype('float')).numpy()

    ranking = []
    for i, loss in zip(range(len(inference_losses)), inference_losses):
        # Rebuild the referenced path from the first three columns (A\B\C).
        fr = '\\'.join(df.iloc[i, :3])

        machine_id = df.iloc[i]['machine_id']
        # Only '.DLL' references are kept in the ranking.
        # NOTE(review): this silently drops every non-DLL row — confirm that
        # restriction is intentional.
        if fr.endswith('.DLL'):
            ranking.append((loss, i, fr, machine_id))

    # Highest reconstruction loss first.
    ranking = sorted(ranking, key=lambda x: -x[0])
    anomdf = pd.DataFrame(ranking, columns=['loss', 'source_index', 'file referenced', 'machine_id'])
    return anomdf[['file referenced', 'machine_id']]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | DS4N6 stands for Data Science Forensics.
12 |
13 | We also refer to this project as DSDFIR, AI4N6 or AIDFIR, since Data Science (DS) includes Artificial Intelligence (AI), and the project goes beyond the strictly Forensics, covering the whole Digital Forensics & Incident Response (DFIR) discipline (and sometimes even beyond). But hey, we had to give the project a catchy name!
14 |
15 | The Mission of the DS4N6 project is simple:
16 |
17 | ```
18 | Bringing Data Science & Artificial Intelligence
19 | to the fingertips of the average Forensicator,
20 | and promote advances in the field
21 | ```
22 |
23 | The first (modest) alpha version of our ds4n6 python library, together with some easy-to-use python scripts, was originally made public after the presentation at the SANS DFIR Summit US, July 16-17, 2020.
24 | **For detailed information about the Project, the Library, its Functions, its Usage, etc., visit the project page: http://www.ds4n6.io/tools/ds4n6.py.html**
25 |
26 | ## Getting Started
27 |
28 | These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.
29 |
30 | https://github.com/ds4n6/ds4n6_lib.git
31 |
32 | ### Prerequisites
33 |
34 | The DS4N6 library works on the 3.x versions of the Python programming language. The module has external dependencies related to data science and the extraction of forensic evidence.
35 |
36 | Install requirements:
37 |
38 | - python-evtx
39 | - Evtx
40 | - ipyaggrid
41 | - IPython
42 | - ipywidgets
43 | - keras
44 | - matplotlib
45 | - nbformat
46 | - numpy
47 | - pandas
48 | - pyparsing
49 | - qgrid
50 | - ruamel.yaml
51 | - sklearn
52 | - tensorflow
53 | - tqdm
54 | - traitlets
55 | - xmltodict
56 | - networkx
57 | - gensim
58 |
59 | ### Installation
60 |
61 | The installation can be easily done through pip.
62 |
63 | #### pip installation
64 |
65 | ```sh
66 | pip install -r requirements.txt
67 | ```
68 |
69 | Finally, import in your python3 program or Jupyter Notebook as "ds".
70 |
71 | ```python
72 | import ds4n6_lib as ds
73 | ```
74 |
75 | ## Contributing
76 |
77 | If you think you can provide value to the Community, collaborating with Research, Blog Posts, Cheatsheets, Code, etc., contact us!
78 |
79 | Please read [CONTRIBUTING.md](https://gist.github.com/PurpleBooth/b24679402957c63ec426) for details on our code of conduct, and the process for submitting pull requests to us.
80 |
81 | ### download from github
82 |
83 | All you need to do is clone the library, create a virtual environment, activate it, and install the requirements.
84 |
85 | ```sh
86 |
87 | git clone https://github.com/ds4n6/ds4n6_lib
88 |
89 | virtualenv -p python3.10 .test
90 | source .test/bin/activate
91 |
92 | pip install -r requirements.txt
93 | ```
94 |
95 | ## Authors
96 |
97 | * **Jess Garcia** - *Initial work* - http://ds4n6.io/community/jess_garcia.html
98 |
99 | See also the list of [contributors](http://ds4n6.io/community.html) who participated in this project.
100 |
101 | ## License
102 |
103 | This project is licensed under the GNU GPL v3.0 License - see the [LICENSE](LICENSE) file for details
104 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | When contributing to this repository, please first discuss the change you wish to make via issue,
4 | email, or any other method with the owners of this repository before making a change.
5 |
6 | Please note we have a code of conduct, please follow it in all your interactions with the project.
7 |
8 | ## Pull Request Process
9 |
10 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a
11 | build.
12 | 2. Update the README.md with details of changes to the interface, this includes new environment
13 | variables, exposed ports, useful file locations and container parameters.
14 | 3. Increase the version numbers in any examples files and the README.md to the new version that this
15 | Pull Request would represent.
16 | 4. You may merge the Pull Request in once you have the sign-off of two other developers, or if you
17 | do not have permission to do that, you may request the second reviewer to merge it for you.
18 |
19 | ## Code of Conduct
20 |
21 | ### Our Pledge
22 |
23 | In the interest of fostering an open and welcoming environment, we as
24 | contributors and maintainers pledge to making participation in our project and
25 | our community a harassment-free experience for everyone, regardless of age, body
26 | size, disability, ethnicity, gender identity and expression, level of experience,
27 | nationality, personal appearance, race, religion, or sexual identity and
28 | orientation.
29 |
30 | ### Our Standards
31 |
32 | Examples of behavior that contributes to creating a positive environment
33 | include:
34 |
35 | * Using welcoming and inclusive language
36 | * Being respectful of differing viewpoints and experiences
37 | * Gracefully accepting constructive criticism
38 | * Focusing on what is best for the community
39 | * Showing empathy towards other community members
40 |
41 | Examples of unacceptable behavior by participants include:
42 |
43 | * The use of sexualized language or imagery and unwelcome sexual attention or
44 | advances
45 | * Trolling, insulting/derogatory comments, and personal or political attacks
46 | * Public or private harassment
47 | * Publishing others' private information, such as a physical or electronic
48 | address, without explicit permission
49 | * Other conduct which could reasonably be considered inappropriate in a
50 | professional setting
51 |
52 | ### Our Responsibilities
53 |
54 | Project maintainers are responsible for clarifying the standards of acceptable
55 | behavior and are expected to take appropriate and fair corrective action in
56 | response to any instances of unacceptable behavior.
57 |
58 | Project maintainers have the right and responsibility to remove, edit, or
59 | reject comments, commits, code, wiki edits, issues, and other contributions
60 | that are not aligned to this Code of Conduct, or to ban temporarily or
61 | permanently any contributor for other behaviors that they deem inappropriate,
62 | threatening, offensive, or harmful.
63 |
64 | ### Scope
65 |
66 | This Code of Conduct applies both within project spaces and in public spaces
67 | when an individual is representing the project or its community. Examples of
68 | representing a project or community include using an official project e-mail
69 | address, posting via an official social media account, or acting as an appointed
70 | representative at an online or offline event. Representation of a project may be
71 | further defined and clarified by project maintainers.
72 |
73 | ### Enforcement
74 |
75 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
76 | reported by contacting the project team at ds4n6@one-esecurity.com. All
77 | complaints will be reviewed and investigated and will result in a response that
78 | is deemed necessary and appropriate to the circumstances. The project team is
79 | obligated to maintain confidentiality with regard to the reporter of an incident.
80 | Further details of specific enforcement policies may be posted separately.
81 |
82 | Project maintainers who do not follow or enforce the Code of Conduct in good
83 | faith may face temporary or permanent repercussions as determined by other
84 | members of the project's leadership.
85 |
86 | ### Attribution
87 |
88 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
89 | available at [http://contributor-covenant.org/version/1/4][version]
90 |
91 | [homepage]: http://contributor-covenant.org
92 | [version]: http://contributor-covenant.org/version/1/4/
93 |
--------------------------------------------------------------------------------
/src/ds4n6_lib/amcache.py:
--------------------------------------------------------------------------------
1 | # DS4N6
2 | #
3 | # Description: Library of functions to apply Data Science to forensics artifacts
4 | #
5 |
6 | ###############################################################################
7 | # INFO
8 | ###############################################################################
9 | # Recommended "import as": d4amch
10 |
11 | ###############################################################################
12 | # IMPORTS
13 | ###############################################################################
14 |
15 | # DEV IMPORTS ----------------------------------------------------------------
16 |
17 | # python IMPORTS --------------------------------------------------------------
18 | import os
19 | import glob
20 | import re
21 | import time
22 | import inspect
23 | import pickle
24 |
25 | # DS IMPORTS ------------------------------------------------------------------
26 | import numpy as np
27 | import pandas as pd
28 | import matplotlib.pyplot as plt
29 |
30 | # DS4N6 IMPORTS ---------------------------------------------------------------
31 | import ds4n6_lib.d4 as d4
32 | import ds4n6_lib.common as d4com
33 | import ds4n6_lib.gui as d4gui
34 | import ds4n6_lib.utils as d4utl
35 | import ds4n6_lib.unx as d4unx
36 |
37 | ###############################################################################
38 | # FUNCTIONS
39 | ###############################################################################
40 |
41 | # ANALYSIS FUNCTIONS ##########################################################
42 |
43 | # simple ======================================================================
def simple_func(df, *args, **kwargs):
    """ Reformat the input df so the data is presented to the analyst in the
        friendliest possible way

        Parameters:
        df (pd.dataframe): Input data

        Returns:
        pd.DataFrame: Optionally it will return the filtered dataframe,
            only if ret=True is set, constant & hidden columns included
            If ret_out=True is set, then the output just as it is shown
            (without constant/hidden columns) will be returned
    """

    # Artifact-specific argument parsing =======================================

    # Variables ================================================================
    # Columns hidden by default in the simplified Amcache view
    hiddencols = ['SHA1_', 'FileReference_']

    # Maximum number of lines in DF for beautification
    maxdfbprintlines = 20

    # Pre-Processing ==========================================================

    # Call to simple_common ===================================================
    # (The dead "dfout = df" pre-assignment was removed: dfout was always
    #  overwritten by simple_common's return value.)
    dfout = d4com.simple_common(df, *args, **kwargs, hiddencols=hiddencols, maxdfbprintlines=maxdfbprintlines)

    # Post-Processing =========================================================

    # Return ==================================================================
    return dfout
77 |
78 | # analysis ====================================================================
def analysis(obj, *args, **kwargs):
    """ Redirects execution to analysis_func()

        Parameters:
        obj: Input data (typically DF or dict of DFs)

        Returns:
        Whatever analysis_func() returns for the given arguments
    """
    return analysis_func(obj, *args, **kwargs)
83 |
def analysis_func(obj, *args, **kwargs):
    """ Umbrella function that redirects to different types of analysis
        available on the input data

        Parameters:
        obj: Input data (typically DF or dict of DFs), or the strings
             "help" / "list"
        args[0] (str): Analysis type to run (or "help" / "list")

        Returns:
        pd.DataFrame: Refer to each specific analysis function
    """

    def syntax():
        print('Syntax: analysis(obj, "analysis_type")\n')
        d4list("str-help")
        return

    def d4list(objtype):

        # Analysis Modules Available for this objective
        anlav = False
        print("Available Amcache analysis types:")
        # if objtype == None or objtype == "str-help" or objtype == "str-list" or re.search("^dict-pandas_dataframe-XXXXX", objtype):
        #     anlav = True
        #     print("- XXXXX_files: No.events XXXXX file (Input: XXXdfs)")

        if anlav == False:
            print('- No analysis modules available for this object ('+objtype+').')

    # BUGFIX: the original body read both the data object and the analysis
    # type out of *args* alone, but the data object is bound to the explicit
    # "obj" parameter — so args[0] was really the analysis type and the
    # documented call analysis(obj, "analysis_type") never reached the
    # analysis dispatch.  Re-unify them so the positional handling below
    # matches the documented syntax.
    args = (obj,) + args

    nargs = len(args)

    obj = args[0]

    objtype = d4com.data_identify(obj)

    if isinstance(obj, str):
        if obj == "list":
            d4list(objtype)
            return
        if obj == "help":
            syntax()
            return

    # Only the data object was supplied: show usage.
    if nargs == 1:
        syntax()
        return

    anltype = args[1]

    if not isinstance(anltype, str):
        syntax()
        return

    if anltype == "help":
        syntax()
        return
    elif anltype == "list":
        d4list(objtype)
        return

    # ANALYSIS FUNCTIONS ======================================================

    # XXXdfs ------------------------------------------------------------------
    # if re.search("^dict-pandas_dataframe-XXXXX", objtype):
    #     if anltype == "XXXXX_files":
    #         return analysis_XXXXX_files(*args, **kwargs)

    print("INFO: [d4amch] No analysis functions available for this data type ("+objtype+")")
155 |
156 | # DATAFRAME ACCESSOR ##########################################################
157 |
@pd.api.extensions.register_dataframe_accessor("d4amch")
class Ds4n6AmchAccessor:
    """Exposes the ``df.d4amch`` accessor namespace on pandas DataFrames."""

    def __init__(self, pandas_obj):
        # Keep a handle on the wrapped DataFrame for the accessor methods.
        self._obj = pandas_obj

    def simple(self, *args, **kwargs):
        """Redirects execution to simple_func()"""
        return simple_func(self._obj, *args, **kwargs)
168 |
169 |
@pd.api.extensions.register_dataframe_accessor("d4_amcache")
class Ds4n6AmcacheAccessor:
    """Exposes the ``df.d4_amcache`` accessor namespace (long-form alias)."""

    def __init__(self, pandas_obj):
        # Keep a handle on the wrapped DataFrame for the accessor methods.
        self._obj = pandas_obj

    def simple(self, *args, **kwargs):
        """Redirects execution to simple_func()"""
        return simple_func(self._obj, *args, **kwargs)
180 |
--------------------------------------------------------------------------------
/src/ds4n6_lib/winreg.py:
--------------------------------------------------------------------------------
1 | # DS4N6
2 | #
3 | # Description: Library of functions to apply Data Science to forensics artifacts
4 | #
5 |
6 |
7 | ###############################################################################
8 | # INFO
9 | ###############################################################################
10 | # Recommended "import as": d4reg
11 |
12 | ###############################################################################
13 | # IMPORTS
14 | ###############################################################################
15 |
16 | # DEV IMPORTS ----------------------------------------------------------------
17 |
18 | # python IMPORTS --------------------------------------------------------------
19 | import os
20 | import glob
21 | import re
22 | import time
23 | import inspect
24 | import pickle
25 |
26 | # DS IMPORTS ------------------------------------------------------------------
27 | import numpy as np
28 | import pandas as pd
29 | import matplotlib.pyplot as plt
30 |
31 | # DS4N6 IMPORTS ---------------------------------------------------------------
32 | import ds4n6_lib.d4 as d4
33 | import ds4n6_lib.common as d4com
34 | import ds4n6_lib.gui as d4gui
35 | import ds4n6_lib.utils as d4utl
36 | import ds4n6_lib.unx as d4unx
37 |
38 | ###############################################################################
39 | # FUNCTIONS
40 | ###############################################################################
41 |
42 | # ANALYSIS FUNCTIONS ##########################################################
43 |
44 | # simple ======================================================================
def simple_func(df, *args, **kwargs):
    """ Reformat the input df so the data is presented to the analyst in the
        friendliest possible way

        Parameters:
        df (pd.dataframe): Input data

        Returns:
        pd.DataFrame: Optionally it will return the filtered dataframe,
            only if ret=True is set, constant & hidden columns included
            If ret_out=True is set, then the output just as it is shown
            (without constant/hidden columns) will be returned
    """

    # Artifact-specific argument parsing =======================================

    # Variables ================================================================
    # Columns hidden by default in the simplified winreg view
    hiddencols = ['KeyLastWriteTimestamp_', 'KeyPath_', 'KeyPath-Hash_']

    # Maximum number of lines in DF for beautification
    maxdfbprintlines = 20

    # Pre-Processing ==========================================================

    # Call to simple_common ===================================================
    # (The dead "dfout = df" pre-assignment was removed: dfout was always
    #  overwritten by simple_common's return value.)
    dfout = d4com.simple_common(df, *args, **kwargs, hiddencols=hiddencols, maxdfbprintlines=maxdfbprintlines)

    # Post-Processing =========================================================

    # Return ==================================================================
    return dfout
78 |
79 | # analysis ====================================================================
def analysis(obj, *args, **kwargs):
    """ Redirects execution to analysis_func()

        Parameters:
        obj: Input data (typically DF or dict of DFs)

        Returns:
        Whatever analysis_func() returns for the given arguments
    """
    return analysis_func(obj, *args, **kwargs)
84 |
def analysis_func(obj, *args, **kwargs):
    """ Umbrella function that redirects to different types of analysis
        available on the input data

        Parameters:
        obj: Input data (typically DF or dict of DFs), or the strings
             "help" / "list"
        args[0] (str): Analysis type to run (or "help" / "list")

        Returns:
        pd.DataFrame: Refer to each specific analysis function
    """

    def syntax():
        print('Syntax: analysis(obj, "analysis_type")\n')
        d4list("str-help")
        return

    def d4list(objtype):

        # Analysis Modules Available for this objective
        anlav = False
        print("Available winreg analysis types:")
        # if objtype == None or objtype == "str-help" or objtype == "str-list" or re.search("^dict-pandas_dataframe-winreg_kv", objtype):
        #     anlav = True
        #     print("- winreg_files: No.events winreg file (Input: winreg dfs)")

        if not anlav:
            print('- No analysis modules available for this object ('+objtype+').')

    # BUGFIX: the original body read both the data object and the analysis
    # type out of *args* alone, but the data object is bound to the explicit
    # "obj" parameter — so args[0] was really the analysis type and the
    # documented call analysis(obj, "analysis_type") never reached the
    # analysis dispatch.  Re-unify them so the positional handling below
    # matches the documented syntax.
    args = (obj,) + args

    nargs = len(args)

    obj = args[0]

    objtype = d4com.data_identify(obj)

    if isinstance(obj, str):
        if obj == "list":
            d4list(objtype)
            return
        if obj == "help":
            syntax()
            return

    # Only the data object was supplied: show usage.
    if nargs == 1:
        syntax()
        return

    anltype = args[1]

    if not isinstance(anltype, str):
        syntax()
        return

    if anltype == "help":
        syntax()
        return
    elif anltype == "list":
        d4list(objtype)
        return

    # ANALYSIS FUNCTIONS ======================================================

    # XXXdfs ------------------------------------------------------------------
    # if re.search("^dict-pandas_dataframe-XXXXX", objtype):
    #     if anltype == "XXXXX_files":
    #         return analysis_XXXXX_files(*args, **kwargs)

    print("INFO: [d4reg] No analysis functions available for this data type ("+objtype+")")
156 |
157 | # DATAFRAME ACCESSOR ##########################################################
158 |
@pd.api.extensions.register_dataframe_accessor("d4reg")
class Ds4n6RegAccessor:
    """Exposes the ``df.d4reg`` accessor namespace on pandas DataFrames."""

    def __init__(self, pandas_obj):
        # Keep a handle on the wrapped DataFrame for the accessor methods.
        self._obj = pandas_obj

    def simple(self, *args, **kwargs):
        """Redirects execution to simple_func()"""
        return simple_func(self._obj, *args, **kwargs)
169 |
170 |
@pd.api.extensions.register_dataframe_accessor("d4_winreg")
class Ds4n6WinRegAccessor:
    """Exposes the ``df.d4_winreg`` accessor namespace (long-form alias)."""

    def __init__(self, pandas_obj):
        # Keep a handle on the wrapped DataFrame for the accessor methods.
        self._obj = pandas_obj

    def simple(self, *args, **kwargs):
        """Redirects execution to simple_func()"""
        return simple_func(self._obj, *args, **kwargs)
181 |
--------------------------------------------------------------------------------
/src/ds4n6_lib/svclist.py:
--------------------------------------------------------------------------------
1 | # DS4N6
2 | #
3 | # Description: Library of functions to apply Data Science to forensics artifacts
4 | #
5 |
6 | ###############################################################################
7 | # INFO
8 | ###############################################################################
9 | # Recommended "import as": d4svclst
10 |
11 | ###############################################################################
12 | # IMPORTS
13 | ###############################################################################
14 |
15 | # DEV IMPORTS ----------------------------------------------------------------
16 |
17 | # python IMPORTS --------------------------------------------------------------
18 | import os
19 | import glob
20 | import re
21 | import time
22 | import inspect
23 | import pickle
24 |
25 | # DS IMPORTS ------------------------------------------------------------------
26 | import numpy as np
27 | import pandas as pd
28 | import matplotlib.pyplot as plt
29 |
30 | # DS4N6 IMPORTS ---------------------------------------------------------------
31 | import ds4n6_lib.d4 as d4
32 | import ds4n6_lib.common as d4com
33 | import ds4n6_lib.gui as d4gui
34 | import ds4n6_lib.utils as d4utl
35 | import ds4n6_lib.unx as d4unx
36 |
37 | ###############################################################################
38 | # FUNCTIONS
39 | ###############################################################################
40 |
41 | # ANALYSIS FUNCTIONS ##########################################################
42 |
43 | # simple ======================================================================
def simple_func(df, *args, **kwargs):
    """ Reformat the input df so the data is presented to the analyst in the
        friendliest possible way.

        Parameters:
        df (pd.DataFrame): Input data

        Returns:
        pd.DataFrame: Returned only if ret=True is set (constant & hidden
            columns included). If ret_out=True is set, the returned DF is the
            output exactly as it is shown (without constant/hidden columns).
    """

    if d4.debug >= 3:
        print("DEBUG: [DBG"+str(d4.debug)+"] ["+str(os.path.basename(__file__))+"] ["+str(inspect.currentframe().f_code.co_name)+"()]")

    # Artifact-specific argument parsing ======================================
    usercols = kwargs.get('hiddencols', [])

    # This artifact defines no hidden columns of its own
    artifactcols = []

    # Merge artifact hiddencols with user-specified hiddencols + update kwargs
    kwargs['hiddencols'] = usercols + artifactcols

    # Maximum number of lines in DF for beautification
    maxdfbprintlines = 20

    # Delegate the actual work to the common implementation ===================
    return d4com.simple_common(df, *args, **kwargs, maxdfbprintlines=maxdfbprintlines)
85 |
86 | # analysis ====================================================================
def analysis(obj, *args, **kwargs):
    """ Public entry point; execution is delegated to analysis_func().
    """
    result = analysis_func(obj, *args, **kwargs)
    return result
91 |
def analysis_func(obj, *args, **kwargs):
    """ Umbrella function that redirects to different types of analysis
        available on the input data.

        Parameters:
        obj: Input data (typically DF or dict of DFs). May also be the string
             "list" / "help" to show the available analyses / the syntax.
        args[0] (str, optional): Analysis type to run ("help" and "list" are
             also accepted).

        Returns:
        pd.DataFrame: Refer to each specific analysis function
    """

    # SUB-FUNCTIONS ###########################################################
    def syntax():
        print('Syntax: analysis(obj, "analysis_type")\n')
        d4list("str-help")
        return

    def d4list(objtype):

        # Analysis Modules Available for this object type
        anlav = False
        print("Available XXXXX analysis types:")
        if objtype is None or objtype == "str-help" or objtype == "str-list" or re.search("^dict-pandas_dataframe-XXXXX", objtype):
            anlav = True
            print("- XXXXX_files: No.events XXXXX file (Input: XXXdfs)")

        if not anlav:
            print('- No analysis modules available for this object ('+objtype+').')

    # FUNCTION BODY ###########################################################
    if d4.debug >= 3:
        print("DEBUG: [DBG"+str(d4.debug)+"] ["+str(os.path.basename(__file__))+"] ["+str(inspect.currentframe().f_code.co_name)+"()]")

    thisdatatype = "XXXXXXXX-THIS_DATA_TYPE"

    # NOTE(fix): "obj" is already bound as the first parameter and "args"
    # holds only the arguments that follow it. The previous template code
    # re-read obj from args[0] (clobbering the real input with the analysis
    # type string) and anltype from args[1] (always out of range for normal
    # calls), so no analysis could ever be dispatched.
    nargs = len(args)

    objtype = d4com.data_identify(obj)

    if isinstance(obj, str):
        if obj == "list":
            d4list(objtype)
            return
        if obj == "help":
            syntax()
            return

    if nargs == 0:
        # obj given with no analysis type: list what is available for its type
        if thisdatatype is not None:
            if re.search("^dict-pandas_dataframe-"+thisdatatype, objtype) or re.search("^pandas_dataframe-"+thisdatatype, objtype):
                d4list(objtype)
            else:
                syntax()
        else:
            syntax()

        return

    anltype = args[0]

    if not isinstance(anltype, str):
        syntax()
        return

    if anltype == "help":
        syntax()
        return
    elif anltype == "list":
        d4list(objtype)
        return

    # ANALYSIS FUNCTIONS ======================================================

    # XXXdfs ------------------------------------------------------------------
    if re.search("^dict-pandas_dataframe-XXXXX", objtype):
        if anltype == "XXXXX_files":
            return analysis_XXXXX_files(obj, *args, **kwargs)

    print("INFO: [d4svclst] No analysis functions available for this data type ("+objtype+")")
177 |
178 | # DATAFRAME ACCESSOR ##########################################################
179 |
@pd.api.extensions.register_dataframe_accessor("d4svclst")
class Ds4n6SvcLstAccessor:
    """pandas DataFrame accessor exposing ds4n6 helpers under ``df.d4svclst``.

    NOTE(fix): renamed from Ds4n6SvcListAccessor — that name was also used by
    the "d4_svclist" accessor class defined right below, which silently
    shadowed this one at module level.
    """

    def __init__(self, pandas_obj):
        # Keep a reference to the DataFrame this accessor is bound to
        self._obj = pandas_obj

    def simple(self, *args, **kwargs):
        """ Redirects execution to simple_func()
        """
        df = self._obj
        return simple_func(df, *args, **kwargs)
190 |
191 |
@pd.api.extensions.register_dataframe_accessor("d4_svclist")
class Ds4n6SvcListAccessor:
    """pandas DataFrame accessor exposing ds4n6 helpers under ``df.d4_svclist``."""

    def __init__(self, pandas_obj):
        # Keep a reference to the DataFrame this accessor is bound to
        self._obj = pandas_obj

    def simple(self, *args, **kwargs):
        """ Redirects execution to simple_func()
        """
        return simple_func(self._obj, *args, **kwargs)
202 |
--------------------------------------------------------------------------------
/src/ds4n6_lib/unx.py:
--------------------------------------------------------------------------------
1 | # DS4N6
2 | #
3 | # Description: library of functions to apply Data Science to several
4 | #              forensic artifacts
5 | #
6 |
7 | ###############################################################################
8 | # IDEAS
9 | ###############################################################################
10 | # dfsed
11 | # multicol -> For a series or DF col, show in multiple cols to optimize screen
12 | # Equiv. to Linux: pr -l1 -t -3 /t
13 |
14 | ###############################################################################
15 | # INFO
16 | ###############################################################################
17 | # Recommended "import as": d4unx
18 |
19 | ###############################################################################
20 | # IMPORTS
21 | ###############################################################################
22 |
23 | # DEV IMPORTS ----------------------------------------------------------------
24 |
25 | # python IMPORTS --------------------------------------------------------------
26 | import re
27 | import inspect
28 |
29 | # DS IMPORTS ------------------------------------------------------------------
30 | import numpy as np
31 | import pandas as pd
32 |
33 | ###############################################################################
34 | # FUNCTIONS
35 | ###############################################################################
36 |
37 | def xgrep_func(*args, **kwargs):
38 |
39 | def syntax():
40 | print('Syntax: xgrep(