├── .github ├── CODEOWNERS ├── dependabot.yml ├── wordlist.txt └── workflows │ ├── bandit.yml │ ├── codeql-analysis.yml │ ├── linting.yml │ ├── pylint.yml │ └── spelling.yml ├── .gitignore ├── .pylintrc ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── Falcon Data Replicator Sample ├── data_replicator_config.py └── data_replicator_sample_consumer.py ├── LICENSE ├── README.md ├── SECURITY.md ├── falcon_data_replicator.ini ├── falcon_data_replicator.py ├── fdr └── fdrconnector.py ├── ocsf ├── __init__.py └── ocsf.py ├── requirements.txt └── standalone └── falcon_data_replicator.py /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in 2 | # the repo. Unless a later match takes precedence, 3 | # @global-owner1 and @global-owner2 will be requested for 4 | # review when someone opens a pull request. 5 | * @jshcodes 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: pip 9 | directory: "/" 10 | schedule: 11 | interval: weekly 12 | open-pull-requests-limit: 10 13 | - package-ecosystem: github-actions 14 | directory: "/" 15 | schedule: 16 | interval: monthly 17 | open-pull-requests-limit: 10 18 | -------------------------------------------------------------------------------- /.github/wordlist.txt: -------------------------------------------------------------------------------- 1 | CrowdStrike 2 | html 3 | http 4 | https 5 | www 6 | faq 7 | SQS 8 | ini 9 | py 10 | autogenerated 11 | boto 12 | botocore 13 | dateutil 14 | jmespath 15 | urllib 16 | config 17 | codebase 18 | socio 19 | sexualized 20 | CodeQL 21 | Snyk 22 | fastparquet 23 | filelock 24 | json 25 | numpy 26 | pyyaml 27 | txt 28 | OCSF 29 | -------------------------------------------------------------------------------- /.github/workflows/bandit.yml: -------------------------------------------------------------------------------- 1 | name: Bandit 2 | on: 3 | push: 4 | paths: 5 | - '**.py' 6 | branches: 7 | - main 8 | - 'ver_*' 9 | pull_request: 10 | paths: 11 | - '**.py' 12 | branches: 13 | - main 14 | - 'ver_*' 15 | 16 | jobs: 17 | analyze: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | matrix: 21 | python-version: ['3.9'] 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | python -m pip install bandit 32 | pip install -r requirements.txt 33 | - name: Analyze stand-alone with bandit 34 | run: | 35 | bandit -r . 36 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 
3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | schedule: 21 | - cron: '40 6 * * 6' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'python' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v4 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v3 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v3 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v3 68 | -------------------------------------------------------------------------------- /.github/workflows/linting.yml: -------------------------------------------------------------------------------- 1 | name: Flake8 2 | on: 3 | push: 4 | paths: 5 | - '**.py' 6 | branches: 7 | - main 8 | - 'ver_*' 9 | pull_request: 10 | paths: 11 | - '**.py' 12 | branches: 13 | - main 14 | - 'ver_*' 15 | 16 | jobs: 17 | analyze: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | matrix: 21 | python-version: ['3.9'] 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | python -m pip install flake8 32 | pip install -r requirements.txt 33 | - name: Lint with flake8 34 | run: | 35 | # stop the build if there are Python syntax errors or undefined names 36 | flake8 standalone --count --select=E9,F63,F7,F82 --show-source --statistics 37 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 38 | flake8 . 
--count --max-line-length=127 --statistics 39 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Python Lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - '**.py' 9 | pull_request: 10 | branches: 11 | - main 12 | paths: 13 | - '**.py' 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | strategy: 19 | matrix: 20 | python-version: ['3.9'] 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install pylint 31 | - name: Install package dependencies 32 | run: | 33 | python -m pip install -r requirements.txt 34 | - name: Lint main with pylint 35 | run: | 36 | pylint *.py 37 | - name: Lint ocsf with pylint 38 | run: | 39 | pylint ocsf 40 | - name: Lint fdr with pylint 41 | run: | 42 | pylint fdr 43 | -------------------------------------------------------------------------------- /.github/workflows/spelling.yml: -------------------------------------------------------------------------------- 1 | name: Spell Check 2 | on: 3 | pull_request: 4 | paths: 5 | - '**.md' 6 | push: 7 | paths: 8 | - '**.md' 9 | jobs: 10 | spelling: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Check Spelling 15 | uses: SFLScientific/spellcheck-github-actions@master 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | ext/ 132 | downloaded/ 133 | ocsf/mappings 134 | 135 | .idea 136 | .DS_Store 137 | 138 | # files for local testing 139 | utils/ 140 | falcon_data_replicator_local.ini -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MAIN] 2 | 3 | # Analyse import fallback blocks. This can be used to support both Python 2 and 4 | # 3 compatible code, which means that the block might have code that exists 5 | # only in one or another interpreter, leading to false positives when analysed. 6 | analyse-fallback-blocks=no 7 | 8 | # Load and enable all available extensions. Use --list-extensions to see a list 9 | # all available extensions. 10 | #enable-all-extensions= 11 | 12 | # In error mode, checkers without error messages are disabled and for others, 13 | # only the ERROR messages are displayed, and no reports are done by default. 14 | #errors-only= 15 | 16 | # Always return a 0 (non-error) status code, even if lint errors are found. 17 | # This is primarily useful in continuous integration scripts. 18 | #exit-zero= 19 | 20 | # A comma-separated list of package or module names from where C extensions may 21 | # be loaded. Extensions are loading into the active Python interpreter and may 22 | # run arbitrary code. 23 | extension-pkg-allow-list= 24 | 25 | # A comma-separated list of package or module names from where C extensions may 26 | # be loaded. Extensions are loading into the active Python interpreter and may 27 | # run arbitrary code. (This is an alternative name to extension-pkg-allow-list 28 | # for backward compatibility.) 29 | extension-pkg-whitelist= 30 | 31 | # Return non-zero exit code if any of these messages/categories are detected, 32 | # even if score is above --fail-under value. Syntax same as enable. Messages 33 | # specified are enabled, while categories only check already-enabled messages. 34 | fail-on= 35 | 36 | # Specify a score threshold to be exceeded before program exits with error. 37 | fail-under=10 38 | 39 | # Interpret the stdin as a python script, whose filename needs to be passed as 40 | # the module_or_package argument. 41 | #from-stdin= 42 | 43 | # Files or directories to be skipped. They should be base names, not paths. 44 | ignore=CVS 45 | 46 | # Add files or directories matching the regex patterns to the ignore-list. The 47 | # regex matches against paths and can be in Posix or Windows format. 48 | ignore-paths= 49 | 50 | # Files or directories matching the regex patterns are skipped. The regex 51 | # matches against base names, not paths. 
The default value ignores Emacs file 52 | # locks 53 | ignore-patterns=^\.# 54 | 55 | # List of module names for which member attributes should not be checked 56 | # (useful for modules/projects where namespaces are manipulated during runtime 57 | # and thus existing member attributes cannot be deduced by static analysis). It 58 | # supports qualified module names, as well as Unix pattern matching. 59 | ignored-modules= 60 | 61 | # Python code to execute, usually for sys.path manipulation such as 62 | # pygtk.require(). 63 | #init-hook= 64 | 65 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 66 | # number of processors available to use. 67 | jobs=1 68 | 69 | # Control the amount of potential inferred values when inferring a single 70 | # object. This can help the performance when dealing with large functions or 71 | # complex, nested conditions. 72 | limit-inference-results=100 73 | 74 | # List of plugins (as comma separated values of python module names) to load, 75 | # usually to register additional checkers. 76 | load-plugins= 77 | 78 | # Pickle collected data for later comparisons. 79 | persistent=yes 80 | 81 | # Minimum Python version to use for version dependent checks. Will default to 82 | # the version used to run pylint. 83 | py-version=3.9 84 | 85 | # Discover python modules and packages in the file system subtree. 86 | recursive=no 87 | 88 | # When enabled, pylint would attempt to guess common misconfiguration and emit 89 | # user-friendly hints instead of false-positive error messages. 90 | suggestion-mode=yes 91 | 92 | # Allow loading of arbitrary C extensions. Extensions are imported into the 93 | # active Python interpreter and may run arbitrary code. 94 | unsafe-load-any-extension=no 95 | 96 | # In verbose mode, extra non-checker-related info will be displayed. 97 | #verbose= 98 | 99 | 100 | [REPORTS] 101 | 102 | # Python expression which should return a score less than or equal to 10. You 103 | # have access to the variables 'fatal', 'error', 'warning', 'refactor', 104 | # 'convention', and 'info' which contain the number of messages in each 105 | # category, as well as 'statement' which is the total number of statements 106 | # analyzed. This score is used by the global evaluation report (RP0004). 107 | evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) 108 | 109 | # Template used to display messages. This is a python new-style format string 110 | # used to format the message information. See doc for all details. 111 | msg-template= 112 | 113 | # Set the output format. Available formats are text, parseable, colorized, json 114 | # and msvs (visual studio). You can also give a reporter class, e.g. 115 | # mypackage.mymodule.MyReporterClass. 116 | #output-format= 117 | 118 | # Tells whether to display a full report or only the messages. 119 | reports=no 120 | 121 | # Activate the evaluation score. 122 | score=yes 123 | 124 | 125 | [MESSAGES CONTROL] 126 | 127 | # Only show warnings with the listed confidence levels. Leave empty to show 128 | # all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, 129 | # UNDEFINED. 130 | confidence=HIGH, 131 | CONTROL_FLOW, 132 | INFERENCE, 133 | INFERENCE_FAILURE, 134 | UNDEFINED 135 | 136 | # Disable the message, report, category or checker with the given id(s). 
You 137 | # can either give multiple identifiers separated by comma (,) or put this 138 | # option multiple times (only on the command line, not in the configuration 139 | # file where it should appear only once). You can also use "--disable=all" to 140 | # disable everything first and then re-enable specific checks. For example, if 141 | # you want to run only the similarities checker, you can use "--disable=all 142 | # --enable=similarities". If you want to run only the classes checker, but have 143 | # no Warning level messages displayed, use "--disable=all --enable=classes 144 | # --disable=W". 145 | disable=raw-checker-failed, 146 | bad-inline-option, 147 | locally-disabled, 148 | file-ignored, 149 | suppressed-message, 150 | useless-suppression, 151 | deprecated-pragma, 152 | use-symbolic-message-instead 153 | 154 | # Enable the message, report, category or checker with the given id(s). You can 155 | # either give multiple identifier separated by comma (,) or put this option 156 | # multiple time (only on the command line, not in the configuration file where 157 | # it should appear only once). See also the "--disable" option for examples. 158 | enable=c-extension-no-member 159 | 160 | 161 | [LOGGING] 162 | 163 | # The type of string formatting that logging methods do. `old` means using % 164 | # formatting, `new` is for `{}` formatting. 165 | logging-format-style=old 166 | 167 | # Logging modules to check that the string format arguments are in logging 168 | # function parameter format. 169 | logging-modules=logging 170 | 171 | 172 | [SPELLING] 173 | 174 | # Limits count of emitted suggestions for spelling mistakes. 175 | max-spelling-suggestions=4 176 | 177 | # Spelling dictionary name. Available dictionaries: none. To make it work, 178 | # install the 'python-enchant' package. 179 | spelling-dict= 180 | 181 | # List of comma separated words that should be considered directives if they 182 | # appear at the beginning of a comment and should not be checked. 183 | spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: 184 | 185 | # List of comma separated words that should not be checked. 186 | spelling-ignore-words= 187 | 188 | # A path to a file that contains the private dictionary; one word per line. 189 | spelling-private-dict-file= 190 | 191 | # Tells whether to store unknown words to the private dictionary (see the 192 | # --spelling-private-dict-file option) instead of raising a message. 193 | spelling-store-unknown-words=no 194 | 195 | 196 | [MISCELLANEOUS] 197 | 198 | # List of note tags to take in consideration, separated by a comma. 199 | notes=FIXME, 200 | XXX, 201 | TODO 202 | 203 | # Regular expression of note tags to take in consideration. 204 | notes-rgx= 205 | 206 | 207 | [TYPECHECK] 208 | 209 | # List of decorators that produce context managers, such as 210 | # contextlib.contextmanager. Add to this list to register other decorators that 211 | # produce valid context managers. 212 | contextmanager-decorators=contextlib.contextmanager 213 | 214 | # List of members which are set dynamically and missed by pylint inference 215 | # system, and so shouldn't trigger E1101 when accessed. Python regular 216 | # expressions are accepted. 217 | generated-members= 218 | 219 | # Tells whether to warn about missing members when the owner of the attribute 220 | # is inferred to be None. 
221 | ignore-none=yes 222 | 223 | # This flag controls whether pylint should warn about no-member and similar 224 | # checks whenever an opaque object is returned when inferring. The inference 225 | # can return multiple potential results while evaluating a Python object, but 226 | # some branches might not be evaluated, which results in partial inference. In 227 | # that case, it might be useful to still emit no-member and other checks for 228 | # the rest of the inferred objects. 229 | ignore-on-opaque-inference=yes 230 | 231 | # List of symbolic message names to ignore for Mixin members. 232 | ignored-checks-for-mixins=no-member, 233 | not-async-context-manager, 234 | not-context-manager, 235 | attribute-defined-outside-init 236 | 237 | # List of class names for which member attributes should not be checked (useful 238 | # for classes with dynamically set attributes). This supports the use of 239 | # qualified names. 240 | ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace 241 | 242 | # Show a hint with possible names when a member name was not found. The aspect 243 | # of finding the hint is based on edit distance. 244 | missing-member-hint=yes 245 | 246 | # The minimum edit distance a name should have in order to be considered a 247 | # similar match for a missing member name. 248 | missing-member-hint-distance=1 249 | 250 | # The total number of similar names that should be taken in consideration when 251 | # showing a hint for a missing member. 252 | missing-member-max-choices=1 253 | 254 | # Regex pattern to define which classes are considered mixins. 255 | mixin-class-rgx=.*[Mm]ixin 256 | 257 | # List of decorators that change the signature of a decorated function. 258 | signature-mutators= 259 | 260 | 261 | [CLASSES] 262 | 263 | # Warn about protected attribute access inside special methods 264 | check-protected-access-in-special-methods=no 265 | 266 | # List of method names used to declare (i.e. assign) instance attributes. 267 | defining-attr-methods=__init__, 268 | __new__, 269 | setUp, 270 | __post_init__ 271 | 272 | # List of member names, which should be excluded from the protected access 273 | # warning. 274 | exclude-protected=_asdict, 275 | _fields, 276 | _replace, 277 | _source, 278 | _make 279 | 280 | # List of valid names for the first argument in a class method. 281 | valid-classmethod-first-arg=cls 282 | 283 | # List of valid names for the first argument in a metaclass class method. 284 | valid-metaclass-classmethod-first-arg=cls 285 | 286 | 287 | [VARIABLES] 288 | 289 | # List of additional names supposed to be defined in builtins. Remember that 290 | # you should avoid defining new builtins when possible. 291 | additional-builtins= 292 | 293 | # Tells whether unused global variables should be treated as a violation. 294 | allow-global-unused-variables=yes 295 | 296 | # List of names allowed to shadow builtins 297 | allowed-redefined-builtins= 298 | 299 | # List of strings which can identify a callback function by name. A callback 300 | # name must start or end with one of those strings. 301 | callbacks=cb_, 302 | _cb 303 | 304 | # A regular expression matching the name of dummy variables (i.e. expected to 305 | # not be used). 306 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 307 | 308 | # Argument names that match this expression will be ignored. Default to name 309 | # with leading underscore. 
310 | ignored-argument-names=_.*|^ignored_|^unused_ 311 | 312 | # Tells whether we should check for unused import in __init__ files. 313 | init-import=no 314 | 315 | # List of qualified module names which can have objects that can redefine 316 | # builtins. 317 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 318 | 319 | 320 | [FORMAT] 321 | 322 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 323 | expected-line-ending-format= 324 | 325 | # Regexp for a line that is allowed to be longer than the limit. 326 | ignore-long-lines=^\s*(# )??$ 327 | 328 | # Number of spaces of indent required inside a hanging or continued line. 329 | indent-after-paren=4 330 | 331 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 332 | # tab). 333 | indent-string=' ' 334 | 335 | # Maximum number of characters on a single line. 336 | max-line-length=127 337 | 338 | # Maximum number of lines in a module. 339 | max-module-lines=1000 340 | 341 | # Allow the body of a class to be on the same line as the declaration if body 342 | # contains single statement. 343 | single-line-class-stmt=no 344 | 345 | # Allow the body of an if to be on the same line as the test if there is no 346 | # else. 347 | single-line-if-stmt=no 348 | 349 | 350 | [IMPORTS] 351 | 352 | # List of modules that can be imported at any level, not just the top level 353 | # one. 354 | allow-any-import-level= 355 | 356 | # Allow wildcard imports from modules that define __all__. 357 | allow-wildcard-with-all=no 358 | 359 | # Deprecated modules which should not be used, separated by a comma. 360 | deprecated-modules= 361 | 362 | # Output a graph (.gv or any supported image format) of external dependencies 363 | # to the given file (report RP0402 must not be disabled). 364 | ext-import-graph= 365 | 366 | # Output a graph (.gv or any supported image format) of all (i.e. internal and 367 | # external) dependencies to the given file (report RP0402 must not be 368 | # disabled). 369 | import-graph= 370 | 371 | # Output a graph (.gv or any supported image format) of internal dependencies 372 | # to the given file (report RP0402 must not be disabled). 373 | int-import-graph= 374 | 375 | # Force import order to recognize a module as part of the standard 376 | # compatibility libraries. 377 | known-standard-library= 378 | 379 | # Force import order to recognize a module as part of a third party library. 380 | known-third-party=enchant 381 | 382 | # Couples of modules and preferred modules, separated by a comma. 383 | preferred-modules= 384 | 385 | 386 | [EXCEPTIONS] 387 | 388 | # Exceptions that will emit a warning when caught. 389 | overgeneral-exceptions=BaseException, 390 | Exception 391 | 392 | 393 | [REFACTORING] 394 | 395 | # Maximum number of nested blocks for function / method body 396 | max-nested-blocks=6 397 | 398 | # Complete name of functions that never returns. When checking for 399 | # inconsistent-return-statements if a never returning function is called then 400 | # it will be considered as an explicit return statement and no message will be 401 | # printed. 
402 | never-returning-functions=sys.exit,argparse.parse_error 403 | 404 | 405 | [SIMILARITIES] 406 | 407 | # Comments are removed from the similarity computation 408 | ignore-comments=yes 409 | 410 | # Docstrings are removed from the similarity computation 411 | ignore-docstrings=yes 412 | 413 | # Imports are removed from the similarity computation 414 | ignore-imports=yes 415 | 416 | # Signatures are removed from the similarity computation 417 | ignore-signatures=yes 418 | 419 | # Minimum lines number of a similarity. 420 | min-similarity-lines=4 421 | 422 | 423 | [DESIGN] 424 | 425 | # List of regular expressions of class ancestor names to ignore when counting 426 | # public methods (see R0903) 427 | exclude-too-few-public-methods= 428 | 429 | # List of qualified class names to ignore when counting class parents (see 430 | # R0901) 431 | ignored-parents= 432 | 433 | # Maximum number of arguments for function / method. 434 | max-args=5 435 | 436 | # Maximum number of attributes for a class (see R0902). 437 | max-attributes=7 438 | 439 | # Maximum number of boolean expressions in an if statement (see R0916). 440 | max-bool-expr=5 441 | 442 | # Maximum number of branch for function / method body. 443 | max-branches=15 444 | 445 | # Maximum number of locals for function / method body. 446 | max-locals=35 447 | 448 | # Maximum number of parents for a class (see R0901). 449 | max-parents=7 450 | 451 | # Maximum number of public methods for a class (see R0904). 452 | max-public-methods=20 453 | 454 | # Maximum number of return / yield for function / method body. 455 | max-returns=6 456 | 457 | # Maximum number of statements in function / method body. 458 | max-statements=50 459 | 460 | # Minimum number of public methods for a class (see R0903). 461 | min-public-methods=2 462 | 463 | 464 | [STRING] 465 | 466 | # This flag controls whether inconsistent-quotes generates a warning when the 467 | # character used as a quote delimiter is used inconsistently within a module. 468 | check-quote-consistency=no 469 | 470 | # This flag controls whether the implicit-str-concat should generate a warning 471 | # on implicit string concatenation in sequences defined over several lines. 472 | check-str-concat-over-line-jumps=no 473 | 474 | 475 | [BASIC] 476 | 477 | # Naming style matching correct argument names. 478 | argument-naming-style=snake_case 479 | 480 | # Regular expression matching correct argument names. Overrides argument- 481 | # naming-style. If left empty, argument names will be checked with the set 482 | # naming style. 483 | #argument-rgx= 484 | 485 | # Naming style matching correct attribute names. 486 | attr-naming-style=snake_case 487 | 488 | # Regular expression matching correct attribute names. Overrides attr-naming- 489 | # style. If left empty, attribute names will be checked with the set naming 490 | # style. 491 | #attr-rgx= 492 | 493 | # Bad variable names which should always be refused, separated by a comma. 494 | bad-names=foo, 495 | bar, 496 | baz, 497 | toto, 498 | tutu, 499 | tata 500 | 501 | # Bad variable names regexes, separated by a comma. If names match any regex, 502 | # they will always be refused 503 | bad-names-rgxs= 504 | 505 | # Naming style matching correct class attribute names. 506 | class-attribute-naming-style=any 507 | 508 | # Regular expression matching correct class attribute names. Overrides class- 509 | # attribute-naming-style. If left empty, class attribute names will be checked 510 | # with the set naming style. 
511 | #class-attribute-rgx= 512 | 513 | # Naming style matching correct class constant names. 514 | class-const-naming-style=UPPER_CASE 515 | 516 | # Regular expression matching correct class constant names. Overrides class- 517 | # const-naming-style. If left empty, class constant names will be checked with 518 | # the set naming style. 519 | #class-const-rgx= 520 | 521 | # Naming style matching correct class names. 522 | class-naming-style=PascalCase 523 | 524 | # Regular expression matching correct class names. Overrides class-naming- 525 | # style. If left empty, class names will be checked with the set naming style. 526 | #class-rgx= 527 | 528 | # Naming style matching correct constant names. 529 | const-naming-style=UPPER_CASE 530 | 531 | # Regular expression matching correct constant names. Overrides const-naming- 532 | # style. If left empty, constant names will be checked with the set naming 533 | # style. 534 | #const-rgx= 535 | 536 | # Minimum line length for functions/classes that require docstrings, shorter 537 | # ones are exempt. 538 | docstring-min-length=-1 539 | 540 | # Naming style matching correct function names. 541 | function-naming-style=snake_case 542 | 543 | # Regular expression matching correct function names. Overrides function- 544 | # naming-style. If left empty, function names will be checked with the set 545 | # naming style. 546 | #function-rgx= 547 | 548 | # Good variable names which should always be accepted, separated by a comma. 549 | good-names=i, 550 | j, 551 | k, 552 | ex, 553 | Run, 554 | _ 555 | 556 | # Good variable names regexes, separated by a comma. If names match any regex, 557 | # they will always be accepted 558 | good-names-rgxs= 559 | 560 | # Include a hint for the correct naming format with invalid-name. 561 | include-naming-hint=no 562 | 563 | # Naming style matching correct inline iteration names. 564 | inlinevar-naming-style=any 565 | 566 | # Regular expression matching correct inline iteration names. Overrides 567 | # inlinevar-naming-style. If left empty, inline iteration names will be checked 568 | # with the set naming style. 569 | #inlinevar-rgx= 570 | 571 | # Naming style matching correct method names. 572 | method-naming-style=snake_case 573 | 574 | # Regular expression matching correct method names. Overrides method-naming- 575 | # style. If left empty, method names will be checked with the set naming style. 576 | #method-rgx= 577 | 578 | # Naming style matching correct module names. 579 | module-naming-style=snake_case 580 | 581 | # Regular expression matching correct module names. Overrides module-naming- 582 | # style. If left empty, module names will be checked with the set naming style. 583 | #module-rgx= 584 | 585 | # Colon-delimited sets of names that determine each other's naming style when 586 | # the name regexes allow several styles. 587 | name-group= 588 | 589 | # Regular expression which should only match function or class names that do 590 | # not require a docstring. 591 | no-docstring-rgx=^_ 592 | 593 | # List of decorators that produce properties, such as abc.abstractproperty. Add 594 | # to this list to register other decorators that produce valid properties. 595 | # These decorators are taken in consideration only for invalid-name. 596 | property-classes=abc.abstractproperty 597 | 598 | # Regular expression matching correct type variable names. If left empty, type 599 | # variable names will be checked with the set naming style. 600 | #typevar-rgx= 601 | 602 | # Naming style matching correct variable names. 
603 | variable-naming-style=snake_case 604 | 605 | # Regular expression matching correct variable names. Overrides variable- 606 | # naming-style. If left empty, variable names will be checked with the set 607 | # naming style. 608 | #variable-rgx= 609 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Falcon Data Replicator Community Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | CrowdStrike. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 
68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.io/python:3-slim-buster 2 | 3 | RUN : \ 4 | && apt-get update \ 5 | && DEBIAN_FRONTEND=noninteractive apt-get upgrade --no-install-recommends --assume-yes \ 6 | && rm -rf /var/lib/apt/lists/* 7 | 8 | RUN useradd --create-home --home-dir /fdr fdruser 9 | USER fdruser 10 | WORKDIR /fdr 11 | 12 | COPY requirements.txt . 13 | RUN pip install -r ./requirements.txt 14 | 15 | COPY . . 
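# Run the replicator as a module from the /fdr working directory. The repository's
# falcon_data_replicator.ini is copied in above; mount your own configuration over it
# when running the container. Example invocation (the image tag and host path below are
# illustrative only; adjust them for your environment):
#   docker build -t fdr-connector .
#   docker run -v /path/to/falcon_data_replicator.ini:/fdr/falcon_data_replicator.ini fdr-connector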
16 | 
17 | ENTRYPOINT [ "python3", "-m", "falcon_data_replicator" ]
--------------------------------------------------------------------------------
/Falcon Data Replicator Sample/data_replicator_config.py:
--------------------------------------------------------------------------------
1 | # AWS security credentials
2 | AWS_KEY = ""
3 | 
4 | AWS_SECRET = ""
5 | 
6 | # URL of the SQS queue.
7 | QUEUE_URL = ""
8 | 
9 | # Root directory to download files from S3 to.
10 | OUTPUT_PATH = ""
11 | 
12 | # Time in seconds before a message is added back to the SQS queue if not deleted. Ensure this is large enough for you
13 | # to safely finish processing any downloaded files.
14 | VISIBILITY_TIMEOUT = 300
15 | 
16 | # Name of the AWS region.
17 | REGION_NAME = ""
--------------------------------------------------------------------------------
/Falcon Data Replicator Sample/data_replicator_sample_consumer.py:
--------------------------------------------------------------------------------
1 | import data_replicator_config
2 | import json
3 | import os
4 | import time
5 | 
6 | try:
7 |     import boto3
8 | except ImportError as err:
9 |     print(err)
10 |     raise SystemExit(
11 |         'boto3 is required to run data_replicator_sample_consumer. Please "pip install boto3"!'
12 |     ) from err
13 | 
14 | ###################################################################################################
15 | # NOTE: See Falcon Data Replicator instructions for details on how to use this sample consumer. #
16 | ###################################################################################################
17 | 
18 | AWS_KEY = data_replicator_config.AWS_KEY
19 | AWS_SECRET = data_replicator_config.AWS_SECRET
20 | QUEUE_URL = data_replicator_config.QUEUE_URL
21 | OUTPUT_PATH = os.path.realpath(data_replicator_config.OUTPUT_PATH)
22 | VISIBILITY_TIMEOUT = data_replicator_config.VISIBILITY_TIMEOUT
23 | REGION_NAME = data_replicator_config.REGION_NAME
24 | 
25 | sqs = boto3.resource(
26 |     "sqs",
27 |     region_name=REGION_NAME,
28 |     aws_access_key_id=AWS_KEY,
29 |     aws_secret_access_key=AWS_SECRET,
30 | )
31 | s3 = boto3.client(
32 |     "s3",
33 |     region_name=REGION_NAME,
34 |     aws_access_key_id=AWS_KEY,
35 |     aws_secret_access_key=AWS_SECRET,
36 | )
37 | queue = sqs.Queue(url=QUEUE_URL)
38 | 
39 | 
40 | def handle_file(path):
41 |     """PUT CUSTOM LOGIC FOR HANDLING FILES HERE"""
42 |     print("Downloaded file to path %s" % path)
43 | 
44 | 
45 | def download_message_files(msg):
46 |     """Download the files from S3 referenced in msg and place them in OUTPUT_PATH.
47 | 
48 |     download_message_files iterates through every file listed in msg['files'],
49 |     moves it to a local path of the form "{OUTPUT_PATH}/{s3_path}",
50 |     and then calls handle_file(path).
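    Both the message-level path prefix and each individual file path are
    resolved with os.path.realpath and verified against OUTPUT_PATH before
    anything is written, so entries that would fall outside the output
    directory are skipped rather than downloaded.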
51 | """ 52 | 53 | # Construct output path for this message's files 54 | msg_output_path = os.path.realpath(os.path.join(OUTPUT_PATH, msg["pathPrefix"])) 55 | # Only write files to the specified output_path 56 | if os.path.commonpath([OUTPUT_PATH, msg_output_path]) != OUTPUT_PATH: 57 | print( 58 | f"Skipping {msg_output_path} to prevent writes outside of output path: {OUTPUT_PATH}" 59 | ) 60 | return 61 | 62 | # Ensure directory exists at output path 63 | if not os.path.exists(msg_output_path): 64 | os.makedirs(msg_output_path) 65 | 66 | for s3_file in msg["files"]: 67 | try: 68 | s3_path = s3_file["path"] 69 | local_path = os.path.realpath(os.path.join(OUTPUT_PATH, s3_path)) 70 | # only write files to the specified output path 71 | if os.path.commonpath([OUTPUT_PATH, local_path]) != OUTPUT_PATH: 72 | print( 73 | f"Skipping {local_path} to prevent writes outside of output path: {OUTPUT_PATH}" 74 | ) 75 | continue 76 | 77 | # Handle FDR platform and time partitioned folders 78 | if not os.path.exists(os.path.dirname(local_path)): 79 | os.makedirs(os.path.dirname(local_path)) 80 | 81 | # Copy one file from s3 to local 82 | s3.download_file(msg["bucket"], s3_path, local_path) 83 | # Do something with file 84 | handle_file(local_path) 85 | except Exception as e: 86 | print(f"Error downloading file {s3_file['path']}: {e}") 87 | print( 88 | "\nIf you're unsure how to handle this error, open an issue on Github: https://github.com/CrowdStrike/FDR/issues or contact support.\n" 89 | ) 90 | exit(1) 91 | 92 | 93 | def consume_data_replicator(): 94 | """Consume from data replicator and track number of messages/files/bytes downloaded.""" 95 | 96 | sleep_time = 1 97 | msg_cnt = 0 98 | file_cnt = 0 99 | byte_cnt = 0 100 | 101 | while True: # We want to continuously poll the queue for new messages. 102 | # Receive messages from queue if any exist (NOTE: receive_messages() only receives a few messages at a 103 | # time, it does NOT exhaust the queue) 104 | for msg in queue.receive_messages(VisibilityTimeout=VISIBILITY_TIMEOUT): 105 | msg_cnt += 1 106 | body = json.loads(msg.body) # grab the actual message body 107 | download_message_files(body) 108 | file_cnt += body["fileCount"] 109 | byte_cnt += body["totalSize"] 110 | # msg.delete() must be called or the message will be returned to the SQS queue after 111 | # VISIBILITY_TIMEOUT seconds 112 | msg.delete() 113 | time.sleep(sleep_time) 114 | 115 | print( 116 | "Messages consumed: %i\tFile count: %i\tByte count: %i" 117 | % (msg_cnt, file_cnt, byte_cnt) 118 | ) 119 | 120 | 121 | if __name__ == "__main__": 122 | consume_data_replicator() 123 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. 
We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![CrowdStrike Falcon](https://raw.githubusercontent.com/CrowdStrike/falconpy/main/docs/asset/cs-logo.png)
[![Twitter URL](https://img.shields.io/twitter/url?label=Follow%20%40CrowdStrike&style=social&url=https%3A%2F%2Ftwitter.com%2FCrowdStrike)](https://twitter.com/CrowdStrike)
2 | # Falcon Data Replicator
3 | [![Bandit](https://github.com/CrowdStrike/FDR/actions/workflows/bandit.yml/badge.svg)](https://github.com/CrowdStrike/FDR/actions/workflows/bandit.yml)
4 | [![Flake8](https://github.com/CrowdStrike/FDR/actions/workflows/linting.yml/badge.svg)](https://github.com/CrowdStrike/FDR/actions/workflows/linting.yml)
5 | [![Python Lint](https://github.com/CrowdStrike/FDR/actions/workflows/pylint.yml/badge.svg)](https://github.com/CrowdStrike/FDR/actions/workflows/pylint.yml)
6 | [![CodeQL](https://github.com/CrowdStrike/FDR/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/CrowdStrike/FDR/actions/workflows/codeql-analysis.yml)
7 | 
8 | The Falcon Data Replicator replicates log data from your CrowdStrike environment to a stand-alone target. This target can be a location on the file system,
9 | or a cloud storage bucket.
10 | > Currently AWS is the only cloud provider implemented.
11 | ## Requirements
12 | + Python 3.6+
13 | + boto3
14 | + CrowdStrike Falcon FDR credentials
15 | + CrowdStrike Falcon FDR SQS queue URL
16 | ## Stand-alone solution
17 | + [falcon_data_replicator.ini](https://github.com/CrowdStrike/FDR/blob/main/falcon_data_replicator.ini) - Configuration file
18 | + [standalone/falcon_data_replicator.py](https://github.com/CrowdStrike/FDR/blob/main/standalone/falcon_data_replicator.py) - Stand-alone solution application file
19 | ### Configuration
20 | The `falcon_data_replicator.ini` file contains all of the parameters necessary to configure the
21 | solution for replication to the local file system and/or a storage bucket in AWS S3. After
22 | retrieving the AWS credentials and SQS queue details from your Falcon console, edit this file
23 | to reflect your environment.
24 | #### Required parameters
25 | The following parameters must be provided in order for the solution to operate.
26 | + `AWS_KEY` - AWS client ID provided to you by the CrowdStrike Falcon console
27 | + `AWS_SECRET` - AWS client secret provided to you by the CrowdStrike Falcon console
28 | + `QUEUE_URL` - AWS SQS queue URL provided to you by the CrowdStrike Falcon console
29 | + `OUTPUT_PATH` - File path where downloaded files will be stored; not used for in-memory transfers
30 | + `VISIBILITY_TIMEOUT` - Time in seconds before a message is returned to the SQS queue
31 | + `REGION_NAME` - The name of the AWS region where your CrowdStrike SQS queue resides
32 | + `MESSAGE_DELAY` - The time in seconds to wait in between the processing of each message
33 | + `QUEUE_DELAY` - The time in seconds to wait before each check of the queue for more messages
34 | + `LOG_FILE` - The name and path of the log file
35 | #### Destination parameters
36 | The following parameters configure our destination details. If these parameters are not present,
37 | upload to our bucket is skipped and the local files are retained after download.
38 | + `TARGET_BUCKET` - The name of the AWS bucket we will use for our target destination
39 | + `TARGET_REGION` - The name of the AWS region our target bucket resides within
40 | + `REMOVE_LOCAL_FILE` - Boolean representing whether or not to remove local files after they are uploaded
41 | + `IN_MEMORY_TRANSFER_ONLY` - Transfer the file from the source bucket to the destination bucket without storing the file on the local file system.
42 | + `DO_OCSF_CONVERSION` - Boolean representing whether or not to convert the events to the OCSF format 43 | + `TARGET_ACCOUNT_ID` - The AWS account ID of the target bucket 44 | + `OCSF_ROLE_NAME` - The name of the role to use when writing to the target bucket 45 | + `OCSF_ROLE_EXTERNAL_ID` - The external ID to use when assuming the role provided by OCSF_ROLE_NAME. Default: `CrowdStrikeCustomSource` 46 | + `OCSF_INGEST_LATENCY` - The maximum amount of time (in minutes) to buffer records before publishing. Min: 5 Max: 60 Default: 5 47 | + `OCSF_MAX_FILE_SIZE` - Maximum size of a file in MB before it is uploaded. Min: 1 Max: 200 Default: 200 48 | > Note: Security Lake performance is sensitive to the number of files that must be read for a query. Use `OCSF_MAX_FILE_SIZE` and `OCSF_INGEST_LATENCY` to tune performance for your use case. 49 | ### Running the solution 50 | After updating the configuration file to reflect your environment specifics, you can run this solution using: 51 | ```bash 52 | python3 falcon_data_replicator.py 53 | ``` 54 | If your configuration file is not present in the same directory as the application file, you can reference 55 | this path using the _-f_ or _--config_file_ command line parameters. 56 | ```bash 57 | python3 falcon_data_replicator.py -f some_path/falcon_data_replicator.ini 58 | ``` 59 | ## Container-based 60 | _Coming soon_ 61 | 62 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | This document outlines security policy and procedures for the CrowdStrike `FDR Connector` project. 3 | + [Supported Python versions](#supported-python-versions) 4 | + [Supported FDR Connector versions](#supported-fdr-connector-versions) 5 | + [Reporting a potential security vulnerability](#reporting-a-potential-security-vulnerability) 6 | + [Disclosure and Mitigation Process](#disclosure-and-mitigation-process) 7 | 8 | ## Supported Python versions 9 | 10 | FDR Connector functionality is unit tested to run under the following versions of Python. 11 | 12 | | Version | Supported | 13 | | :------- | :--------- | 14 | | 3.9.x | :white_check_mark: | 15 | | 3.8.x | :white_check_mark: | 16 | | 3.7.x | :white_check_mark: | 17 | | 3.6.x | :white_check_mark: | 18 | | <= 3.5 | :x: | 19 | | <= 2.x.x | :x: | 20 | 21 | ## Supported FDR Connector versions 22 | 23 | When discovered, we release security vulnerability patches for the most recent release at an accelerated cadence. 24 | 25 | ## Reporting a potential security vulnerability 26 | 27 | Please report suspected security vulnerabilities by: 28 | + Submitting a [bug](https://github.com/CrowdStrike/FDR/issues) 29 | + Submitting a [pull request](https://github.com/CrowdStrike/FDR/pulls) to potentially resolve the issue 30 | 31 | ## Disclosure and mitigation process 32 | 33 | Upon receiving a security bug report, the issue will be assigned to one of the project maintainers. This person will coordinate the related fix and release 34 | process, involving the following steps: 35 | + Communicate with you to confirm we have received the report and provide you with a status update. 36 | - You should receive this message within 48 - 72 business hours. 37 | + Confirmation of the issue and a determination of affected versions. 38 | + An audit of the codebase to find any potentially similar problems. 39 | + Preparation of patches for all releases still under maintenance. 
40 |   - These patches will be submitted as a separate pull request and contain a version update.
41 |   - This pull request will be flagged as a security fix.
42 | 
43 | ## Comments
44 | If you have suggestions on how this process could be improved, please let us know by [submitting an issue](https://github.com/CrowdStrike/FDR/issues).
45 | 
--------------------------------------------------------------------------------
/falcon_data_replicator.ini:
--------------------------------------------------------------------------------
1 | # ____ __ ___ __ ___ ___ __
2 | # / __/__ _/ /______ ___ / _ \___ _/ /____ _ / _ \___ ___ / (_)______ _/ /____ ____
3 | # / _// _ `/ / __/ _ \/ _ \ / // / _ `/ __/ _ `/ / , _/ -_) _ \/ / / __/ _ `/ __/ _ \/ __/
4 | # /_/ \_,_/_/\__/\___/_//_/ /____/\_,_/\__/\_,_/ /_/|_|\__/ .__/_/_/\__/\_,_/\__/\___/_/
5 | # /_/
6 | # falcon_data_replicator.ini
7 | # Creation date: 04.03.21, jshcodes@CrowdStrike
8 | #
9 | # Local configuration file for Falcon Data Replicator integration
10 | #
11 | # =========================================================================================================
12 | # ____ ____ _
13 | # / ___| ___ _ _ _ __ ___ ___ | _ \ __ _| |_ __ _
14 | # \___ \ / _ \| | | | '__/ __/ _ \ | | | |/ _` | __/ _` |
15 | # ___) | (_) | |_| | | | (_| __/ | |_| | (_| | || (_| |
16 | # |____/ \___/ \__,_|_| \___\___| |____/ \__,_|\__\__,_|
17 | #
18 | # These values must be populated in order for this solution to operate
19 | #
20 | [Source Data]
21 | # AWS security credentials, provided to you by the CrowdStrike console (String)
22 | AWS_KEY = AWS_KEY_GOES_HERE
23 | # (String)
24 | AWS_SECRET = AWS_SECRET_GOES_HERE
25 | # URL of the SQS queue provided to you by CrowdStrike (String)
26 | # Should be an SQS URL
27 | QUEUE_URL = https://AWS_QUEUE_URL_GOES_HERE
28 | # This is the folder where downloads are stored. If you are immediately uploading these files to another
29 | # S3 bucket, then you can name this folder anything. If you plan on storing this data on the file system
30 | # then this would represent that location. (String)
31 | OUTPUT_PATH = downloaded
32 | # Time in seconds before a message is added back to the SQS queue if not deleted.
33 | # Ensure this is large enough for you to safely finish processing any downloaded files. (Integer)
34 | # Example: 300
35 | VISIBILITY_TIMEOUT = 300
36 | # Name of the AWS region for our source bucket (String)
37 | # This should match the region of your CrowdStrike FDR source bucket
38 | REGION_NAME = us-west-1
39 | # Delay (in seconds) to wait in between messages
40 | MESSAGE_DELAY = 1
41 | # Delay (in seconds) to wait in between message runs
42 | QUEUE_DELAY = 5
43 | # Log file
44 | LOG_FILE = falcon_data_replicator.log
45 | # Maximum number of processor threads to use for processing.
46 | # Leaving this value blank will tell the application to make
47 | # its best guess. The maximum number of threads that will
48 | # be generated at one time should not exceed 10.
49 | # (Max number of SQS messages received per iteration.)
50 | MAX_THREADS = 5 51 | # Logging level, INFO or DEBUG 52 | LOG_LEVEL = INFO 53 | 54 | # ____ _ _ _ _ ____ _ 55 | # | _ \ ___ ___| |_(_)_ __ __ _| |_(_) ___ _ __ | _ \ __ _| |_ __ _ 56 | # | | | |/ _ \/ __| __| | '_ \ / _` | __| |/ _ \| '_ \ | | | |/ _` | __/ _` | 57 | # | |_| | __/\__ \ |_| | | | | (_| | |_| | (_) | | | | | |_| | (_| | || (_| | 58 | # |____/ \___||___/\__|_|_| |_|\__,_|\__|_|\___/|_| |_| |____/ \__,_|\__\__,_| 59 | # 60 | # If these values are not defined, this solution will save downloaded files to the OUTPUT_PATH location only. 61 | # 62 | [Destination Data] 63 | # Target bucket (String) 64 | # The name of your bucket. This bucket must exist. 65 | TARGET_BUCKET = TARGET_BUCKET_NAME_GOES_HERE 66 | # Name of our target AWS region (String) 67 | # Example: us-east-1 68 | TARGET_REGION = TARGET_REGION_NAME_GOES_HERE 69 | # Remove local files after upload (Boolean) 70 | # Allowed values: True, False, Yes, No 71 | REMOVE_LOCAL_FILE = yes 72 | # No local file system usage 73 | # Allowed values: True, False, Yes, No 74 | IN_MEMORY_TRANSFER_ONLY = yes 75 | # Convert inbound data into OCSF format before 76 | # publishing it to the target bucket or folder 77 | DO_OCSF_CONVERSION = No 78 | # OCSF Target AWS Account Id 79 | TARGET_ACCOUNT_ID= TARGET_ACCOUNT_ID 80 | # The role name to assume to write to Security Lake bucket 81 | OCSF_ROLE_NAME = 82 | # The external ID used to assume the role in the target account 83 | OCSF_ROLE_EXTERNAL_ID = CrowdStrikeCustomSource 84 | # Security Lake performance is sensitive to the number of files that must be read for a query. 85 | # The max amount of time (in minutes) to buffer records before publishing. Min: 5 Max: 60 Default: 5 86 | OCSF_INGEST_LATENCY = 5 87 | # Maximum size of a file in MB before it is uploaded. Min: 200 Max: 256 Default: 256 88 | OCSF_MAX_FILE_SIZE = 256 -------------------------------------------------------------------------------- /falcon_data_replicator.py: -------------------------------------------------------------------------------- 1 | r"""Falcon Data Replicator - Local File System / AWS S3 connector 2 | 3 | _____ _ ____ _ ____ _ _ _ 4 | | ___|_ _| | ___ ___ _ __ | _ \ __ _| |_ __ _ | _ \ ___ _ __ | (_) ___ __ _| |_ ___ _ __ 5 | | |_ / _` | |/ __/ _ \| '_ \ | | | |/ _` | __/ _` | | |_) / _ \ '_ \| | |/ __/ _` | __/ _ \| '__| 6 | | _| (_| | | (_| (_) | | | | | |_| | (_| | || (_| | | _ < __/ |_) | | | (_| (_| | || (_) | | 7 | |_| \__,_|_|\___\___/|_| |_| |____/ \__,_|\__\__,_| |_| \_\___| .__/|_|_|\___\__,_|\__\___/|_| 8 | |_| 9 | 10 | . 11 | Your data | _____________________________________________________ ___ 12 | is here! | | _____ ________ _ __ | __ 13 | \ _______| | / ___/______ _ _____/ / __/ /_____(_) /_____ | ___ 14 | / _____ | | / /__/ __/ _ \ |/|/ / _ /\ \/ __/ __/ / '_/ -_) | 15 | / /(__) || | \___/_/ \___/__,__/\_,_/___/\__/_/ /_/_/\_\\__/ | ___ 16 | ________/ / |OO| || | | 17 | | Hemi |-------|| | --= FALCON DATA REPLICATOR >> | ___ 18 | (| | -.|| |_______________________ | ____ 19 | | ____ \ ||_________||____________ | ____ ____ | 20 | /| / __ \ |______|| / __ \ / __ \ | | / __ \ / __ \ |\ ___ 21 | \|| / \ |_______________| / \ |_| / \ |__| |___________| / \ |__| / \|_|/ 22 | | () | | () | | () | | () | | () | ____ 23 | \__/ \__/ \__/ \__/ \__/ 24 | 25 | 26 | Local File System / AWS S3 connector 27 | 28 | NOTE: See https://github.com/CrowdStrike/FDR for details on how to use this application. 
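
Usage (PATH_TO_CONFIG_FILE is a placeholder for your own path):
    python3 falcon_data_replicator.py [-f PATH_TO_CONFIG_FILE]

When -f / --config_file is omitted, falcon_data_replicator.ini in the current
working directory is used.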
29 | """ 30 | import json 31 | import io 32 | import os 33 | import sys 34 | import time 35 | import pathlib 36 | import signal as sig 37 | import configparser 38 | import argparse 39 | import logging 40 | from logging.handlers import RotatingFileHandler 41 | from functools import partial 42 | from concurrent.futures import ThreadPoolExecutor 43 | from threading import main_thread 44 | from ocsf import transform_fdr_data_to_ocsf_data, upload_parquet_files_to_s3 45 | from fdr.fdrconnector import FDRConnector 46 | 47 | # This solution is dependant upon the AWS boto3 Python library 48 | try: 49 | import boto3 50 | except ImportError as err: 51 | print(err) 52 | raise SystemExit("The AWS boto3 library is required to run Falcon " 53 | "Data Replicator.\nPlease execute 'pip3 install boto3'" 54 | ) from err 55 | 56 | try: 57 | from aws_assume_role_lib import assume_role 58 | except ImportError as err: 59 | print(err) 60 | raise SystemExit("The aws-assume-role-lib library is required to run Falcon " 61 | "Data Replicator.\nPlease execute 'pip3 install aws-assume-role-lib'" 62 | ) from err 63 | # Global FDR 64 | FDR = None 65 | 66 | 67 | # This method is used as an exit handler. When a quit, cancel or interrupt is received, 68 | # this method forces FDR to finish processing the file it is working on before exiting. 69 | def clean_exit(stat, signal, frame): # pylint: disable=W0613 70 | """Graceful exit handler for SIGINT, SIGQUIT and SIGTERM""" 71 | stat.set_exit(True) 72 | return True 73 | 74 | 75 | def do_keyed_delete(file_target: str, log: logging.Logger): 76 | """Remove temporary folder artifacts.""" 77 | os.remove(file_target) 78 | os.rmdir(os.path.dirname(file_target)) 79 | pure = pathlib.PurePath(file_target) 80 | # Remove the parent temporary folders if they exist 81 | try: 82 | os.rmdir(pure.parent.parent) 83 | except OSError: 84 | log.debug(f"Skipping deletion of {pure.parent.parent} as not empty.") 85 | else: 86 | log.debug("Removed %s", pure.parent.parent) 87 | if FDR.output_path not in pure.parent.parent.parent.name: 88 | try: 89 | os.rmdir(pure.parent.parent.parent) 90 | except OSError: 91 | log.debug( 92 | f"Skipping deletion of {pure.parent.parent.parent} as not empty.") 93 | else: 94 | log.debug("Removed %s", pure.parent.parent.parent) 95 | 96 | 97 | def handle_file(path, key, target_bkt, file_object=None, log_util: logging.Logger = None): 98 | """Process the file. 
If configured, upload this file to our target bucket and remove it.""" 99 | total_events_in_file = 0 100 | transform_time = 0 101 | upload_time = 0 102 | # If we've defined a target bucket 103 | if FDR.target_bucket_name: 104 | if not file_object: 105 | if FDR.do_ocsf: 106 | # Send the gzip'd file to be transformed and write it as parquet file 107 | start_transform_time = time.time() 108 | total_events_in_file = transform_fdr_data_to_ocsf_data( 109 | FDR, path, log_util) 110 | transform_time = time.time() - start_transform_time 111 | # upload the file that meets the criteria 112 | start_upload_time = time.time() 113 | upload_parquet_files_to_s3(FDR, target_bkt, log_util) 114 | upload_time = time.time() - start_upload_time 115 | else: 116 | start_upload_time = time.time() 117 | # Open our local file (binary) 118 | with open(path, 'rb') as data: 119 | # Perform the upload to the same key in our target bucket 120 | target_bkt.upload_fileobj( 121 | data, FDR.target_bucket_name, key) 122 | log_util.info('Uploaded file to path %s', key) 123 | upload_time = time.time() - start_upload_time 124 | # Only perform this step if configured to do so 125 | if FDR.remove_local_file: 126 | # Remove the file from the local file system 127 | do_keyed_delete(path, log_util) 128 | 129 | else: 130 | if FDR.do_ocsf: 131 | # OCSF conversion using IN Memory data from s3 source 132 | start_transform_time = time.time() 133 | total_events_in_file = transform_fdr_data_to_ocsf_data( 134 | FDR, file_object, log_util) 135 | transform_time = time.time() - start_transform_time 136 | # upload the file that meets the criteria 137 | start_upload_time = time.time() 138 | upload_parquet_files_to_s3(FDR, target_bkt, log_util) 139 | upload_time = time.time() - start_upload_time 140 | else: 141 | start_upload_time = time.time() 142 | target_bkt.upload_fileobj( 143 | file_object, FDR.target_bucket_name, key) 144 | log_util.info('Uploaded file to path %s', key) 145 | upload_time = time.time() - start_upload_time 146 | if os.path.exists(f"{FDR.output_path}/{key}"): 147 | # Something about our zip handling is leaving artifacts on the drive 148 | do_keyed_delete(f"{FDR.output_path}/{key}", log_util) 149 | # We're done 150 | return {'done': True, 'total_events_per_input_file': total_events_in_file, 151 | 'transform_time_per_input_file': transform_time, 152 | 'upload_time_per_input_file': upload_time 153 | } 154 | 155 | 156 | def download_message_files(msg, s3ta, s3or, log: logging.Logger): 157 | """Download the file specified in the SQS message and trigger file handling.""" 158 | total_event_count = 0 159 | total_download_time_sec = 0.0 160 | total_transform_time_sec = 0.0 161 | total_upload_time_sec = 0.0 162 | # For every file in our message 163 | for s3_file in msg['files']: 164 | # Retrieve the bucket path for this file 165 | s3_path = s3_file['path'] 166 | total_download_time_per_input_file = 0 167 | if not FDR.in_memory_transfer_only: 168 | # Construct output path for this message's files 169 | msg_output_path = os.path.realpath(os.path.join(FDR.output_path, msg["pathPrefix"])) 170 | # Only write files to the specified output_path 171 | if os.path.commonpath([FDR.output_path, msg_output_path]) != FDR.output_path: 172 | log.info( 173 | f"Skipping {msg_output_path} to prevent writes outside of output path: {FDR.output_path}" 174 | ) 175 | continue 176 | # Ensure directory exists at output path 177 | if not os.path.exists(msg_output_path): 178 | # Create it if it doesn't 179 | os.makedirs(msg_output_path) 180 | # Create a local path 
name for our destination file based off of the S3 path 181 | local_path = os.path.realpath(os.path.join(FDR.output_path, s3_path)) 182 | # Only write files to the specified output_path 183 | if os.path.commonpath([FDR.output_path, local_path]) != FDR.output_path: 184 | log.info( 185 | f"Skipping {local_path} to prevent writes outside of output path: {FDR.output_path}" 186 | ) 187 | continue 188 | if not os.path.exists(os.path.dirname(local_path)): 189 | # Handle fdr platform and time partitioned folders 190 | os.makedirs(os.path.dirname(local_path)) 191 | start_download_time = time.time() 192 | # Open our local file for binary write 193 | with open(local_path, 'wb') as data: 194 | # Download the file from S3 into our opened local file 195 | s3or.download_fileobj(msg['bucket'], s3_path, data) 196 | log.debug('Downloaded file to path %s', local_path) 197 | total_download_time_per_input_file = time.time() - start_download_time 198 | # Handle S3 upload if configured 199 | result = handle_file(local_path, s3_path, s3ta, None, log) 200 | else: 201 | log.debug('Downloading file to memory') 202 | start_download_time = time.time() 203 | s3t = boto3.resource("s3", 204 | region_name=FDR.region_name, 205 | aws_access_key_id=FDR.aws_key, 206 | aws_secret_access_key=FDR.aws_secret 207 | ) 208 | bkt = s3t.Bucket(msg['bucket']) 209 | obj = bkt.Object(s3_path) 210 | stream = io.BytesIO() 211 | obj.download_fileobj(stream) 212 | # Seek to the beginning of the stream before passing it to the upload handler 213 | stream.seek(0) 214 | total_download_time_per_input_file = time.time() - start_download_time 215 | result = handle_file(None, s3_path, s3ta, stream, log) 216 | 217 | total_event_count += result['total_events_per_input_file'] 218 | total_download_time_sec += total_download_time_per_input_file 219 | total_transform_time_sec += result['transform_time_per_input_file'] 220 | total_upload_time_sec += result['upload_time_per_input_file'] 221 | # pif is per_input_file 222 | log.debug( 223 | 'total_events_pif=%i, ' 224 | 'total_download_time_pif=%f, ' 225 | 'total_transform_time_pif=%f, ' 226 | 'total_upload_time_pif=%f, ' 227 | 'filepath=%s', 228 | result['total_events_per_input_file'], 229 | total_download_time_per_input_file, 230 | result['transform_time_per_input_file'], 231 | result['upload_time_per_input_file'], 232 | s3_path) 233 | 234 | return {'total_event_count': total_event_count, 235 | 'total_download_time_sec': total_download_time_sec, 236 | 'total_transform_time_sec': total_transform_time_sec, 237 | 'total_upload_time_sec': total_upload_time_sec} 238 | 239 | 240 | def process_queue_message(msg, s3b, s3o, log_util: logging.Logger): 241 | """Process the message off of the queue and trigger the file download.""" 242 | log_util.debug("Processing message [%s]", msg.message_id) 243 | # Grab the actual message body 244 | body = json.loads(msg.body) 245 | # Download the file to our local file system and potentially upload it to S3 246 | metrics = download_message_files(body, s3b, s3o, log_util) 247 | log_util.debug("Removing message [%s] from queue", msg.message_id) 248 | # Remove our message from the queue, if this is not performed in visibility_timeout seconds 249 | # this message will be restored to the queue for follow-up processing 250 | msg.delete() 251 | 252 | return body['fileCount'], body['totalSize'], True, metrics 253 | 254 | 255 | def do_shutdown(log_util: logging.Logger, clean: bool = False): 256 | """Perform a graceful shutdown.""" 257 | if clean: 258 | log_util.warning("Routine exit 
requested") 259 | sys.exit(0) 260 | else: 261 | log_util.warning("Unexpected error occurred") 262 | sys.exit(1) 263 | 264 | 265 | def consume_data_replicator(s3_bkt, s3_cs_bkt, log: logging.Logger): 266 | """Consume from data replicator and track number of messages/files/bytes downloaded.""" 267 | # Tracking details 268 | total_event_count = 0 269 | total_download_time_sec = 0.0 270 | total_transform_time_sec = 0.0 271 | total_upload_time_sec = 0.0 272 | total_time_sec = 0.0 273 | msg_cnt = 0 274 | file_cnt = 0 275 | byte_cnt = 0 276 | 277 | # Continuously poll the queue for new messages. 278 | while not FDR.exiting: 279 | received = False 280 | # Receive messages from queue if any exist and send each message to it's own thread for processing 281 | # (NOTE: receive_messages() only receives a few messages at a time, it does NOT exhaust the queue) 282 | # 283 | with ThreadPoolExecutor(FDR.max_threads, thread_name_prefix="thread") as executor: 284 | futures = { 285 | executor.submit(process_queue_message, msg, 286 | s3_bkt, s3_cs_bkt, log) 287 | for msg in queue.receive_messages(VisibilityTimeout=FDR.visibility_timeout, MaxNumberOfMessages=10) 288 | } 289 | max_total_download_time_sec = 0.0 290 | max_total_transform_time_sec = 0.0 291 | max_total_upload_time_sec = 0.0 292 | max_total_time_sec = 0.0 293 | for fut in futures: 294 | msg_cnt += 1 295 | res = fut.result() 296 | file_cnt += res[0] 297 | byte_cnt += res[1] 298 | received = res[2] 299 | total_event_count += res[3]['total_event_count'] 300 | max_total_download_time_sec = max(max_total_download_time_sec, res[3]['total_download_time_sec']) 301 | max_total_transform_time_sec = max(max_total_transform_time_sec, res[3]['total_transform_time_sec']) 302 | max_total_upload_time_sec = max(max_total_upload_time_sec, res[3]['total_upload_time_sec']) 303 | m_tot_time_sec = max_total_download_time_sec + \ 304 | max_total_transform_time_sec + max_total_upload_time_sec 305 | max_total_time_sec = max(max_total_time_sec, m_tot_time_sec) 306 | 307 | if not received: 308 | log.info("No messages received, sleeping for %i seconds", 309 | FDR.queue_delay) 310 | for _ in range(0, FDR.queue_delay): 311 | time.sleep(1) 312 | if FDR.exiting: 313 | do_shutdown(log, True) 314 | else: 315 | total_download_time_sec += max_total_download_time_sec 316 | total_transform_time_sec += max_total_transform_time_sec 317 | total_upload_time_sec += max_total_upload_time_sec 318 | total_time_sec += max_total_time_sec 319 | log.info( 320 | "Messages_consumed: %i\t" 321 | "File_count: %i\t" 322 | "total_event_count: %i\t" 323 | "total_time_sec: %f\t" 324 | "total_download_time_sec: %f\t" 325 | "total_transform_time_sec: %f\t" 326 | "total_upload_time_sec: %f\t" 327 | "Byte_count: %i", 328 | msg_cnt, 329 | file_cnt, 330 | total_event_count, 331 | total_time_sec, 332 | total_download_time_sec, 333 | total_transform_time_sec, 334 | total_upload_time_sec, 335 | byte_cnt) 336 | 337 | # We've requested an exit 338 | if FDR.exiting: 339 | # Clean exit 340 | do_shutdown(log, True) 341 | else: 342 | # Something untoward has occurred 343 | do_shutdown(log, False) 344 | 345 | 346 | def setup_logging(connector: FDRConnector): 347 | """Configure logging.""" 348 | # Set our parent thread name 349 | thread = main_thread() 350 | thread.name = "main" 351 | # Ask boto to keep his voice down 352 | logging.getLogger('boto').setLevel(logging.CRITICAL) 353 | logging.getLogger('boto3').setLevel(logging.CRITICAL) 354 | logging.getLogger('botocore').setLevel(logging.CRITICAL) 355 | 
logging.getLogger('s3transfer').setLevel(logging.CRITICAL) 356 | logging.getLogger('urllib3').setLevel(logging.CRITICAL) 357 | # Log level 358 | log_level = logging.INFO 359 | if FDR.log_level.upper() == "DEBUG": 360 | log_level = logging.DEBUG 361 | # Setup our root logger 362 | logging.basicConfig( 363 | level=log_level, format="%(asctime)-8s %(levelname)-8s %(name)s/%(threadName)-10s %(message)s") 364 | # Create our FDR logger 365 | log_util = logging.getLogger("FDR") 366 | # Rotate log file handler 367 | rfh = RotatingFileHandler( 368 | connector.log_file, maxBytes=20971520, backupCount=5) 369 | # Log file output format 370 | f_format = logging.Formatter( 371 | '%(asctime)s %(levelname)-8s %(name)s/%(threadName)-10s %(message)s') 372 | # Set the log file output level to INFO 373 | rfh.setLevel(logging.INFO) 374 | # Add our log file formatter to the log file handler 375 | rfh.setFormatter(f_format) 376 | # Add our log file handler to our logger 377 | log_util.addHandler(rfh) 378 | # Log our pre-startup event 379 | log_util.info(" _____ ____ ____ _") 380 | log_util.info("| ___| _ \\| _ \\ (.\\") 381 | log_util.info("| |_ | | | | |_) | |/(\\") 382 | log_util.info("| _| | |_| | _ < \\(\\\\") 383 | log_util.info("|_| |____/|_| \\_\\ \"^\"`\\") 384 | log_util.info("Process starting up with Thread Count=%i", FDR.max_threads) 385 | 386 | return log_util 387 | 388 | 389 | def setup_signal_handlers(connector: FDRConnector): 390 | """Setup our graceful exit handlers.""" 391 | sig.signal(sig.SIGINT, partial(clean_exit, connector)) 392 | sig.signal(sig.SIGTERM, partial(clean_exit, connector)) 393 | sig.signal(sig.SIGQUIT, partial(clean_exit, connector)) 394 | 395 | 396 | def get_crowdstrike_aws_objects(connector: FDRConnector): 397 | """Retrieve the CrowdStrike AWS objects storing our FDR data.""" 398 | sqs = boto3.resource('sqs', 399 | region_name=connector.region_name, 400 | aws_access_key_id=connector.aws_key, 401 | aws_secret_access_key=connector.aws_secret 402 | ) 403 | # Connect to our CrowdStrike provided S3 bucket 404 | s3bkt = boto3.client('s3', 405 | region_name=connector.region_name, 406 | aws_access_key_id=connector.aws_key, 407 | aws_secret_access_key=connector.aws_secret 408 | ) 409 | 410 | # Create our queue object for handling message traffic 411 | sqs_queue = sqs.Queue(url=FDR.queue_url) 412 | 413 | return sqs_queue, s3bkt 414 | 415 | 416 | # pylint: disable=R0913 417 | def get_aws_client(resource_type, account_id, aws_region, role_name, session_name, external_id, role_path='/'): 418 | """ 419 | This function Assumes role and returns a client 420 | 421 | Args: 422 | resource_type (string): Resource type to initialize (Ex: ec2, s3) 423 | account_id (string): Target account Id to assume role 424 | aws_region (string): AWS region to initialize service 425 | role_name (string): Role name to assume 426 | session_name (string): Assume role session name 427 | external_id (string): External Id to assume role 428 | role_path (string): Role Path, default = '/' 429 | 430 | Returns: 431 | serviceClient (botocore client): botocore resource client 432 | 433 | """ 434 | try: 435 | # Make Role ARN 436 | if role_path == '/': 437 | role_arn = f'arn:aws:iam::{account_id}:role/{role_name}' 438 | else: 439 | role_arn = f'arn:aws:iam::{account_id}:role/{role_path.lstrip("/").rstrip("/")}/{role_name}' 440 | 441 | # Assume role 442 | session = boto3.Session(region_name=aws_region) 443 | assumed_role_session = assume_role(session, role_arn, RoleSessionName=session_name, ExternalId=external_id) 444 | return 
assumed_role_session.client(resource_type, region_name=aws_region) 445 | 446 | except Exception as error: 447 | print(f'Failed to assume the role for Account: {account_id}: {error}') 448 | raise 449 | 450 | 451 | def get_s3_target(connector: FDRConnector, log_util: logging.Logger): 452 | """Retrieve details for any S3 bucket uploads.""" 453 | returned = None 454 | if FDR.target_bucket_name and connector.target_region_name: 455 | log_util.info("Upload to AWS S3 enabled") 456 | 457 | # Connect to our target S3 bucket, uses the existing 458 | # client configuration to connect (Not the CS provided ones) 459 | if connector.do_ocsf: 460 | returned = get_aws_client('s3', 461 | connector.target_account_id, 462 | connector.target_region_name, 463 | connector.ocsf_role_name, 464 | "CrowdStrikeCustomSource", 465 | connector.ocsf_role_external_id 466 | ) 467 | else: 468 | returned = boto3.client( 469 | 's3', region_name=connector.target_region_name) 470 | 471 | return returned 472 | 473 | 474 | def consume_arguments(): 475 | """Consume any provided command line arguments.""" 476 | # Configure our accepted command line parameters 477 | parser = argparse.ArgumentParser( 478 | description=__doc__, formatter_class=argparse.RawTextHelpFormatter) 479 | parser.add_argument("-f", "--config_file", dest="config_file", help="Path to the configuration file", 480 | required=False) 481 | # Parse any parameters passed at runtime 482 | return parser.parse_args() 483 | 484 | 485 | def initialize_connector(cmd_line: argparse.Namespace): 486 | """Initialize an instance of our FDRConnector class.""" 487 | # If we were not provided a configuration file name 488 | if not cmd_line.config_file: 489 | # Use the default name / location provided in our repo 490 | config_file = "falcon_data_replicator.ini" 491 | else: 492 | # Use the configuration file provided at runtime 493 | config_file = cmd_line.config_file 494 | # Read in our configuration parameters 495 | configuration = configparser.ConfigParser() 496 | configuration.read(config_file) 497 | # Create our connector 498 | return FDRConnector(configuration) 499 | 500 | 501 | # Start our main routine 502 | if __name__ == '__main__': 503 | # Consume any provided command line arguments 504 | cmdline = consume_arguments() 505 | # Initialize our FDR connector 506 | FDR = initialize_connector(cmdline) 507 | # Setup logging 508 | logger = setup_logging(FDR) 509 | # Enable our graceful exit handler to allow uploads and artifact 510 | # cleanup to complete for SIGINT, SIGTERM and SIGQUIT signals. 
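# Note: SIGKILL cannot be trapped; if the process is force-killed, any in-flight
# message simply reappears on the queue after VISIBILITY_TIMEOUT and is retried.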
511 | setup_signal_handlers(FDR) 512 | # Connect to our CrowdStrike provided SQS queue and S3 bucket 513 | queue, s3_cs = get_crowdstrike_aws_objects(FDR) 514 | # If we are doing S3 uploads 515 | s3_target = get_s3_target(FDR, logger) 516 | logger.info("Startup complete") 517 | # Start consuming the replicator feed 518 | consume_data_replicator(s3_target, s3_cs, logger) 519 | -------------------------------------------------------------------------------- /fdr/fdrconnector.py: -------------------------------------------------------------------------------- 1 | """Falcon Data Replicator - Connection configuration class.""" 2 | import os 3 | import sys 4 | import configparser 5 | 6 | 7 | # Class to hold our connector config and to track our running status 8 | class FDRConnector: # pylint: disable=R0902 9 | """The FDRConnector class contains the details of this connection and tracks the status of our process.""" 10 | 11 | def __init__(self, config: configparser.ConfigParser): # pylint: disable=R0912,R0915 12 | """Initialize our status class""" 13 | self.set_exit(False) 14 | # We cannot read our source parameters, exit the routine 15 | if "Source Data" not in config: 16 | print("Unable to load configuration file parameters. Routine halted.") 17 | sys.exit(1) 18 | 19 | # AWS Client ID - Provided by CrowdStrike 20 | self.aws_key = config["Source Data"]["AWS_KEY"] 21 | # AWS Client Secret - Provided by CrowdStrike 22 | self.aws_secret = config["Source Data"]["AWS_SECRET"] 23 | # AWS SQS queue URL - Provided by CrowdStrike 24 | self.queue_url = config["Source Data"]["QUEUE_URL"] 25 | # Local file output location 26 | self.output_path = os.path.realpath(config["Source Data"]["OUTPUT_PATH"]) 27 | # Timeout before messages are returned to the queue 28 | self.visibility_timeout = int(config["Source Data"]["VISIBILITY_TIMEOUT"]) 29 | # Message delay 30 | self.message_delay = int(config["Source Data"]["MESSAGE_DELAY"]) 31 | # Queue delay 32 | self.queue_delay = int(config["Source Data"]["QUEUE_DELAY"]) 33 | # Log File 34 | self.log_file = config["Source Data"]["LOG_FILE"] 35 | # AWS Region name for our source S3 bucket 36 | self.region_name = config["Source Data"]["REGION_NAME"] 37 | # Log setting 38 | self.log_level = config["Source Data"].get("LOG_LEVEL", "INFO") 39 | max_threads = config["Source Data"].get("MAX_THREADS", False) 40 | if not max_threads: 41 | self.max_threads = min(32, (os.cpu_count() or 1) * 4) 42 | else: 43 | self.max_threads = int(max_threads) 44 | self.in_memory_transfer_only = False # Defaults to writing to the local file system 45 | self.target_region_name = None # Defaults to no upload 46 | self.target_bucket_name = None # Defaults to no upload 47 | self.remove_local_file = False # Defaults to keeping files locally 48 | 49 | try: 50 | # Fail on these in order. If REMOVE_LOCAL_FILE, or IN_MEMORY_TRANSFER_ONLY 51 | # fail, processing will still continue. 52 | if "Destination Data" in config: 53 | # If it's not present, we don't need it 54 | if config["Destination Data"]["TARGET_BUCKET"]: 55 | # The name of our target S3 bucket 56 | self.target_bucket_name = config["Destination Data"]["TARGET_BUCKET"] 57 | 58 | if config["Destination Data"]["TARGET_REGION"]: 59 | # The AWS region name our target S3 bucket resides in 60 | self.target_region_name = config["Destination Data"]["TARGET_REGION"] 61 | 62 | if config["Destination Data"]["REMOVE_LOCAL_FILE"]: 63 | # Should we remove local files after we upload them? 
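# Accepted affirmative values are "true" and "yes" (case-insensitive); any
# other value leaves downloaded files on disk.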
64 | remove = config["Destination Data"]["REMOVE_LOCAL_FILE"] 65 | self.remove_local_file = False 66 | if remove.lower() in "true,yes".split(","): # pylint: disable=R1703 67 | self.remove_local_file = True 68 | 69 | if config["Destination Data"]["IN_MEMORY_TRANSFER_ONLY"]: 70 | # Transfer to S3 without using the local file system? 71 | mem_trans = config["Destination Data"]["IN_MEMORY_TRANSFER_ONLY"] 72 | self.in_memory_transfer_only = False 73 | if mem_trans.lower() in "true,yes".split(","): # pylint: disable=R1703 74 | self.in_memory_transfer_only = True 75 | 76 | if config["Destination Data"]["DO_OCSF_CONVERSION"]: 77 | ocsf_setting = config["Destination Data"].get("DO_OCSF_CONVERSION", "no") 78 | self.do_ocsf = False 79 | if ocsf_setting.lower() in "true,yes".split(","): 80 | self.do_ocsf = True 81 | if config["Destination Data"]["TARGET_ACCOUNT_ID"]: 82 | # AWS Account ID 83 | self.target_account_id = config["Destination Data"]["TARGET_ACCOUNT_ID"] 84 | 85 | if self.do_ocsf: 86 | ocsf_max_file_size = int( 87 | config["Destination Data"].get("OCSF_MAX_FILE_SIZE", 256)) 88 | ocsf_ingest_latency = int(config["Destination Data"].get("OCSF_INGEST_LATENCY", 5)) 89 | ocsf_role_name = config["Destination Data"].get( 90 | "OCSF_ROLE_NAME", None) 91 | ocsf_role_external_id = config["Destination Data"].get("OCSF_ROLE_EXTERNAL_ID", 92 | "CrowdStrike OCSF Conversion" 93 | ) 94 | 95 | if ocsf_role_name is None: 96 | raise RuntimeError( 97 | "OCSF_ROLE_NAME must be set if DO_OCSF_CONVERSION is true") 98 | 99 | self.ocsf_role_name = ocsf_role_name 100 | self.ocsf_role_external_id = ocsf_role_external_id 101 | self.ocsf_max_file_size = max( 102 | min(ocsf_max_file_size, 256), 200) 103 | self.ocsf_ingest_latency = max(min(ocsf_ingest_latency, 60), 5) 104 | 105 | except KeyError: 106 | pass 107 | 108 | @property 109 | def exiting(self): 110 | """Returns the value of the exiting property""" 111 | return self.exiting 112 | 113 | @classmethod 114 | def set_exit(cls, val): 115 | """Sets the value of the exiting property""" 116 | cls.exiting = val 117 | return True 118 | -------------------------------------------------------------------------------- /ocsf/__init__.py: -------------------------------------------------------------------------------- 1 | """OCSF file conversion, upload.""" 2 | from .ocsf import transform_fdr_data_to_ocsf_data, upload_parquet_files_to_s3 3 | 4 | __all__ = ["transform_fdr_data_to_ocsf_data", "upload_parquet_files_to_s3"] 5 | -------------------------------------------------------------------------------- /ocsf/ocsf.py: -------------------------------------------------------------------------------- 1 | """Transforms FDR data to OCSF Format and writes in parquet file and uploads the file to AWS Security Lake""" 2 | import glob 3 | import gzip 4 | import json 5 | import os 6 | import re 7 | import threading 8 | from datetime import datetime 9 | from functools import reduce 10 | from logging import Logger 11 | from filelock import FileLock 12 | import pandas as pd 13 | import yaml 14 | 15 | NEWLINE = ord('\n') 16 | 17 | CUSTOM_SOURCES = { 18 | 1001: 'CrowdStrike_FILE_ACTIVITY', 19 | 1005: 'CrowdStrike_MODULE_ACTIVITY', 20 | 1007: 'CrowdStrike_PROCESS_ACTIVITY', 21 | 4001: 'CrowdStrike_NETWORK_ACTIVITY', 22 | 4003: 'CrowdStrike_DNS_ACTIVITY' 23 | } 24 | 25 | BYTES_IN_MB = 1000000 26 | 27 | WRITE_UPLOAD_THREAD_LOCK = threading.Lock() 28 | 29 | 30 | def upload_parquet_files_to_s3(fdr, s3_target, log_utl: Logger): 31 | """Uploads parquet files to s3""" 32 | if fdr.target_bucket_name: 
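# Hold the shared module-level lock so concurrent worker threads cannot write to
# and upload the same parquet files at the same time.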
33 | with WRITE_UPLOAD_THREAD_LOCK: 34 | for root, _, filenames in os.walk('ext'): 35 | for filename in filenames: 36 | upload_file_path = os.path.join(root, filename) 37 | timestamp_str = filename.split('_')[-1].split('.')[0] 38 | 39 | if not filename.endswith('parquet'): 40 | continue 41 | 42 | if not os.path.exists(upload_file_path): 43 | continue 44 | 45 | if os.path.getsize(upload_file_path) >= (BYTES_IN_MB * fdr.ocsf_max_file_size) or \ 46 | is_older_than_minutes(timestamp_str, fdr.ocsf_ingest_latency): 47 | lock = FileLock(upload_file_path + ".lock") 48 | with lock: 49 | with open(upload_file_path, 'rb') as parquet_data: 50 | log_utl.debug('@@@@uploaded_file@@@@=%s', upload_file_path) 51 | s3_target.upload_fileobj(parquet_data, fdr.target_bucket_name, upload_file_path) 52 | # Remove the file from the local file system 53 | os.remove(upload_file_path) 54 | 55 | 56 | def is_older_than_minutes(timestamp, minutes): 57 | """Checks if the timestamp is older than the number of minutes passed 58 | 59 | Arguments: 60 | timestamp {string} -- timestamp in string format 61 | minutes {int} -- number of minutes 62 | 63 | Returns: 64 | bool -- True if the timestamp is older than the number of minutes passed 65 | """ 66 | return (datetime.utcnow().timestamp() - float(timestamp)) > minutes * 60 67 | 68 | 69 | def write_to_parquet_file(fdr, ocsf_events, filename_class_uid_key, log_utl: Logger = None): 70 | """write the events to a parquet file""" 71 | split_path = filename_class_uid_key.rsplit(os.path.sep, 1) 72 | log_utl.debug('split_path=%s', split_path) 73 | folder_path = split_path[0] 74 | file_name = split_path[1] 75 | data = pd.DataFrame(ocsf_events) 76 | data.sort_index(axis=1, inplace=True) 77 | if 'exit_code' in data.columns: 78 | data['exit_code'] = data['exit_code'].astype('Int64') 79 | with WRITE_UPLOAD_THREAD_LOCK: 80 | file_list = os.listdir(folder_path) 81 | events_wrote_to_file = False 82 | if len(file_list) > 0: 83 | for file_path in file_list: 84 | parquet_file_name = os.path.join(folder_path, file_path) 85 | if file_path.endswith('parquet') and file_path.startswith(file_name + '_chunk_') and \ 86 | os.path.getsize(parquet_file_name) <= (BYTES_IN_MB * fdr.ocsf_max_file_size): 87 | lock = FileLock(parquet_file_name + ".lock") 88 | with lock: 89 | events_wrote_to_file = True 90 | log_utl.debug('!!!!!!!!!!Update to bucket=%s, record_len=%s, file_name=%s', 91 | filename_class_uid_key, 92 | len(ocsf_events), parquet_file_name) 93 | existing_data = pd.read_parquet(parquet_file_name) 94 | existing_data.sort_index(axis=1, inplace=True) 95 | concat_data = pd.concat([existing_data, data], axis=0) 96 | concat_data.to_parquet(parquet_file_name, compression='gzip', index=False) 97 | if not events_wrote_to_file: 98 | parquet_file_name = filename_class_uid_key + '_chunk_' + str( 99 | int(datetime.utcnow().timestamp())) + '.parquet' 100 | lock = FileLock(parquet_file_name + ".lock") 101 | with lock: 102 | log_utl.debug('#########Write to bucket=%s, record_len=%s, file_name=%s', filename_class_uid_key, 103 | len(ocsf_events), parquet_file_name) 104 | data.to_parquet(parquet_file_name, compression='gzip', index=False) 105 | 106 | 107 | def read_fdr_part(rdr): 108 | """reads the fdr file""" 109 | # to avoid reading the file into memory, we push each byte into a bytearray 110 | # and yield the completed json once we hit a newline 111 | tmp = bytearray() 112 | for char in rdr.read(): 113 | if char == NEWLINE: 114 | if tmp: 115 | try: 116 | yield json.loads(tmp.decode('utf-8')) 117 | except 
json.JSONDecodeError as e: 118 | print(f"Error decoding JSON: {e}") 119 | tmp.clear() 120 | else: 121 | tmp.append(char) 122 | 123 | 124 | def transform_fdr_data_to_ocsf_data(fdr, file, log_utl: Logger = None): 125 | """Transform FDR data into OSCF format data.""" 126 | total_events_in_file = 0 127 | mapping_dict_by_name = {} 128 | supporting_mapping_dict = {} 129 | for mapping_defn in glob.glob(os.path.join('ocsf', 'mappings', '*.yaml')): 130 | with open(mapping_defn, encoding='utf-8') as mapping_file: 131 | mapping_yamls_by_defn_file = yaml.safe_load_all(mapping_file) 132 | for mapping_yaml in mapping_yamls_by_defn_file: 133 | mapping_jsons = json.loads(json.dumps(mapping_yaml)) 134 | for mapping_json in mapping_jsons: 135 | if mapping_json['type'] == 'Telemetry': 136 | prepare_mapping_dict(mapping_json, mapping_dict_by_name) 137 | else: 138 | prepare_mapping_dict(mapping_json, supporting_mapping_dict) 139 | 140 | file_prefix = 'class_uid' 141 | ocsf_dicts = {} 142 | with gzip.open(file, 'rb') as chunk: 143 | for event in read_fdr_part(chunk): 144 | total_events_in_file += 1 145 | mapping_event_simplename = event.get('event_simpleName') 146 | if mapping_event_simplename in mapping_dict_by_name: 147 | class_uid_field = next( 148 | (field for field in mapping_dict_by_name[mapping_event_simplename].get('fields') if 149 | field['name'] == 'class_uid'), False) 150 | if class_uid_field: 151 | class_uid = class_uid_field['value'] 152 | if class_uid in CUSTOM_SOURCES: 153 | timestamp = int(int(event.get('timestamp')) / 1000) 154 | folder_path = os.path.join('ext', CUSTOM_SOURCES[class_uid_field['value']], 155 | 'region=' + fdr.target_region_name, 156 | 'accountId=' + fdr.target_account_id, 157 | 'eventDay=' + datetime.fromtimestamp(timestamp).strftime('%Y%m%d')) 158 | is_dir_exist = os.path.exists(folder_path) 159 | if not is_dir_exist: 160 | try: 161 | os.makedirs(folder_path) 162 | except FileExistsError: 163 | pass 164 | class_uid_path = os.path.join(folder_path, file_prefix + '_' + str( 165 | class_uid)) 166 | ocsf_class_uid_dicts = ocsf_dicts.setdefault(class_uid_path, []) 167 | ocsf_dict = {} 168 | ocsf_class_uid_dicts.append( 169 | transform_event_to_ocsf(event, ocsf_dict, mapping_dict_by_name[mapping_event_simplename], 170 | supporting_mapping_dict)) 171 | 172 | for filename_class_uid_key, values in ocsf_dicts.items(): 173 | event_count = 0 174 | ocsf_events = [] 175 | for event in values: 176 | ocsf_events.append(event) 177 | event_count += 1 178 | if event_count == 100000: 179 | write_to_parquet_file(fdr, ocsf_events, filename_class_uid_key, log_utl) 180 | ocsf_events = [] 181 | event_count = 0 182 | 183 | if len(ocsf_events) > 0: 184 | write_to_parquet_file(fdr, ocsf_events, filename_class_uid_key, log_utl) 185 | 186 | return total_events_in_file 187 | 188 | 189 | def prepare_mapping_dict(mapping_json: dict, out_dict: dict): 190 | """Dict containing the mapping definition for each name""" 191 | if isinstance(mapping_json.get('name'), list): 192 | for name in mapping_json.get('name'): 193 | out_dict[name] = mapping_json 194 | else: 195 | out_dict[mapping_json.get('name')] = mapping_json 196 | 197 | 198 | def transform_event_to_ocsf(event: dict, ocsf_dict: dict, mapping_dict: dict, mapping_supporting_dict: dict): 199 | """Transforms event to ocsf format""" 200 | for mapping in mapping_dict.get('mappings'): 201 | if not event.get(mapping.get('ours')) and mapping.get('default') is not None: 202 | event[mapping.get('ours')] = mapping.get('default') 203 | map_field(event, ocsf_dict, 
mapping, mapping_supporting_dict) 204 | for field in mapping_dict.get('fields'): 205 | add_default_field(ocsf_dict, field) 206 | 207 | return dot_notation_to_json(ocsf_dict) 208 | 209 | 210 | # Transform Functions start # 211 | def extract_filename(value): 212 | """extracts filename from the value""" 213 | basename = re.search(r'[^\\/]+(?=[\\/]?$)', value) 214 | if basename: 215 | return basename.group(0) 216 | return value 217 | 218 | 219 | def as_number(value): 220 | """converts to int""" 221 | if value is None: 222 | return 0 223 | if '.' in value: 224 | return int(value.split('.')[0]) 225 | return int(value) 226 | 227 | 228 | def as_string(value): 229 | """converts to string""" 230 | if value is None: 231 | return '' 232 | return str(value) 233 | 234 | 235 | def map_ours_theirs(src: dict, dst: dict, mapping: dict, mapping_supporting_dict: dict): 236 | # pylint: disable=unused-argument 237 | """transform function map_ours_theirs""" 238 | dst[mapping.get('theirs')] = src.get(mapping.get('ours')) 239 | 240 | 241 | def map_ours_theirs_using_fn(src: dict, dst: dict, mapping: dict, mapping_supporting_dict: dict): 242 | """transform function map_ours_theirs_using_fn""" 243 | supporting_enum = mapping_supporting_dict.get(mapping.get('using')) 244 | for value in supporting_enum.get('values'): 245 | if value.get('ours') == src.get(mapping.get('ours')): 246 | dst[mapping.get('theirs')] = value.get('theirs') 247 | 248 | 249 | def map_ours_theirs_transform_fn(src: dict, dst: dict, mapping: dict, mapping_supporting_dict: dict): 250 | # pylint: disable=unused-argument 251 | """transform function map_ours_theirs_transform_fn""" 252 | transform_fn = ALL_TRANSFORMS.get(mapping.get('transform')) 253 | dst[mapping.get('theirs')] = transform_fn(src.get(mapping.get('ours'))) 254 | 255 | 256 | def map_items_theirs(src: dict, dst: dict, mapping: dict, mapping_supporting_dict: dict): 257 | # pylint: disable=unused-argument 258 | """transform function map_items_theirs""" 259 | values = [] 260 | for _, item in enumerate(mapping.get('items')): 261 | value = {} 262 | for item_mapping in item.get('mappings'): 263 | if src.get(item_mapping.get('ours')) is not None: 264 | value[item_mapping.get('theirs')] = src.get(item_mapping.get('ours')) 265 | for field in item.get('fields'): 266 | if src.get(item_mapping.get('ours')): 267 | value[field.get('name')] = field.get('value') 268 | values.append(value) 269 | 270 | dst[mapping.get('theirs')] = values 271 | 272 | 273 | def map_ours_theirs_list(src: dict, dst: dict, mapping: dict, mapping_supporting_dict: dict): 274 | # pylint: disable=unused-argument 275 | """transform function map_ours_theirs_list""" 276 | for their in mapping.get('theirs'): 277 | if src.get(mapping.get('ours')) is not None: 278 | dst[their] = src.get(mapping.get('ours')) 279 | 280 | 281 | def map_ours_theirs_list_using_fn(src: dict, dst: dict, mapping: dict, mapping_supporting_dict: dict): 282 | # pylint: disable=unused-argument 283 | """transform function map_ours_theirs_list_using_fn""" 284 | supporting_enum = mapping_supporting_dict.get(mapping.get('using')) 285 | for their in mapping.get('theirs'): 286 | if src.get(mapping.get('ours')) is not None: 287 | for value in supporting_enum.get('values'): 288 | if value.get('ours') == src.get(mapping.get('ours')): 289 | dst[their] = value.get(mapping.get('theirs')) 290 | 291 | 292 | def map_ours_theirs_list_transform_fn(src: dict, dst: dict, mapping: dict, mapping_supporting_dict: dict): 293 | # pylint: disable=unused-argument 294 | """transform function 
map_ours_theirs_list_transform_fn""" 295 | transform_fn = ALL_TRANSFORMS.get(mapping.get('transform')) 296 | for their in mapping.get('theirs'): 297 | if src.get(mapping.get('ours')) is not None: 298 | dst[their] = transform_fn(src.get(mapping.get('ours'))) 299 | 300 | 301 | # Transform Functions End# 302 | def apply_transform(src: dict, mapping: dict): 303 | """determines the transform function to be applied""" 304 | ours = mapping.get('ours') 305 | theirs = mapping.get('theirs') 306 | optional_using = mapping.get('using') 307 | optional_translate = mapping.get('transform') 308 | optional_items = mapping.get('items') 309 | return_func = '' 310 | if ours and not isinstance(ours, list): 311 | if theirs and not isinstance(theirs, list): 312 | if src.get(ours) is not None and not optional_translate and not optional_using and not optional_items: 313 | return_func = 'map_ours_theirs' 314 | elif src.get(ours) is not None and not optional_translate and optional_using and not optional_items: 315 | return_func = 'map_ours_theirs_using_fn' 316 | elif src.get(ours) is not None and optional_translate and not optional_using and not optional_items: 317 | return_func = 'map_ours_theirs_transform_fn' 318 | if theirs and isinstance(theirs, list): 319 | if not optional_translate and not optional_using and not optional_items: 320 | return_func = 'map_ours_theirs_list' 321 | elif not optional_translate and optional_using and not optional_items: 322 | return_func = 'map_ours_theirs_list_using_fn' 323 | elif optional_translate and not optional_using and not optional_items: 324 | return_func = 'map_ours_theirs_list_transform_fn' 325 | elif not ours and optional_items and isinstance(optional_items, list): 326 | if theirs and not isinstance(theirs, list) and not optional_translate and not optional_using: 327 | return_func = 'map_items_theirs' 328 | 329 | return return_func 330 | 331 | 332 | def map_field(src: dict, dst: dict, mapping: dict, mapping_supporting_dict: dict): 333 | """maps the FDR field to OCSF field""" 334 | map_fn = ALL_TRANSFORMS.get(apply_transform(src, mapping)) 335 | if map_fn: 336 | map_fn(src, dst, mapping, mapping_supporting_dict) 337 | 338 | 339 | def dot_notation_to_json(ocsf_dict): 340 | """converts the dot notations in the json to nested json""" 341 | output = {} 342 | for key, value in ocsf_dict.items(): 343 | path = key.split('.') 344 | target = reduce(lambda d, k: d.setdefault(k, {}), path[:-1], output) 345 | target[path[-1]] = value 346 | return output 347 | 348 | 349 | def add_default_field(dest: dict, field: dict): 350 | """adds the default field in the dict""" 351 | name = field.get('name') 352 | value = field.get('value') 353 | if isinstance(value, list) and len(value) == 1 and value[0] is None: 354 | dest[name] = [] 355 | else: 356 | dest[name] = value 357 | 358 | 359 | ALL_TRANSFORMS = { 360 | 'extract_filename': extract_filename, 361 | 'as_number': as_number, 362 | 'as_string': as_string, 363 | 'map_ours_theirs': map_ours_theirs, 364 | 'map_ours_theirs_using_fn': map_ours_theirs_using_fn, 365 | 'map_ours_theirs_transform_fn': map_ours_theirs_transform_fn, 366 | 'map_items_theirs': map_items_theirs, 367 | 'map_ours_theirs_list': map_ours_theirs_list, 368 | 'map_ours_theirs_list_using_fn': map_ours_theirs_list_using_fn, 369 | 'map_ours_theirs_list_transform_fn': map_ours_theirs_list_transform_fn 370 | } 371 | -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.9 3 | # To update, run: 4 | # 5 | # pip-compile requirements.txt 6 | # 7 | boto3>=1.24.70 8 | # via -r requirements.txt 9 | fastparquet>=0.8.3 10 | # via -r requirements.txt 11 | filelock>=3.8.0 12 | # via -r requirements.txt 13 | json2parquet>=2.0.0 14 | # via -r requirements.txt 15 | pyyaml>=6.0 16 | # via -r requirements.txt 17 | numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability 18 | aws-assume-role-lib>=2.10.0 19 | pyarrow>=14.0.1 # not directly required, pinned by Snyk to avoid a vulnerability 20 | -------------------------------------------------------------------------------- /standalone/falcon_data_replicator.py: -------------------------------------------------------------------------------- 1 | """Falcon Data Replicator - Local File System / AWS S3 connector""" 2 | 3 | # _____ _ ____ _ ____ _ _ _ 4 | # | ___|_ _| | ___ ___ _ __ | _ \ __ _| |_ __ _ | _ \ ___ _ __ | (_) ___ __ _| |_ ___ _ __ 5 | # | |_ / _` | |/ __/ _ \| '_ \ | | | |/ _` | __/ _` | | |_) / _ \ '_ \| | |/ __/ _` | __/ _ \| '__| 6 | # | _| (_| | | (_| (_) | | | | | |_| | (_| | || (_| | | _ < __/ |_) | | | (_| (_| | || (_) | | 7 | # |_| \__,_|_|\___\___/|_| |_| |____/ \__,_|\__\__,_| |_| \_\___| .__/|_|_|\___\__,_|\__\___/|_| 8 | # |_| 9 | # Local File System / AWS S3 connector 10 | # 11 | ################################################################################################### 12 | # NOTE: See https://github.com/CrowdStrike/FDR for details on how to use this application. # 13 | ################################################################################################### 14 | # 15 | import json 16 | import io 17 | import os 18 | import sys 19 | import time 20 | import pathlib 21 | import signal as sig 22 | import configparser 23 | import argparse 24 | import logging 25 | from logging.handlers import RotatingFileHandler 26 | from functools import partial 27 | 28 | # This solution is dependant upon the AWS boto3 Python library 29 | try: 30 | import boto3 31 | except ImportError as err: 32 | print(err) 33 | print( 34 | 'The AWS boto3 library is required to run Falcon Data Replicator.\nPlease execute "pip3 install boto3"' 35 | ) 36 | sys.exit(1) 37 | 38 | 39 | # Class to hold our connector config and to track our running status 40 | class FDRConnector: # pylint: disable=R0902 41 | """The FDRConnector class contains the details of this connection and tracks the status of our process.""" 42 | 43 | def __init__(self, config: configparser.ConfigParser): 44 | """Initialize our status class""" 45 | self.set_exit(False) 46 | # We cannot read our source parameters, exit the routine 47 | if "Source Data" not in config: 48 | print("Unable to load configuration file parameters. 
Routine halted.") 49 | sys.exit(1) 50 | 51 | # AWS Client ID - Provided by CrowdStrike 52 | self.aws_key = config["Source Data"]["AWS_KEY"] 53 | # AWS Client Secret - Provided by CrowdStrike 54 | self.aws_secret = config["Source Data"]["AWS_SECRET"] 55 | # AWS SQS queue URL - Provided by CrowdStrike 56 | self.queue_url = config["Source Data"]["QUEUE_URL"] 57 | # Local file output location 58 | self.output_path = os.path.realpath(config["Source Data"]["OUTPUT_PATH"]) 59 | # Timeout before messages are returned to the queue 60 | self.visibility_timeout = int(config["Source Data"]["VISIBILITY_TIMEOUT"]) 61 | # Message delay 62 | self.message_delay = int(config["Source Data"]["MESSAGE_DELAY"]) 63 | # Queue delay 64 | self.queue_delay = int(config["Source Data"]["QUEUE_DELAY"]) 65 | # Log File 66 | self.log_file = config["Source Data"]["LOG_FILE"] 67 | # AWS Region name for our source S3 bucket 68 | self.region_name = config["Source Data"]["REGION_NAME"] 69 | self.in_memory_transfer_only = ( 70 | False # Defaults to writing to the local file system 71 | ) 72 | self.target_region_name = None # Defaults to no upload 73 | self.target_bucket_name = None # Defaults to no upload 74 | self.remove_local_file = False # Defaults to keeping files locally 75 | try: 76 | # Fail on these in order. If REMOVE_LOCAL_FILE, or IN_MEMORY_TRANSFER_ONLY 77 | # fail, processing will still continue. 78 | if "Destination Data" in config: 79 | # If it's not present, we don't need it 80 | if config["Destination Data"]["TARGET_BUCKET"]: 81 | # The name of our target S3 bucket 82 | self.target_bucket_name = config["Destination Data"][ 83 | "TARGET_BUCKET" 84 | ] 85 | 86 | if config["Destination Data"]["TARGET_REGION"]: 87 | # The AWS region name our target S3 bucket resides in 88 | self.target_region_name = config["Destination Data"][ 89 | "TARGET_REGION" 90 | ] 91 | 92 | if config["Destination Data"]["REMOVE_LOCAL_FILE"]: 93 | # Should we remove local files after we upload them? 94 | remove = config["Destination Data"]["REMOVE_LOCAL_FILE"] 95 | if remove.lower() in "true,yes".split(","): # pylint: disable=R1703 96 | self.remove_local_file = True 97 | else: 98 | self.remove_local_file = False 99 | 100 | if config["Destination Data"]["IN_MEMORY_TRANSFER_ONLY"]: 101 | # Transfer to S3 without using the local file system? 102 | mem_trans = config["Destination Data"]["IN_MEMORY_TRANSFER_ONLY"] 103 | if mem_trans.lower() in "true,yes".split( 104 | "," 105 | ): # pylint: disable=R1703 106 | self.in_memory_transfer_only = True 107 | else: 108 | self.in_memory_transfer_only = False 109 | 110 | except KeyError: 111 | pass 112 | 113 | @property 114 | def exiting(self): 115 | """Returns the value of the exiting property""" 116 | return self.exiting 117 | 118 | @classmethod 119 | def set_exit(cls, val): 120 | """Sets the value of the exiting property""" 121 | cls.exiting = val 122 | return True 123 | 124 | 125 | # This method is used as an exit handler. When a quit, cancel or interrupt is received, 126 | # this method forces FDR to finish processing the file it is working on before exiting. 
127 | def clean_exit(stat, signal, frame): # pylint: disable=W0613 128 | """Graceful exit handler for SIGINT, SIGQUIT and SIGTERM""" 129 | stat.set_exit(True) 130 | return True 131 | 132 | 133 | def handle_file(path, key, file_object=None): 134 | """If configured, upload this file to our target bucket and remove it.""" 135 | # If we've defined a target bucket 136 | if FDR.target_bucket_name: 137 | if not file_object: 138 | # Open our local file (binary) 139 | with open(path, "rb") as data: 140 | # Perform the upload to the same key in our target bucket 141 | s3_target.upload_fileobj(data, FDR.target_bucket_name, key) 142 | logger.info("Uploaded file to path %s", key) 143 | # Only perform this step if configured to do so 144 | if FDR.remove_local_file: 145 | # Remove the file from the local file system 146 | os.remove(path) 147 | logger.info("Removed %s", path) 148 | # Remove the temporary folder from the local file system 149 | os.rmdir(os.path.dirname(path)) 150 | logger.info("Removed %s", os.path.dirname(path)) 151 | pure = pathlib.PurePath(path) 152 | # Remove the parent temporary folders if they exist 153 | os.rmdir(pure.parent.parent) 154 | logger.info("Removed %s", pure.parent.parent) 155 | if FDR.output_path not in pure.parent.parent.parent.name: 156 | os.rmdir(pure.parent.parent.parent) 157 | logger.info("Removed %s", pure.parent.parent.parent) 158 | else: 159 | s3_target.upload_fileobj(file_object, FDR.target_bucket_name, key) 160 | logger.info("Uploaded file to path %s", key) 161 | # We're done 162 | return True 163 | 164 | 165 | def download_message_files(msg): 166 | """Downloads the files from S3 referenced in msg and places them in output_path. 167 | 168 | download_message_files will iterate through every file listed at msg['files'], 169 | move it to our output_path, and then call handle_file.
170 | """ 171 | # For every file in our message 172 | for s3_file in msg["files"]: 173 | # Retrieve the bucket path for this file 174 | s3_path = s3_file["path"] 175 | if not FDR.in_memory_transfer_only: 176 | # Create a local path name for our destination file based off of the S3 path 177 | # Construct output path for this message's files 178 | msg_output_path = os.path.realpath( 179 | os.path.join(FDR.output_path, msg["pathPrefix"]) 180 | ) 181 | # Only write files to the specified output_path 182 | if ( 183 | os.path.commonpath([FDR.output_path, msg_output_path]) 184 | != FDR.output_path 185 | ): 186 | logger.info( 187 | f"Skipping {msg_output_path} to prevent writes outside of output path: {FDR.output_path}" 188 | ) 189 | continue 190 | # Ensure directory exists at output path 191 | if not os.path.exists(msg_output_path): 192 | # Create it if it doesn't 193 | os.makedirs(msg_output_path) 194 | local_path = os.path.realpath(os.path.join(FDR.output_path, s3_path)) 195 | # Only write files to the specified output_path 196 | if os.path.commonpath([FDR.output_path, local_path]) != FDR.output_path: 197 | logger.info( 198 | f"Skipping {local_path} to prevent writes outside of output path: {FDR.output_path}" 199 | ) 200 | continue 201 | if not os.path.exists(os.path.dirname(local_path)): 202 | # Handle fdr platform and time partitioned folders 203 | os.makedirs(os.path.dirname(local_path)) 204 | # Open our local file for binary write 205 | with open(local_path, "wb") as data: 206 | # Download the file from S3 into our opened local file 207 | s3.download_fileobj(msg["bucket"], s3_path, data) 208 | logger.info("Downloaded file to path %s", local_path) 209 | # Handle S3 upload if configured 210 | handle_file(local_path, s3_path, None) 211 | else: 212 | logger.info("Downloading file to memory") 213 | s3t = boto3.resource( 214 | "s3", 215 | region_name=FDR.region_name, 216 | aws_access_key_id=FDR.aws_key, 217 | aws_secret_access_key=FDR.aws_secret, 218 | ) 219 | bkt = s3t.Bucket(msg["bucket"]) 220 | obj = bkt.Object(s3_path) 221 | stream = io.BytesIO() 222 | obj.download_fileobj(stream) 223 | # Seek to the beginning of the stream before passing it to the upload handler 224 | stream.seek(0) 225 | handle_file(None, s3_path, stream) 226 | 227 | 228 | def consume_data_replicator(): 229 | """Consume from data replicator and track number of messages/files/bytes downloaded.""" 230 | # Tracking details 231 | msg_cnt = 0 232 | file_cnt = 0 233 | byte_cnt = 0 234 | 235 | # Continuously poll the queue for new messages. 
236 | while not FDR.exiting: 237 | received = False 238 | # Receive messages from queue if any exist 239 | # (NOTE: receive_messages() only receives a few messages at a time, it does NOT exhaust the queue) 240 | for msg in queue.receive_messages(VisibilityTimeout=FDR.visibility_timeout): 241 | received = True 242 | # Increment our message counter 243 | msg_cnt += 1 244 | logger.info("Processing message %i [%s]", msg_cnt, msg.message_id) 245 | # Grab the actual message body 246 | body = json.loads(msg.body) 247 | # Download the file to our local file system and potentially upload it to S3 248 | download_message_files(body) 249 | # Increment our file count by using the fileCount value in our message 250 | file_cnt += body["fileCount"] 251 | # Increment our byte count by using the totalSize value in our message 252 | byte_cnt += body["totalSize"] 253 | logger.info("Removing message %i [%s] from queue", msg_cnt, msg.message_id) 254 | # Remove our message from the queue, if this is not performed in visibility_timeout seconds 255 | # this message will be restored to the queue for follow-up processing 256 | msg.delete() 257 | # Sleep until our next message iteration 258 | time.sleep(FDR.message_delay) 259 | 260 | logger.info( 261 | "Messages consumed: %i\tFile count: %i\tByte count: %i", 262 | msg_cnt, 263 | file_cnt, 264 | byte_cnt, 265 | ) 266 | if not received: 267 | logger.info( 268 | "No messages received, sleeping for %i seconds", FDR.queue_delay 269 | ) 270 | time.sleep(FDR.queue_delay) 271 | 272 | # We've requested an exit 273 | if FDR.exiting: 274 | # Clean exit 275 | logger.warning("Routine exit requested") 276 | sys.exit(0) 277 | else: 278 | # Something untoward has occurred 279 | logger.error("Unexpected exit occurred") 280 | sys.exit(1) 281 | 282 | 283 | # Start our main routine 284 | if __name__ == "__main__": 285 | # Configure our accepted command line parameters 286 | parser = argparse.ArgumentParser("Falcon Data Replicator") 287 | parser.add_argument( 288 | "-f", 289 | "--config_file", 290 | dest="config_file", 291 | help="Path to the configuration file", 292 | required=False, 293 | ) 294 | # Parse any parameters passed at runtime 295 | args = parser.parse_args() 296 | # If we were not provided a configuration file name 297 | if not args.config_file: 298 | # Use the default name / location provided in our repo 299 | CONFIG_FILE = "../falcon_data_replicator.ini" 300 | else: 301 | # Use the configuration file provided at runtime 302 | CONFIG_FILE = args.config_file 303 | # Read in our configuration parameters 304 | configuration = configparser.ConfigParser() 305 | configuration.read(CONFIG_FILE) 306 | # Create our connector 307 | FDR = FDRConnector(configuration) 308 | # Setup our root logger 309 | logging.basicConfig( 310 | level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s" 311 | ) 312 | # Create our FDR logger 313 | logger = logging.getLogger("FDR Connector") 314 | # Rotate log file handler 315 | RFH = RotatingFileHandler(FDR.log_file, maxBytes=20971520, backupCount=5) 316 | # Log file output format 317 | F_FORMAT = logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s") 318 | # Set the log file output level to INFO 319 | RFH.setLevel(logging.INFO) 320 | # Add our log file formatter to the log file handler 321 | RFH.setFormatter(F_FORMAT) 322 | # Add our log file handler to our logger 323 | logger.addHandler(RFH) 324 | # Log our pre-startup event 325 | logger.info(" _____ ____ ____ _") 326 | logger.info("| ___| _ \\| _ \\ (.\\") 327 | 
logger.info("| |_ | | | | |_) | |/(\\") 328 | logger.info("| _| | |_| | _ < \\(\\\\") 329 | logger.info('|_| |____/|_| \\_\\ "^"`\\') 330 | logger.info("Process starting up") 331 | # Enable our graceful exit handler to allow uploads and artifact 332 | # cleanup to complete for SIGINT, SIGTERM and SIGQUIT signals. 333 | sig.signal(sig.SIGINT, partial(clean_exit, FDR)) 334 | sig.signal(sig.SIGTERM, partial(clean_exit, FDR)) 335 | sig.signal(sig.SIGQUIT, partial(clean_exit, FDR)) 336 | # Connect to our CrowdStrike provided SQS queue 337 | sqs = boto3.resource( 338 | "sqs", 339 | region_name=FDR.region_name, 340 | aws_access_key_id=FDR.aws_key, 341 | aws_secret_access_key=FDR.aws_secret, 342 | ) 343 | # Connect to our CrowdStrike provided S3 bucket 344 | s3 = boto3.client( 345 | "s3", 346 | region_name=FDR.region_name, 347 | aws_access_key_id=FDR.aws_key, 348 | aws_secret_access_key=FDR.aws_secret, 349 | ) 350 | # If we are doing S3 uploads 351 | if FDR.target_bucket_name and FDR.target_region_name: 352 | logger.info("Upload to AWS S3 enabled") 353 | # Connect to our target S3 bucket, uses the existing client configuration to connect (Not the CS provided ones) 354 | s3_target = boto3.client("s3", region_name=FDR.target_region_name) 355 | # Create our queue object for handling message traffic 356 | queue = sqs.Queue(url=FDR.queue_url) 357 | logger.info("Startup complete") 358 | # Start consuming the replicator feed 359 | consume_data_replicator() 360 | 361 | 362 | # . 363 | # Your data | _____________________________________________________ ___ 364 | # is here! | | _____ ________ _ __ | __ 365 | # \ _______| | / ___/______ _ _____/ / __/ /_____(_) /_____ | ___ 366 | # / _____ | | / /__/ __/ _ \ |/|/ / _ /\ \/ __/ __/ / '_/ -_) | 367 | # / /(__) || | \___/_/ \___/__,__/\_,_/___/\__/_/ /_/_/\_\\__/ | ___ 368 | # ________/ / |OO| || | | 369 | # | Hemi |-------|| | --= FALCON DATA REPLICATOR >> | ___ 370 | # (| | -.|| |_______________________ | ____ 371 | # | ____ \ ||_________||____________ | ____ ____ | 372 | # /| / __ \ |______|| / __ \ / __ \ | | / __ \ / __ \ |\ ___ 373 | # \|| / \ |_______________| / \ |_| / \ |__| |___________| / \ |__| / \|_|/ 374 | # | () | | () | | () | | () | | () | ____ 375 | # \__/ \__/ \__/ \__/ \__/ 376 | --------------------------------------------------------------------------------