├── .github
│   └── workflows
│       ├── publish.yml
│       └── test.yml
├── .gitignore
├── .pylintrc
├── LICENSE
├── MANIFEST.in
├── README.md
├── account-data
│   └── empty
├── bin
│   └── cloudtracker
├── cloudtracker
│   ├── __init__.py
│   ├── cli.py
│   ├── data
│   │   ├── aws_api_list.txt
│   │   └── cloudtrail_supported_actions.txt
│   └── datasources
│       ├── __init__.py
│       ├── athena.py
│       └── es.py
├── config.yaml.demo
├── docs
│   └── elasticsearch.md
├── hindsight
│   └── run
│       ├── analysis
│       │   ├── counter.cfg
│       │   └── counter.lua
│       ├── input
│       │   ├── file.cfg
│       │   ├── file.lua
│       │   └── json.lua
│       └── output
│           ├── elasticsearch_bulk_api.cfg
│           └── elasticsearch_bulk_api.lua
├── requirements-dev.txt
├── requirements.txt
├── setup.cfg
├── setup.py
├── tasks.py
└── tests
    ├── scripts
    │   └── pylint.sh
    └── unit
        └── test_cloudtracker.py

/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 | 
4 | name: Upload CloudTracker to PyPI
5 | 
6 | on:
7 |   release:
8 |     types: [created]
9 | 
10 | jobs:
11 |   deploy:
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v2
17 |     - name: Set up Python
18 |       uses: actions/setup-python@v1
19 |       with:
20 |         python-version: '3.x'
21 |     - name: Install dependencies
22 |       run: |
23 |         python -m pip install --upgrade pip
24 |         pip install setuptools wheel twine
25 |     - name: Build and publish
26 |       env:
27 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |       run: |
30 |         python setup.py sdist bdist_wheel
31 |         twine upload dist/*
32 | 
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | # .github/workflows/test.yml
2 | 
3 | name: Test
4 | 
5 | on: [push, pull_request]
6 | 
7 | jobs:
8 |   test:
9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - uses: actions/checkout@v2
12 | 
13 |       - name: Setup Python
14 |         uses: actions/setup-python@v1
15 |         with:
16 |           python-version: 3.7
17 | 
18 |       - name: Install dependencies
19 |         run: |
20 |           pip install -r requirements.txt
21 |           pip install -r requirements-dev.txt
22 | 
23 |       # - run: invoke build.install-package
24 |       - run: invoke test.help
25 |       # - run: invoke test.security
26 |       - run: invoke unit.nose
27 |       # - run: invoke test.lint
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.pyc
3 | *.egg-info
4 | .eggs
5 | venv/
6 | .coverage
7 | htmlcov/
8 | config.yaml
9 | account-data/
10 | my_account_iam.json
11 | 
12 | # IDEs
13 | .idea
14 | .vscode
15 | 
16 | # Working directory
17 | tmp/*
18 | 
19 | ########## Python ##########
20 | ### From gitignore.io
21 | # Byte-compiled / optimized / DLL files
22 | __pycache__/
23 | *.py[cod]
24 | *$py.class
25 | 
26 | # C extensions
27 | *.so
28 | 
29 | # Distribution / packaging
30 | .Python
31 | build/
32 | develop-eggs/
33 | dist/
34 | downloads/
35 | eggs/
36 | .eggs/
37 | lib/
38 | lib64/
39 | parts/
40 | sdist/
41 | var/
42 | wheels/
43 | pip-wheel-metadata/
44 | share/python-wheels/
45 | *.egg-info/
46 | .installed.cfg
47 | *.egg
48 | MANIFEST
49 | 
50 | # PyInstaller
51 | # Usually these files are written by a python
script from a template 52 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 53 | *.manifest 54 | *.spec 55 | 56 | # Installer logs 57 | pip-log.txt 58 | pip-delete-this-directory.txt 59 | 60 | # Unit test / coverage reports 61 | htmlcov/ 62 | .tox/ 63 | .nox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *.cover 70 | .hypothesis/ 71 | .pytest_cache/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # celery beat schedule file 97 | celerybeat-schedule 98 | 99 | # SageMath parsed files 100 | *.sage.py 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # Mr Developer 110 | .mr.developer.cfg 111 | .project 112 | .pydevproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code 6 | extension-pkg-whitelist= 7 | 8 | # Add files or directories to the blacklist. They should be base names, not 9 | # paths. 10 | ignore=CVS 11 | 12 | # Add files or directories matching the regex patterns to the blacklist. The 13 | # regex matches against base names, not paths. 14 | ignore-patterns= 15 | 16 | # Python code to execute, usually for sys.path manipulation such as 17 | # pygtk.require(). 18 | #init-hook= 19 | 20 | # Use multiple processes to speed up Pylint. 21 | jobs=4 22 | 23 | # List of plugins (as comma separated values of python modules names) to load, 24 | # usually to register additional checkers. 25 | load-plugins= 26 | 27 | # Pickle collected data for later comparisons. 28 | persistent=yes 29 | 30 | # Specify a configuration file. 31 | #rcfile= 32 | 33 | # Allow loading of arbitrary C extensions. Extensions are imported into the 34 | # active Python interpreter and may run arbitrary code. 35 | unsafe-load-any-extension=no 36 | 37 | 38 | [MESSAGES CONTROL] 39 | 40 | # Only show warnings with the listed confidence levels. Leave empty to show 41 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 42 | confidence= 43 | 44 | # Disable the message, report, category or checker with the given id(s). You 45 | # can either give multiple identifiers separated by comma (,) or put this 46 | # option multiple times (only on the command line, not in the configuration 47 | # file where it should appear only once).You can also use "--disable=all" to 48 | # disable everything first and then reenable specific checks. 
For example, if 49 | # you want to run only the similarities checker, you can use "--disable=all 50 | # --enable=similarities". If you want to run only the classes checker, but have 51 | # no Warning level messages displayed, use"--disable=all --enable=classes 52 | # --disable=W" 53 | disable=fixme,I0011,E1102,R0912,C0103,C0111,R1702,R0915,C0325,R0914,W0703,R1705,W0603,W0406 54 | 55 | # Enable the message, report, category or checker with the given id(s). You can 56 | # either give multiple identifier separated by comma (,) or put this option 57 | # multiple time (only on the command line, not in the configuration file where 58 | # it should appear only once). See also the "--disable" option for examples. 59 | enable= 60 | 61 | 62 | [REPORTS] 63 | 64 | # Python expression which should return a note less than 10 (10 is the highest 65 | # note). You have access to the variables errors warning, statement which 66 | # respectively contain the number of errors / warnings messages and the total 67 | # number of statements analyzed. This is used by the global evaluation report 68 | # (RP0004). 69 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 70 | 71 | # Template used to display messages. This is a python new-style format string 72 | # used to format the message information. See doc for all details 73 | #msg-template= 74 | 75 | # Set the output format. Available formats are text, parseable, colorized, json 76 | # and msvs (visual studio).You can also give a reporter class, eg 77 | # mypackage.mymodule.MyReporterClass. 78 | output-format=text 79 | 80 | # Tells whether to display a full report or only the messages 81 | reports=no 82 | 83 | # Activate the evaluation score. 84 | score=yes 85 | 86 | 87 | [REFACTORING] 88 | 89 | # Maximum number of nested blocks for function / method body 90 | max-nested-blocks=5 91 | 92 | 93 | [BASIC] 94 | 95 | # Naming hint for argument names 96 | argument-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 97 | 98 | # Regular expression matching correct argument names 99 | argument-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 100 | 101 | # Naming hint for attribute names 102 | attr-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 103 | 104 | # Regular expression matching correct attribute names 105 | attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 106 | 107 | # Bad variable names which should always be refused, separated by a comma 108 | bad-names=foo,bar,baz,toto,tutu,tata 109 | 110 | # Naming hint for class attribute names 111 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 112 | 113 | # Regular expression matching correct class attribute names 114 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 115 | 116 | # Naming hint for class names 117 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 118 | 119 | # Regular expression matching correct class names 120 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 121 | 122 | # Naming hint for constant names 123 | const-name-hint=(([a-zA-Z_][a-zA-Z0-9_]*)|(__.*__))$ 124 | 125 | # Regular expression matching correct constant names 126 | const-rgx=(([a-zA-Z_][a-zA-Z0-9_]*)|(__.*__))$ 127 | 128 | # Minimum line length for functions/classes that require docstrings, shorter 129 | # ones are exempt. 
130 | docstring-min-length=2 131 | 132 | # Naming hint for function names 133 | function-name-hint=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ 134 | 135 | # Regular expression matching correct function names 136 | function-rgx=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ 137 | 138 | # Good variable names which should always be accepted, separated by a comma 139 | good-names=e,f,i,j,k,ex,Run,_ 140 | 141 | # Include a hint for the correct naming format with invalid-name 142 | include-naming-hint=no 143 | 144 | # Naming hint for inline iteration names 145 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 146 | 147 | # Regular expression matching correct inline iteration names 148 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 149 | 150 | # Naming hint for method names 151 | method-name-hint=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ 152 | 153 | # Regular expression matching correct method names 154 | method-rgx=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ 155 | 156 | # Naming hint for module names 157 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 158 | 159 | # Regular expression matching correct module names 160 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 161 | 162 | # Colon-delimited sets of names that determine each other's naming style when 163 | # the name regexes allow several styles. 164 | name-group= 165 | 166 | # Regular expression which should only match function or class names that do 167 | # not require a docstring. 168 | no-docstring-rgx=^_ 169 | 170 | # List of decorators that produce properties, such as abc.abstractproperty. Add 171 | # to this list to register other decorators that produce valid properties. 172 | property-classes=abc.abstractproperty 173 | 174 | # Naming hint for variable names 175 | variable-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 176 | 177 | # Regular expression matching correct variable names 178 | variable-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 179 | 180 | 181 | [FORMAT] 182 | 183 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 184 | expected-line-ending-format= 185 | 186 | # Regexp for a line that is allowed to be longer than the limit. 187 | ignore-long-lines=^\s*(# )??$ 188 | 189 | # Number of spaces of indent required inside a hanging or continued line. 190 | indent-after-paren=4 191 | 192 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 193 | # tab). 194 | indent-string=' ' 195 | 196 | # Maximum number of characters on a single line. 197 | max-line-length=120 198 | 199 | # Maximum number of lines in a module 200 | max-module-lines=1000 201 | 202 | # List of optional constructs for which whitespace checking is disabled. `dict- 203 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 204 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 205 | # `empty-line` allows space-only lines. 206 | no-space-check=trailing-comma,dict-separator 207 | 208 | # Allow the body of a class to be on the same line as the declaration if body 209 | # contains single statement. 210 | single-line-class-stmt=no 211 | 212 | # Allow the body of an if to be on the same line as the test if there is no 213 | # else. 214 | single-line-if-stmt=no 215 | 216 | 217 | [LOGGING] 218 | 219 | # Logging modules to check that the string format arguments are in logging 220 | # function parameter format 221 | logging-modules=logging 222 | 223 | 224 | [MISCELLANEOUS] 225 | 226 | # List of note tags to take in consideration, separated by a comma. 
227 | notes=FIXME,XXX,TODO 228 | 229 | 230 | [SIMILARITIES] 231 | 232 | # Ignore comments when computing similarities. 233 | ignore-comments=yes 234 | 235 | # Ignore docstrings when computing similarities. 236 | ignore-docstrings=yes 237 | 238 | # Ignore imports when computing similarities. 239 | ignore-imports=no 240 | 241 | # Minimum lines number of a similarity. 242 | min-similarity-lines=4 243 | 244 | 245 | [SPELLING] 246 | 247 | # Spelling dictionary name. Available dictionaries: none. To make it working 248 | # install python-enchant package. 249 | spelling-dict= 250 | 251 | # List of comma separated words that should not be checked. 252 | spelling-ignore-words= 253 | 254 | # A path to a file that contains private dictionary; one word per line. 255 | spelling-private-dict-file= 256 | 257 | # Tells whether to store unknown words to indicated private dictionary in 258 | # --spelling-private-dict-file option instead of raising a message. 259 | spelling-store-unknown-words=no 260 | 261 | 262 | [TYPECHECK] 263 | 264 | # List of decorators that produce context managers, such as 265 | # contextlib.contextmanager. Add to this list to register other decorators that 266 | # produce valid context managers. 267 | contextmanager-decorators=contextlib.contextmanager 268 | 269 | # List of members which are set dynamically and missed by pylint inference 270 | # system, and so shouldn't trigger E1101 when accessed. Python regular 271 | # expressions are accepted. 272 | generated-members= 273 | 274 | # Tells whether missing members accessed in mixin class should be ignored. A 275 | # mixin class is detected if its name ends with "mixin" (case insensitive). 276 | ignore-mixin-members=yes 277 | 278 | # This flag controls whether pylint should warn about no-member and similar 279 | # checks whenever an opaque object is returned when inferring. The inference 280 | # can return multiple potential results while evaluating a Python object, but 281 | # some branches might not be evaluated, which results in partial inference. In 282 | # that case, it might be useful to still emit no-member and other checks for 283 | # the rest of the inferred objects. 284 | ignore-on-opaque-inference=yes 285 | 286 | # List of class names for which member attributes should not be checked (useful 287 | # for classes with dynamically set attributes). This supports the use of 288 | # qualified names. 289 | ignored-classes=optparse.Values,thread._local,_thread._local 290 | 291 | # List of module names for which member attributes should not be checked 292 | # (useful for modules/projects where namespaces are manipulated during runtime 293 | # and thus existing member attributes cannot be deduced by static analysis. It 294 | # supports qualified module names, as well as Unix pattern matching. 295 | ignored-modules= 296 | 297 | # Show a hint with possible names when a member name was not found. The aspect 298 | # of finding the hint is based on edit distance. 299 | missing-member-hint=yes 300 | 301 | # The minimum edit distance a name should have in order to be considered a 302 | # similar match for a missing member name. 303 | missing-member-hint-distance=1 304 | 305 | # The total number of similar names that should be taken in consideration when 306 | # showing a hint for a missing member. 307 | missing-member-max-choices=1 308 | 309 | 310 | [VARIABLES] 311 | 312 | # List of additional names supposed to be defined in builtins. Remember that 313 | # you should avoid to define new builtins when possible. 
314 | additional-builtins= 315 | 316 | # Tells whether unused global variables should be treated as a violation. 317 | allow-global-unused-variables=yes 318 | 319 | # List of strings which can identify a callback function by name. A callback 320 | # name must start or end with one of those strings. 321 | callbacks=cb_,_cb 322 | 323 | # A regular expression matching the name of dummy variables (i.e. expectedly 324 | # not used). 325 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 326 | 327 | # Argument names that match this expression will be ignored. Default to name 328 | # with leading underscore 329 | ignored-argument-names=_.*|^ignored_|^unused_ 330 | 331 | # Tells whether we should check for unused import in __init__ files. 332 | init-import=no 333 | 334 | # List of qualified module names which can have objects that can redefine 335 | # builtins. 336 | redefining-builtins-modules=six.moves,future.builtins 337 | 338 | 339 | [CLASSES] 340 | 341 | # List of method names used to declare (i.e. assign) instance attributes. 342 | defining-attr-methods=__init__,__new__,setUp 343 | 344 | # List of member names, which should be excluded from the protected access 345 | # warning. 346 | exclude-protected=_asdict,_fields,_replace,_source,_make 347 | 348 | # List of valid names for the first argument in a class method. 349 | valid-classmethod-first-arg=cls 350 | 351 | # List of valid names for the first argument in a metaclass class method. 352 | valid-metaclass-classmethod-first-arg=mcs 353 | 354 | 355 | [DESIGN] 356 | 357 | # Maximum number of arguments for function / method 358 | max-args=5 359 | 360 | # Maximum number of attributes for a class (see R0902). 361 | max-attributes=10 362 | 363 | # Maximum number of boolean expressions in a if statement 364 | max-bool-expr=5 365 | 366 | # Maximum number of branch for function / method body 367 | max-branches=25 368 | 369 | # Maximum number of locals for function / method body 370 | max-locals=25 371 | 372 | # Maximum number of parents for a class (see R0901). 373 | max-parents=7 374 | 375 | # Maximum number of public methods for a class (see R0904). 376 | max-public-methods=30 377 | 378 | # Maximum number of return / yield for function / method body 379 | max-returns=10 380 | 381 | # Maximum number of statements in function / method body 382 | max-statements=50 383 | 384 | # Minimum number of public methods for a class (see R0903). 385 | min-public-methods=0 386 | 387 | 388 | [IMPORTS] 389 | 390 | # Allow wildcard imports from modules that define __all__. 391 | allow-wildcard-with-all=no 392 | 393 | # Analyse import fallback blocks. This can be used to support both Python 2 and 394 | # 3 compatible code, which means that the block might have code that exists 395 | # only in one or another interpreter, leading to false positives when analysed. 396 | analyse-fallback-blocks=no 397 | 398 | # Deprecated modules which should not be used, separated by a comma 399 | deprecated-modules=optparse,tkinter.tix 400 | 401 | # Create a graph of external dependencies in the given file (report RP0402 must 402 | # not be disabled) 403 | ext-import-graph= 404 | 405 | # Create a graph of every (i.e. 
internal and external) dependencies in the 406 | # given file (report RP0402 must not be disabled) 407 | import-graph= 408 | 409 | # Create a graph of internal dependencies in the given file (report RP0402 must 410 | # not be disabled) 411 | int-import-graph= 412 | 413 | # Force import order to recognize a module as part of the standard 414 | # compatibility libraries. 415 | known-standard-library= 416 | 417 | # Force import order to recognize a module as part of a third party library. 418 | known-third-party=enchant 419 | 420 | 421 | [EXCEPTIONS] 422 | 423 | # Exceptions that will emit a warning when being caught. Defaults to 424 | # "Exception" 425 | overgeneral-exceptions=Exception -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Duo Security 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CloudTracker helps you find over-privileged IAM users and roles by comparing CloudTrail logs with current IAM policies. 2 | 3 | *Intro post: https://duo.com/blog/introducing-cloudtracker-an-aws-cloudtrail-log-analyzer* 4 | 5 | 6 | This document will describe the setup that uses Athena and how to use the tool. CloudTracker no longer requires ElasticSearch, but if you'd like to use CloudTracker with ElasticSearch please see [ElasticSearch installation and ingestion](docs/elasticsearch.md). 7 | 8 | Setup 9 | ===== 10 | 11 | ### Step 1: Setup CloudTracker 12 | 13 | ``` 14 | python3 -m venv ./venv && source venv/bin/activate 15 | pip install cloudtracker 16 | ``` 17 | 18 | Note: To install with ElasticSearch support, see the [ElasticSearch docs](docs/elasticsearch.md). 
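Once installed, a quick sanity check (assuming the virtualenv created above is still active) is to print the CLI's usage text:

```
cloudtracker --help
```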
19 | 
20 | ### Step 2: Download your IAM data
21 | Download a copy of the IAM data of an account using the AWS CLI:
22 | 
23 | ```
24 | mkdir -p account-data
25 | aws iam get-account-authorization-details > account-data/demo_iam.json
26 | ```
27 | 
28 | ### Step 3: Configure CloudTracker
29 | 
30 | Create a `config.yaml` file with contents similar to:
31 | 
32 | ```
33 | athena:
34 |   s3_bucket: my_log_bucket
35 |   path: my_prefix
36 | accounts:
37 |   - name: demo
38 |     id: 111111111111
39 |     iam: account-data/demo_iam.json
40 | ```
41 | 
42 | This assumes your CloudTrail logs are at `s3://my_log_bucket/my_prefix/AWSLogs/111111111111/CloudTrail/`.
43 | Set `my_prefix` to `''` if you have no prefix.
44 | 
45 | If your CloudTrail is managed through an organization, you can configure this in the `athena` section:
46 | 
47 | ```
48 | athena:
49 |   s3_bucket: my_log_bucket
50 |   path: my_prefix
51 |   org_id: o-myid123
52 | ```
53 | 
54 | ### Step 4: Run CloudTracker
55 | 
56 | CloudTracker uses boto3 and assumes it has access to AWS credentials in environment variables; one way to provide these is [aws-vault](https://github.com/99designs/aws-vault).
57 | 
58 | You will need the managed policy `arn:aws:iam::aws:policy/AmazonAthenaFullAccess`, plus `s3:GetObject` and `s3:ListBucket` privileges for the S3 bucket containing the CloudTrail logs.
59 | 
60 | Once you're running in an aws-vault environment (or otherwise have your environment variables set up for an AWS session), you can run:
61 | 
62 | ```
63 | cloudtracker --account demo --list users
64 | ```
65 | 
66 | This will perform all of the initial setup, which takes about a minute. Subsequent calls will be faster.
67 | 
68 | 
69 | Clean-up
70 | --------
71 | 
72 | CloudTracker does not currently clean up after itself, so query results are left behind in the default bucket `aws-athena-query-results-ACCOUNT_ID-REGION`.
73 | 
74 | If you want to remove all signs of CloudTracker, delete the query results from that bucket and, in Athena, run `DROP DATABASE cloudtracker CASCADE`.
75 | 
76 | 
77 | Example usage
78 | =============
79 | 
80 | Listing actors
81 | --------------
82 | CloudTracker provides command line options to list the users and roles in an account. For example:
83 | ```
84 | $ cloudtracker --account demo --list users --start 2018-01-01
85 |   alice
86 | - bob
87 |   charlie
88 | ```
89 | 
90 | In this example, the list of users was obtained from the IAM information, and the CloudTrail logs show no record of the user "bob" being used since January 1, 2018; CloudTracker therefore advises removing the user by prefixing it with a "-".
91 | 
92 | Note that not all AWS activities are stored in CloudTrail logs; in particular, data-level events such as reading and writing S3 objects or putting CloudWatch metrics are not recorded. It is therefore possible that "bob" has been active, but only with actions that are not recorded in CloudTrail. Note also that you may have inactive users or roles that you still wish to keep around; for example, a role that is only used once a year during an annual task. You should therefore use this output as guidance, but not always as instructions.
93 | 
94 | You can also list roles.
95 | 
96 | ```
97 | $ cloudtracker --account demo --list roles --start 2018-01-01
98 |   admin
99 | ```
100 | 
101 | Listing actions of actors
102 | -------------------------
103 | The main purpose of CloudTracker is to look at the API calls made by actors (users and roles).
Let's assume `alice` has `SecurityAuditor` privileges for her user, which grant her the ability to `List` and `Describe` metadata for resources, plus the ability to `AssumeRole` into the `admin` role. We can see her actions:
104 | 
105 | ```
106 | cloudtracker --account demo --user alice
107 | ...
108 |   cloudwatch:describealarmhistory
109 |   cloudwatch:describealarms
110 | - cloudwatch:describealarmsformetric
111 | - cloudwatch:getdashboard
112 | ? cloudwatch:getmetricdata
113 | ...
114 | + s3:createbucket
115 | ...
116 | ```
117 | 
118 | Many actions will be shown, most of them unused, as there are over a thousand AWS APIs and most people use only a few. In the snippet above, we can see that she has called `DescribeAlarmHistory` and `DescribeAlarms`. She has never called `DescribeAlarmsForMetric` or `GetDashboard` even though she has those privileges, and it is unknown whether she has called `GetMetricData`, as that call is not recorded in CloudTrail. Further down, there is a call to `CreateBucket` that she made but does not have privileges for. This can happen if the actor previously had privileges for an action and used them, but those privileges were later taken away. Errors are filtered out, so if the actor made a call but was denied, it does not show up as used.
119 | 
120 | As there may be a lot of unused or unknown actions, we can filter things down:
121 | ```
122 | cloudtracker --account demo --user alice --show-used
123 | Getting info on alice, user created 2017-09-02T18:02:14Z
124 |   cloudwatch:describealarmhistory
125 |   cloudwatch:describealarms
126 | + s3:createbucket
127 |   sts:assumerole
128 | ```
129 | 
130 | We can do the same thing for roles. For example:
131 | ```
132 | cloudtracker --account demo --role admin --show-used
133 | Getting info for role admin
134 |   s3:createbucket
135 |   iam:createuser
136 | ```
137 | 
138 | ### Output explanation
139 | CloudTracker shows a diff of the privileges granted vs. used. The symbols mean the following:
140 | 
141 | - ` ` No symbol means this privilege is used, so leave it as is.
142 | - `-` A minus sign means the privilege was granted, but not used, so you should remove it.
143 | - `?` A question mark means the privilege was granted, but it is unknown whether it was used because it is not recorded in CloudTrail.
144 | - `+` A plus sign means the privilege was not granted, but was used. The only way this is possible is if the privilege was previously granted, used, and then removed, so you may want to add that privilege back.
145 | 
146 | 
147 | Advanced functionality (only supported with ElasticSearch currently)
148 | ----------------------
149 | This functionality is not yet supported with the Athena configuration of CloudTracker.
150 | 
151 | You may know that `alice` can assume into the `admin` role, so let's look at what she did there using the `--destrole` argument:
152 | ```
153 | cloudtracker --account demo --user alice --destrole admin --show-used
154 | Getting info on alice, user created 2017-09-02T18:02:14Z
155 | Getting info for AssumeRole into admin
156 |   s3:createbucket
157 |   iam:createuser
158 | ```
159 | 
160 | You may also know that `charlie` can assume into the `admin` role, so let's look at what he did there:
161 | ```
162 | cloudtracker --account demo --user charlie --destrole admin --show-used
163 | Getting info on charlie, user created 2017-10-01T01:01:01Z
164 | Getting info for AssumeRole into admin
165 |   s3:createbucket
166 | ```
167 | 
168 | In this example we can see that `charlie` has only ever created an S3 bucket as `admin`, so we may want to remove `charlie`'s ability to assume into this role, or create another role that lacks the ability to create IAM users, which we saw `alice` use. This is the key feature of CloudTracker: identifying which users actually make use of the roles they can assume into, and which actions they use there, is difficult without a tool like this.
169 | 
170 | ### Working with multiple accounts
171 | 
172 | Amazon has advocated the use of multiple AWS accounts in much of its recent guidance, as this helps reduce the blast radius of incidents, among other benefits. Once you start using multiple accounts, though, you may need to rethink how you access them all. One common pattern has users assuming roles into different accounts. We can analyze the role assumptions of users into a different account the same way we did previously for a single account, except that this time you need to ensure the CloudTrail logs from both accounts of interest are loaded into ElasticSearch.
173 | 
174 | 
175 | ```
176 | cloudtracker --account demo --user charlie --destaccount backup --destrole admin --show-used
177 | Getting info on charlie, user created 2017-10-01T01:01:01Z
178 | Getting info for AssumeRole into admin
179 |   s3:createbucket
180 | ```
181 | 
182 | In this example, we used the `--destaccount` option to specify the destination account.
183 | 
184 | 
185 | Data files
186 | ==========
187 | CloudTracker ships with two long text files that tell it what actions exist.
188 | 
189 | aws_api_list.txt
190 | ----------------
191 | This file contains all possible AWS API calls that can be made. One use of this is to identify the privileges granted by an IAM policy when a wildcard such as `s3:*` needs to be expanded.
192 | 
193 | This file was created by running:
194 | ```
195 | git clone --depth 1 -b master https://github.com/boto/botocore.git
196 | find botocore/botocore/data -name '*.json' | xargs cat | jq -r 'select(.operations != null) as $parent | .operations | keys | .[] | $parent.metadata.endpointPrefix +":"+.' | sort | uniq > aws_api_list.txt
197 | ```
198 | 
199 | cloudtrail_supported_actions.txt
200 | --------------------------------
201 | This file contains the AWS API calls that are recorded in CloudTrail logs. This is used to identify when the status of a privilege is "unknown" (i.e., it is not known whether it has been used).
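To make the "unknown" status concrete, here is a minimal, simplified sketch of how a service:event list like this can be loaded and checked. The `load_action_list` helper below is hypothetical; CloudTracker's real loader (`read_aws_api_list` in `cloudtracker/__init__.py`) additionally normalizes service renames and date-suffixed event names:

```
def load_action_list(path):
    """Read service:event pairs, one per line, into a lowercased lookup dict."""
    actions = {}
    with open(path) as f:
        for line in f:
            if not line.strip():
                continue  # skip blank lines
            service, event = line.rstrip().split(":")
            actions["{}:{}".format(service.lower(), event.lower())] = True
    return actions

# Example: can a call's usage be determined from CloudTrail at all?
supported = load_action_list("cloudtracker/data/cloudtrail_supported_actions.txt")
print("s3:createbucket" in supported)           # recorded by CloudTrail
print("cloudwatch:getmetricdata" in supported)  # not recorded, hence shown as "?"
```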
202 | 
203 | This file was created by copying aws_api_list.txt and manually removing events, based on the "CloudTrail Supported Services" section of the CloudTrail user guide (https://docs.aws.amazon.com/awscloudtrail/latest/userguide/awscloudtrail-ug.pdf), following the links to the various services and reading through what is and isn't supported.
204 | 
--------------------------------------------------------------------------------
/account-data/empty:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duo-labs/cloudtracker/822ef553266aca2b1d54fc44e09c230f6df77a8a/account-data/empty
--------------------------------------------------------------------------------
/bin/cloudtracker:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from cloudtracker.cli import main
3 | main()
4 | 
--------------------------------------------------------------------------------
/cloudtracker/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright 2018 Duo Security
3 | 
4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
5 | following conditions are met:
6 | 
7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
8 | disclaimer.
9 | 
10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
11 | following disclaimer in the documentation and/or other materials provided with the distribution.
12 | 
13 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
14 | products derived from this software without specific prior written permission.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
17 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
22 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 | ---------------------------------------------------------------------------
24 | """
25 | __version__ = "2.1.5"
26 | 
27 | import json
28 | import logging
29 | import pkg_resources
30 | import re
31 | 
32 | from colors import color
33 | import jmespath
34 | 
35 | cloudtrail_supported_actions = None
36 | 
37 | logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
38 | 
39 | # Translate CloudTrail name -> IAM name
40 | # Pulled from: http://bit.ly/2txbx1L
41 | # but some of the names there seem reversed
42 | SERVICE_RENAMES = {
43 |     "monitoring": "cloudwatch",
44 |     "email": "ses",
45 | }
46 | 
47 | # Translate IAM name -> CloudTrail name (SOAP API name)
48 | # Pulled from https://docs.aws.amazon.com/AmazonS3/latest/dev/cloudtrail-logging.html
49 | # I think S3 is the only service where IAM names differ from the API calls.
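# Note: these renames are applied in both directions below:
# get_actions_from_statement() rewrites CloudTrail-style names to IAM-style
# names while expanding policy wildcards, and print_diff() does the same for
# events read from the logs, so granted and performed actions are compared
# under a single naming scheme.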
50 | EVENT_RENAMES = {
51 |     "s3:listallmybuckets": "s3:listbuckets",
52 |     "s3:getbucketaccesscontrolpolicy": "s3:getbucketacl",
53 |     "s3:setbucketaccesscontrolpolicy": "s3:putbucketacl",
54 |     "s3:getbucketloggingstatus": "s3:getbucketlogging",
55 |     "s3:setbucketloggingstatus": "s3:putbucketlogging",
56 | }
57 | 
58 | # List of actions seen in CloudTrail logs for which no IAM policies exist.
59 | # These are allowed by default.
60 | NO_IAM = {
61 |     "sts:getcalleridentity": True,
62 |     "sts:getsessiontoken": True,
63 |     "signin:consolelogin": True,
64 |     "signin:checkmfa": True,
65 |     "signin:exitrole": True,
66 |     "signin:renewrole": True,
67 |     "signin:switchrole": True,
68 | }
69 | 
70 | 
71 | class Privileges(object):
72 |     """Keep track of privileges an actor has been granted"""
73 | 
74 |     stmts = None
75 |     roles = None
76 |     aws_api_list = None
77 | 
78 |     def __init__(self, aws_api_list):
79 |         self.stmts = []
80 |         self.roles = []
81 |         self.aws_api_list = aws_api_list
82 | 
83 |     def add_stmt(self, stmt):
84 |         """Adds a statement from an IAM policy"""
85 |         if "Action" not in stmt:
86 |             # TODO Implement NotAction
87 |             return
88 |         self.stmts.append(stmt)
89 | 
90 |     def get_actions_from_statement(self, stmt):
91 |         """Figures out what API calls have been granted from a statement"""
92 |         actions = {}
93 | 
94 |         for action in make_list(stmt["Action"]):
95 |             # Normalize it
96 |             action = action.lower()
97 |             # Convert its globbing to a regex
98 |             action = "^" + action.replace("*", ".*") + "$"
99 | 
100 |             for possible_action in self.aws_api_list:
101 |                 for iam_name, cloudtrail_name in EVENT_RENAMES.items():
102 |                     if possible_action == cloudtrail_name:
103 |                         possible_action = iam_name
104 |                 if re.match(action, possible_action):
105 |                     actions[possible_action] = True
106 | 
107 |         return actions
108 | 
109 |     def determine_allowed(self):
110 |         """After statements have been added from IAM policies, find all the allowed API calls"""
111 |         actions = {}
112 | 
113 |         # Look at allows first
114 |         for stmt in self.stmts:
115 |             if stmt["Effect"] == "Allow":
116 |                 stmt_actions = self.get_actions_from_statement(stmt)
117 |                 for action in stmt_actions:
118 |                     if action not in actions:
119 |                         actions[action] = [stmt]
120 |                     else:
121 |                         actions[action].append(stmt)
122 | 
123 |         # Look at denies
124 |         for stmt in self.stmts:
125 |             if (
126 |                 stmt["Effect"] == "Deny"
127 |                 and "*" in make_list(stmt.get("Resource", None))
128 |                 and stmt.get("Condition", None) is None
129 |             ):
130 | 
131 |                 stmt_actions = self.get_actions_from_statement(stmt)
132 |                 for action in stmt_actions:
133 |                     if action in actions:
134 |                         del actions[action]
135 | 
136 |         return list(actions)
137 | 
138 | 
139 | def make_list(obj):
140 |     """Convert an object to a list if it is not already"""
141 |     if isinstance(obj, list):
142 |         return obj
143 |     return [obj]
144 | 
145 | 
146 | def normalize_api_call(service, eventName):
147 |     """Translate API calls to a common representation"""
148 |     service = service.lower()
149 |     eventName = eventName.lower()
150 | 
151 |     # Remove the dates from event names, such as createdistribution2015_07_27
152 |     eventName = eventName.split("20")[0]
153 | 
154 |     # Rename the service
155 |     if service in SERVICE_RENAMES:
156 |         service = SERVICE_RENAMES[service]
157 | 
158 |     return "{}:{}".format(service, eventName)
159 | 
160 | 
161 | def get_account_iam(account):
162 |     """Given account data from the config file, open the IAM file for the account"""
163 |     return json.load(open(account["iam"]))
164 | 
165 | 
166 | def get_allowed_users(account_iam):
167 |
"""Return all the users in an IAM file""" 168 | return jmespath.search("UserDetailList[].UserName", account_iam) 169 | 170 | 171 | def get_allowed_roles(account_iam): 172 | """Return all the roles in an IAM file""" 173 | return jmespath.search("RoleDetailList[].RoleName", account_iam) 174 | 175 | 176 | def print_actor_diff(performed_actors, allowed_actors, use_color): 177 | """ 178 | Given a list of actors that have performed actions, and a list that exist in the account, 179 | print the actors and whether they are still active. 180 | """ 181 | PERFORMED_AND_ALLOWED = 1 182 | PERFORMED_BUT_NOT_ALLOWED = 2 183 | ALLOWED_BUT_NOT_PERFORMED = 3 184 | 185 | actors = {} 186 | for actor in performed_actors: 187 | if actor in allowed_actors: 188 | actors[actor] = PERFORMED_AND_ALLOWED 189 | else: 190 | actors[actor] = PERFORMED_BUT_NOT_ALLOWED 191 | 192 | for actor in allowed_actors: 193 | if actor not in actors: 194 | actors[actor] = ALLOWED_BUT_NOT_PERFORMED 195 | 196 | for actor in sorted(actors.keys()): 197 | if actors[actor] == PERFORMED_AND_ALLOWED: 198 | colored_print(" {}".format(actor), use_color, "white") 199 | elif actors[actor] == PERFORMED_BUT_NOT_ALLOWED: 200 | # Don't show users that existed but have since been deleted 201 | continue 202 | elif actors[actor] == ALLOWED_BUT_NOT_PERFORMED: 203 | colored_print("- {}".format(actor), use_color, "red") 204 | else: 205 | raise Exception("Unknown constant") 206 | 207 | 208 | def get_user_iam(username, account_iam): 209 | """Given the IAM of an account, and a username, return the IAM data for the user""" 210 | user_iam = jmespath.search( 211 | "UserDetailList[] | [?UserName == `{}`] | [0]".format(username), account_iam 212 | ) 213 | if user_iam is None: 214 | exit("ERROR: Unknown user named {}".format(username)) 215 | return user_iam 216 | 217 | 218 | def get_role_iam(rolename, account_iam): 219 | """Given the IAM of an account, and a role name, return the IAM data for the role""" 220 | role_iam = jmespath.search( 221 | "RoleDetailList[] | [?RoleName == `{}`] | [0]".format(rolename), account_iam 222 | ) 223 | if role_iam is None: 224 | raise Exception("Unknown role named {}".format(rolename)) 225 | return role_iam 226 | 227 | 228 | def get_user_allowed_actions(aws_api_list, user_iam, account_iam): 229 | """Return the privileges granted to a user by IAM""" 230 | groups = user_iam["GroupList"] 231 | managed_policies = user_iam["AttachedManagedPolicies"] 232 | 233 | privileges = Privileges(aws_api_list) 234 | 235 | # Get permissions from groups 236 | for group in groups: 237 | group_iam = jmespath.search( 238 | "GroupDetailList[] | [?GroupName == `{}`] | [0]".format(group), account_iam 239 | ) 240 | if group_iam is None: 241 | continue 242 | # Get privileges from managed policies attached to the group 243 | for managed_policy in group_iam["AttachedManagedPolicies"]: 244 | policy_filter = "Policies[?Arn == `{}`].PolicyVersionList[?IsDefaultVersion == true] | [0][0].Document" 245 | policy = jmespath.search( 246 | policy_filter.format(managed_policy["PolicyArn"]), account_iam 247 | ) 248 | if policy is None: 249 | continue 250 | for stmt in make_list(policy["Statement"]): 251 | privileges.add_stmt(stmt) 252 | 253 | # Get privileges from in-line policies attached to the group 254 | for inline_policy in group_iam["GroupPolicyList"]: 255 | policy = inline_policy["PolicyDocument"] 256 | for stmt in make_list(policy["Statement"]): 257 | privileges.add_stmt(stmt) 258 | 259 | # Get privileges from managed policies attached to the user 260 | for 
managed_policy in managed_policies:
261 |         policy_filter = "Policies[?Arn == `{}`].PolicyVersionList[?IsDefaultVersion == true] | [0][0].Document"
262 |         policy = jmespath.search(
263 |             policy_filter.format(managed_policy["PolicyArn"]), account_iam
264 |         )
265 |         if policy is None:
266 |             continue
267 |         for stmt in make_list(policy["Statement"]):
268 |             privileges.add_stmt(stmt)
269 | 
270 |     # Get privileges from inline policies attached to the user
271 |     for stmt in (
272 |         jmespath.search("UserPolicyList[].PolicyDocument.Statement", user_iam) or []
273 |     ):
274 |         privileges.add_stmt(stmt)
275 | 
276 |     return privileges.determine_allowed()
277 | 
278 | 
279 | def get_role_allowed_actions(aws_api_list, role_iam, account_iam):
280 |     """Return the privileges granted to a role by IAM"""
281 |     privileges = Privileges(aws_api_list)
282 | 
283 |     # Get privileges from managed policies
284 |     for managed_policy in role_iam["AttachedManagedPolicies"]:
285 |         policy_filter = "Policies[?Arn == `{}`].PolicyVersionList[?IsDefaultVersion == true] | [0][0].Document"
286 |         policy = jmespath.search(
287 |             policy_filter.format(managed_policy["PolicyArn"]), account_iam
288 |         )
289 |         if policy is None:
290 |             continue
291 |         for stmt in make_list(policy["Statement"]):
292 |             privileges.add_stmt(stmt)
293 | 
294 |     # Get privileges from inline policies attached to the role
295 |     for policy in role_iam["RolePolicyList"]:
296 |         for stmt in make_list(policy["PolicyDocument"]["Statement"]):
297 |             privileges.add_stmt(stmt)
298 | 
299 |     return privileges.determine_allowed()
300 | 
301 | 
302 | def is_recorded_by_cloudtrail(action):
303 |     """Given an action, return True if it would be logged by CloudTrail"""
304 |     if action in cloudtrail_supported_actions:
305 |         return True
306 |     return False
307 | 
308 | 
309 | def colored_print(text, use_color=True, color_name="white"):
310 |     """Print with or without color codes"""
311 |     if use_color:
312 |         print(color(text, fg=color_name))
313 |     else:
314 |         print(text)
315 | 
316 | 
317 | def print_diff(performed_actions, allowed_actions, printfilter, use_color):
318 |     """
319 |     For an actor, given the actions they performed, and the privileges they were granted,
320 |     print what they were allowed to do but did not, and other differences.
321 | """ 322 | PERFORMED_AND_ALLOWED = 1 323 | PERFORMED_BUT_NOT_ALLOWED = 2 324 | ALLOWED_BUT_NOT_PERFORMED = 3 325 | ALLOWED_BUT_NOT_KNOWN_IF_PERFORMED = 4 326 | 327 | actions = {} 328 | 329 | for action in performed_actions: 330 | # Convert to IAM names 331 | for iam_name, cloudtrail_name in EVENT_RENAMES.items(): 332 | if action == cloudtrail_name: 333 | action = iam_name 334 | 335 | # See if this was allowed or not 336 | if action in allowed_actions: 337 | actions[action] = PERFORMED_AND_ALLOWED 338 | else: 339 | if action in NO_IAM: 340 | # Ignore actions in cloudtrail such as sts:getcalleridentity that are allowed 341 | # whether or not they are in IAM 342 | continue 343 | actions[action] = PERFORMED_BUT_NOT_ALLOWED 344 | 345 | # Find actions that were allowed, but there is no record of them being used 346 | for action in allowed_actions: 347 | if action not in actions: 348 | if not is_recorded_by_cloudtrail(action): 349 | actions[action] = ALLOWED_BUT_NOT_KNOWN_IF_PERFORMED 350 | else: 351 | actions[action] = ALLOWED_BUT_NOT_PERFORMED 352 | 353 | for action in sorted(actions.keys()): 354 | # Convert CloudTrail name back to IAM name 355 | display_name = action 356 | 357 | if not printfilter.get("show_benign", True): 358 | # Ignore actions that won't exfil or modify resources 359 | if ":list" in display_name or ":describe" in display_name: 360 | continue 361 | 362 | if actions[action] == PERFORMED_AND_ALLOWED: 363 | colored_print(" {}".format(display_name), use_color, "white") 364 | elif actions[action] == PERFORMED_BUT_NOT_ALLOWED: 365 | colored_print("+ {}".format(display_name), use_color, "green") 366 | elif actions[action] == ALLOWED_BUT_NOT_PERFORMED: 367 | if printfilter.get("show_used", True): 368 | # Ignore this as it wasn't used 369 | continue 370 | colored_print("- {}".format(display_name), use_color, "red") 371 | elif actions[action] == ALLOWED_BUT_NOT_KNOWN_IF_PERFORMED: 372 | if printfilter.get("show_used", True): 373 | # Ignore this as it wasn't used 374 | continue 375 | if printfilter.get("show_unknown", True): 376 | colored_print("? 
{}".format(display_name), use_color, "yellow")
377 |         else:
378 |             raise Exception("Unknown constant")
379 | 
380 | 
381 | def get_account(accounts, account_name):
382 |     """
383 |     Gets the account struct from the config file, for the account name specified
384 | 
385 |     accounts: array of accounts from the config file
386 |     account_name: name to search for (or ID)
387 |     """
388 |     for account in accounts:
389 |         if account_name == account["name"] or account_name == str(account["id"]):
390 |             # Sanity check all values exist
391 |             if "name" not in account or "id" not in account or "iam" not in account:
392 |                 exit(
393 |                     "ERROR: Account {} does not specify an id or iam in the config file".format(
394 |                         account_name
395 |                     )
396 |                 )
397 | 
398 |             # Sanity check account ID (must be exactly 12 digits)
399 |             if not re.search(r"^[0-9]{12}$", str(account["id"])):
400 |                 exit("ERROR: {} is not a 12-digit account id".format(account["id"]))
401 | 
402 |             return account
403 |     exit("ERROR: Account name {} not found in config".format(account_name))
404 |     return None
405 | 
406 | 
407 | def read_aws_api_list(aws_api_list_file="aws_api_list.txt"):
408 |     """Read in the list of all known AWS API calls"""
409 |     api_list_path = pkg_resources.resource_filename(
410 |         __name__, "data/{}".format(aws_api_list_file)
411 |     )
412 |     aws_api_list = {}
413 |     with open(api_list_path) as f:
414 |         lines = f.readlines()
415 |         for line in lines:
416 |             service, event = line.rstrip().split(":")
417 |             aws_api_list[normalize_api_call(service, event)] = True
418 |     return aws_api_list
419 | 
420 | 
421 | def run(args, config, start, end):
422 |     """Perform the requested command"""
423 |     use_color = args.use_color
424 | 
425 |     account = get_account(config["accounts"], args.account)
426 | 
427 |     if "elasticsearch" in config:
428 |         try:
429 |             from cloudtracker.datasources.es import ElasticSearch
430 |         except ImportError:
431 |             exit(
432 |                 "Elasticsearch support not installed.
Install with support via " 433 | "'pip install git+https://github.com/duo-labs/cloudtracker.git#egg=cloudtracker[es1]' for " 434 | "elasticsearch 1 support, or " 435 | "'pip install git+https://github.com/duo-labs/cloudtracker.git#egg=cloudtracker[es6]' for " 436 | "elasticsearch 6 support" 437 | ) 438 | datasource = ElasticSearch(config["elasticsearch"], start, end) 439 | else: 440 | logging.debug("Using Athena") 441 | from cloudtracker.datasources.athena import Athena 442 | 443 | datasource = Athena(config["athena"], account, start, end, args) 444 | 445 | # Read AWS actions 446 | aws_api_list = read_aws_api_list() 447 | 448 | # Read cloudtrail_supported_events 449 | global cloudtrail_supported_actions 450 | ct_actions_path = pkg_resources.resource_filename( 451 | __name__, "data/{}".format("cloudtrail_supported_actions.txt") 452 | ) 453 | cloudtrail_supported_actions = {} 454 | with open(ct_actions_path) as f: 455 | lines = f.readlines() 456 | for line in lines: 457 | (service, event) = line.rstrip().split(":") 458 | cloudtrail_supported_actions[normalize_api_call(service, event)] = True 459 | 460 | account_iam = get_account_iam(account) 461 | 462 | if args.list: 463 | actor_type = args.list 464 | 465 | if actor_type == "users": 466 | allowed_actors = get_allowed_users(account_iam) 467 | performed_actors = datasource.get_performed_users() 468 | elif actor_type == "roles": 469 | allowed_actors = get_allowed_roles(account_iam) 470 | performed_actors = datasource.get_performed_roles() 471 | else: 472 | exit("ERROR: --list argument must be one of 'users' or 'roles'") 473 | 474 | print_actor_diff(performed_actors, allowed_actors, use_color) 475 | 476 | else: 477 | if args.destaccount: 478 | destination_account = get_account(config["accounts"], args.destaccount) 479 | else: 480 | destination_account = account 481 | 482 | destination_iam = get_account_iam(destination_account) 483 | 484 | search_query = datasource.get_search_query() 485 | 486 | if args.user: 487 | username = args.user 488 | 489 | user_iam = get_user_iam(username, account_iam) 490 | print( 491 | "Getting info on {}, user created {}".format( 492 | args.user, user_iam["CreateDate"] 493 | ) 494 | ) 495 | 496 | if args.destrole: 497 | dest_role_iam = get_role_iam(args.destrole, destination_iam) 498 | print("Getting info for AssumeRole into {}".format(args.destrole)) 499 | 500 | allowed_actions = get_role_allowed_actions( 501 | aws_api_list, dest_role_iam, destination_iam 502 | ) 503 | performed_actions = datasource.get_performed_event_names_by_user_in_role( 504 | search_query, user_iam, dest_role_iam 505 | ) 506 | else: 507 | allowed_actions = get_user_allowed_actions( 508 | aws_api_list, user_iam, account_iam 509 | ) 510 | performed_actions = datasource.get_performed_event_names_by_user( 511 | search_query, user_iam 512 | ) 513 | elif args.role: 514 | rolename = args.role 515 | role_iam = get_role_iam(rolename, account_iam) 516 | print("Getting info for role {}".format(rolename)) 517 | 518 | if args.destrole: 519 | dest_role_iam = get_role_iam(args.destrole, destination_iam) 520 | print("Getting info for AssumeRole into {}".format(args.destrole)) 521 | 522 | allowed_actions = get_role_allowed_actions( 523 | aws_api_list, dest_role_iam, destination_iam 524 | ) 525 | performed_actions = datasource.get_performed_event_names_by_role_in_role( 526 | search_query, role_iam, dest_role_iam 527 | ) 528 | else: 529 | allowed_actions = get_role_allowed_actions( 530 | aws_api_list, role_iam, account_iam 531 | ) 532 | performed_actions = 
datasource.get_performed_event_names_by_role( 533 | search_query, role_iam 534 | ) 535 | else: 536 | exit("ERROR: Must specify a user or a role") 537 | 538 | printfilter = {} 539 | printfilter["show_unknown"] = args.show_unknown 540 | printfilter["show_benign"] = args.show_benign 541 | printfilter["show_used"] = args.show_used 542 | 543 | print_diff(performed_actions, allowed_actions, printfilter, use_color) 544 | -------------------------------------------------------------------------------- /cloudtracker/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Copyright 2018 Duo Security 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 6 | following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 9 | disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 12 | following disclaimer in the documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 15 | products derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 18 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 23 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | --------------------------------------------------------------------------- 25 | """ 26 | 27 | import argparse 28 | import datetime 29 | 30 | import yaml 31 | 32 | from . import run 33 | 34 | 35 | def main(): 36 | now = datetime.datetime.now() 37 | parser = argparse.ArgumentParser() 38 | 39 | # Add mutually exclusive arguments for --list, --user, and --role 40 | action_group = parser.add_mutually_exclusive_group(required=True) 41 | action_group.add_argument( 42 | "--list", 43 | help="List 'users' or 'roles' that have been active", 44 | choices=["users", "roles"], 45 | ) 46 | action_group.add_argument("--user", help="User to investigate", type=str) 47 | action_group.add_argument("--role", help="Role to investigate", type=str) 48 | 49 | parser.add_argument( 50 | "--config", 51 | help="Config file name (default: config.yaml)", 52 | required=False, 53 | default="config.yaml", 54 | type=argparse.FileType("r"), 55 | ) 56 | parser.add_argument( 57 | "--iam", 58 | dest="iam_file", 59 | help="IAM output from running `aws iam get-account-authorization-details`", 60 | required=False, 61 | default="./data/get-account-authorization-details.json", 62 | type=str, 63 | ) 64 | parser.add_argument("--account", help="Account name", required=True, type=str) 65 | parser.add_argument( 66 | "--start", 67 | help="Start of date range (ex. 2018-01-21). 
Defaults to one year ago.", 68 | default=(now - datetime.timedelta(days=365)).date().isoformat(), 69 | required=False, 70 | type=str, 71 | ) 72 | parser.add_argument( 73 | "--end", 74 | help="End of date range (ex. 2018-01-21). Defaults to today.", 75 | default=now.date().isoformat(), 76 | required=False, 77 | type=str, 78 | ) 79 | parser.add_argument( 80 | "--destrole", help="Role assumed into", required=False, default=None, type=str 81 | ) 82 | parser.add_argument( 83 | "--destaccount", 84 | help="Account assumed into (if different)", 85 | required=False, 86 | default=None, 87 | type=str, 88 | ) 89 | parser.add_argument( 90 | "--show-used", 91 | dest="show_used", 92 | help="Only show privileges that were used", 93 | required=False, 94 | action="store_true", 95 | ) 96 | parser.add_argument( 97 | "--ignore-benign", 98 | dest="show_benign", 99 | help="Don't show actions that aren't likely to be sensitive, " 100 | "such as ones that won't exfil data or modify resources", 101 | required=False, 102 | action="store_false", 103 | ) 104 | parser.add_argument( 105 | "--ignore-unknown", 106 | dest="show_unknown", 107 | help="Don't show granted privileges that aren't recorded in CloudTrail, " 108 | "as we don't know if they are used", 109 | required=False, 110 | action="store_false", 111 | ) 112 | parser.add_argument( 113 | "--no-color", 114 | dest="use_color", 115 | help="Don't use color codes in output", 116 | required=False, 117 | action="store_false", 118 | ) 119 | parser.add_argument( 120 | "--skip-setup", 121 | dest="skip_setup", 122 | help="For Athena, don't create or test for the tables", 123 | required=False, 124 | action="store_true", 125 | default=False, 126 | ) 127 | 128 | args = parser.parse_args() 129 | 130 | # Read config safely (plain yaml.load without a Loader is deprecated and unsafe) 131 | try: 132 | config = yaml.safe_load(args.config) 133 | except yaml.YAMLError as e: 134 | raise argparse.ArgumentError( 135 | None, 136 | "ERROR: Could not load yaml from config file {}\n{}".format( 137 | args.config.name, e 138 | ), 139 | ) 140 | 141 | run(args, config, args.start, args.end) 142 | -------------------------------------------------------------------------------- /cloudtracker/datasources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duo-labs/cloudtracker/822ef553266aca2b1d54fc44e09c230f6df77a8a/cloudtracker/datasources/__init__.py -------------------------------------------------------------------------------- /cloudtracker/datasources/athena.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 Summit Route 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 5 | following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 8 | disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 11 | following disclaimer in the documentation and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 14 | products derived from this software without specific prior written permission.
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 17 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 22 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | --------------------------------------------------------------------------- 24 | """ 25 | 26 | import logging 27 | import boto3 28 | import time 29 | import json 30 | import datetime 31 | from dateutil.relativedelta import relativedelta 32 | 33 | from cloudtracker import normalize_api_call 34 | 35 | # Much thanks to Alex Smolen (https://twitter.com/alsmola) 36 | # for his post "Partitioning CloudTrail Logs in Athena" 37 | # https://medium.com/@alsmola/partitioning-cloudtrail-logs-in-athena-29add93ee070 38 | 39 | # TODO Delete result objects from S3 40 | # TODO Add ability to skip setup 41 | # TODO Add teardown to remove all the athena tables, partitions, and views 42 | 43 | 44 | NUM_MONTHS_FOR_PARTITIONS = 12 45 | 46 | 47 | class Athena(object): 48 | athena = None 49 | s3 = None 50 | database = "cloudtracker" 51 | output_bucket = "aws-athena-query-results-ACCOUNT_ID-REGION" 52 | search_filter = "" 53 | table_name = "" 54 | workgroup = 'primary' 55 | 56 | def query_athena( 57 | self, query, context={"Database": database}, do_not_wait=False, skip_header=True 58 | ): 59 | logging.debug("Making query {}".format(query)) 60 | 61 | # Make query request dependent on whether the context is None or not 62 | if context is None: 63 | response = self.athena.start_query_execution( 64 | QueryString=query, 65 | ResultConfiguration={"OutputLocation": self.output_bucket}, 66 | WorkGroup=self.workgroup 67 | ) 68 | else: 69 | response = self.athena.start_query_execution( 70 | QueryString=query, 71 | QueryExecutionContext=context, 72 | ResultConfiguration={"OutputLocation": self.output_bucket}, 73 | WorkGroup=self.workgroup 74 | ) 75 | 76 | if do_not_wait: 77 | return response["QueryExecutionId"] 78 | 79 | self.wait_for_query_to_complete(response["QueryExecutionId"]) 80 | 81 | # Paginate results and combine them 82 | rows = [] 83 | paginator = self.athena.get_paginator("get_query_results") 84 | response_iterator = paginator.paginate( 85 | QueryExecutionId=response["QueryExecutionId"] 86 | ) 87 | row_count = 0 88 | for response in response_iterator: 89 | for row in response["ResultSet"]["Rows"]: 90 | row_count += 1 91 | if row_count == 1: 92 | if skip_header: 93 | # Skip header 94 | continue 95 | rows.append(self.extract_response_values(row)) 96 | return rows 97 | 98 | def extract_response_values(self, row): 99 | result = [] 100 | for column in row["Data"]: 101 | result.append(column.get("VarCharValue", "")) 102 | return result 103 | 104 | def wait_for_query_to_complete(self, queryExecutionId): 105 | """ 106 | Returns when the query completes successfully, or raises an exception if it fails or is canceled. 107 | Waits until the query finishes running. 
108 | """ 109 | 110 | while True: 111 | response = self.athena.get_query_execution( 112 | QueryExecutionId=queryExecutionId 113 | ) 114 | state = response["QueryExecution"]["Status"]["State"] 115 | if state == "SUCCEEDED": 116 | return True 117 | if state == "FAILED" or state == "CANCELLED": 118 | raise Exception( 119 | "Query entered state {state} with reason {reason}".format( 120 | state=state, 121 | reason=response["QueryExecution"]["Status"][ 122 | "StateChangeReason" 123 | ], 124 | ) 125 | ) 126 | logging.debug( 127 | "Sleeping 1 second while query {} completes".format(queryExecutionId) 128 | ) 129 | time.sleep(1) 130 | 131 | def wait_for_query_batch_to_complete(self, queryExecutionIds): 132 | """ 133 | Returns when the query completes successfully, or raises an exception if it fails or is canceled. 134 | Waits until the query finishes running. 135 | """ 136 | 137 | while len(queryExecutionIds) > 0: 138 | response = self.athena.batch_get_query_execution( 139 | QueryExecutionIds=list(queryExecutionIds) 140 | ) 141 | for query_execution in response["QueryExecutions"]: 142 | state = query_execution["Status"]["State"] 143 | if state == "SUCCEEDED": 144 | queryExecutionIds.remove(query_execution["QueryExecutionId"]) 145 | if state == "FAILED" or state == "CANCELLED": 146 | raise Exception( 147 | "Query entered state {state} with reason {reason}".format( 148 | state=state, 149 | reason=response["QueryExecution"]["Status"][ 150 | "StateChangeReason" 151 | ], 152 | ) 153 | ) 154 | 155 | if len(queryExecutionIds) == 0: 156 | return 157 | logging.debug( 158 | "Sleeping 1 second while {} queries complete".format( 159 | len(queryExecutionIds) 160 | ) 161 | ) 162 | time.sleep(1) 163 | 164 | def __init__(self, config, account, start, end, args): 165 | # Mute boto except errors 166 | logging.getLogger("botocore").setLevel(logging.WARN) 167 | logging.info( 168 | "Source of CloudTrail logs: s3://{bucket}/{path}".format( 169 | bucket=config["s3_bucket"], path=config["path"] 170 | ) 171 | ) 172 | 173 | # Check start date is not older than a year, as we only create partitions for that far back 174 | if ( 175 | datetime.datetime.now() - datetime.datetime.strptime(start, "%Y-%m-%d") 176 | ).days > 365: 177 | raise Exception( 178 | "Start date is over a year old. CloudTracker does not create or use partitions over a year old." 
179 | ) 180 | 181 | # 182 | # Create date filtering 183 | # 184 | month_restrictions = set() 185 | start = start.split("-") 186 | end = end.split("-") 187 | 188 | if start[0] == end[0]: 189 | for month in range(int(start[1]), int(end[1]) + 1): 190 | month_restrictions.add( 191 | "(year = '{:0>2}' and month = '{:0>2}')".format(start[0], month) 192 | ) 193 | else: 194 | # Add restrictions for months in start year 195 | for month in range(int(start[1]), 12 + 1): 196 | month_restrictions.add( 197 | "(year = '{:0>2}' and month = '{:0>2}')".format(start[0], month) 198 | ) 199 | # Add restrictions for months in middle years 200 | for year in range(int(start[0]), int(end[0])): 201 | for month in range(1, 12 + 1): 202 | month_restrictions.add( 203 | "(year = '{:0>2}' and month = '{:0>2}')".format(year, month) 204 | ) 205 | # Add restrictions for months in final year 206 | for month in range(1, int(end[1]) + 1): 207 | month_restrictions.add( 208 | "(year = '{:0>2}' and month = '{:0>2}')".format(end[0], month) 209 | ) 210 | 211 | # Combine date filters and add error filter 212 | self.search_filter = ( 213 | "((" + " or ".join(month_restrictions) + ") and errorcode IS NULL)" 214 | ) 215 | 216 | self.table_name = "cloudtrail_logs_{}".format(account["id"]) 217 | 218 | # 219 | # Display the AWS identity (doubles as a check that boto creds are set up) 220 | # 221 | sts = boto3.client("sts") 222 | identity = sts.get_caller_identity() 223 | logging.info("Using AWS identity: {}".format(identity["Arn"])) 224 | current_account_id = identity["Account"] 225 | region = boto3.session.Session().region_name 226 | 227 | if "output_s3_bucket" in config: 228 | self.output_bucket = config["output_s3_bucket"] 229 | else: 230 | self.output_bucket = "s3://aws-athena-query-results-{}-{}".format( 231 | current_account_id, region 232 | ) 233 | logging.info("Using output bucket: {}".format(self.output_bucket)) 234 | 235 | if "workgroup" in config: 236 | self.workgroup = config["workgroup"] 237 | logging.info("Using workgroup: {}".format(self.workgroup)) 238 | 239 | if not config.get('org_id'): 240 | cloudtrail_log_path = "s3://{bucket}/{path}/AWSLogs/{account_id}/CloudTrail".format( 241 | bucket=config["s3_bucket"], path=config["path"], account_id=account["id"] 242 | ) 243 | else: 244 | cloudtrail_log_path = "s3://{bucket}/{path}/AWSLogs/{org_id}/{account_id}/CloudTrail".format( 245 | bucket=config["s3_bucket"], path=config["path"], org_id=config["org_id"], account_id=account["id"] 246 | ) 247 | 248 | logging.info("Account cloudtrail log path: {}".format(cloudtrail_log_path)) 249 | 250 | # Open connections to needed AWS services 251 | self.athena = boto3.client("athena") 252 | self.s3 = boto3.client("s3") 253 | 254 | if args.skip_setup: 255 | logging.info("Skipping initial table creation") 256 | return 257 | 258 | # Check we can access the S3 bucket 259 | resp = self.s3.list_objects_v2( 260 | Bucket=config["s3_bucket"], Prefix=config["path"], MaxKeys=1 261 | ) 262 | if "Contents" not in resp or len(resp["Contents"]) == 0: 263 | exit( 264 | "ERROR: S3 bucket has no contents.
Ensure you have logs at s3://{bucket}/{path}".format( 265 | bucket=config["s3_bucket"], path=config["path"] 266 | ) 267 | ) 268 | 269 | # Ensure our database exists 270 | self.query_athena( 271 | "CREATE DATABASE IF NOT EXISTS {db} {comment}".format( 272 | db=self.database, comment="COMMENT 'Created by CloudTracker'" 273 | ), 274 | context=None, 275 | ) 276 | 277 | # 278 | # Set up table 279 | # 280 | query = """CREATE EXTERNAL TABLE IF NOT EXISTS `{table_name}` ( 281 | `eventversion` string COMMENT 'from deserializer', 282 | `useridentity` struct<type:string,principalid:string,arn:string,accountid:string,invokedby:string,accesskeyid:string,username:string,sessioncontext:struct<attributes:struct<mfaauthenticated:string,creationdate:string>,sessionissuer:struct<type:string,principalid:string,arn:string,accountid:string,username:string>>> COMMENT 'from deserializer', 283 | `eventtime` string COMMENT 'from deserializer', 284 | `eventsource` string COMMENT 'from deserializer', 285 | `eventname` string COMMENT 'from deserializer', 286 | `awsregion` string COMMENT 'from deserializer', 287 | `sourceipaddress` string COMMENT 'from deserializer', 288 | `useragent` string COMMENT 'from deserializer', 289 | `errorcode` string COMMENT 'from deserializer', 290 | `errormessage` string COMMENT 'from deserializer', 291 | `requestparameters` string COMMENT 'from deserializer', 292 | `responseelements` string COMMENT 'from deserializer', 293 | `additionaleventdata` string COMMENT 'from deserializer', 294 | `requestid` string COMMENT 'from deserializer', 295 | `eventid` string COMMENT 'from deserializer', 296 | `resources` array<struct<arn:string,accountid:string,type:string>> COMMENT 'from deserializer', 297 | `eventtype` string COMMENT 'from deserializer', 298 | `apiversion` string COMMENT 'from deserializer', 299 | `readonly` string COMMENT 'from deserializer', 300 | `recipientaccountid` string COMMENT 'from deserializer', 301 | `serviceeventdetails` string COMMENT 'from deserializer', 302 | `sharedeventid` string COMMENT 'from deserializer', 303 | `vpcendpointid` string COMMENT 'from deserializer') 304 | PARTITIONED BY (region string, year string, month string) 305 | ROW FORMAT SERDE 306 | 'com.amazon.emr.hive.serde.CloudTrailSerde' 307 | STORED AS INPUTFORMAT 308 | 'com.amazon.emr.cloudtrail.CloudTrailInputFormat' 309 | OUTPUTFORMAT 310 | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' 311 | LOCATION '{cloudtrail_log_path}'""".format( 312 | table_name=self.table_name, cloudtrail_log_path=cloudtrail_log_path 313 | ) 314 | self.query_athena(query) 315 | 316 | # 317 | # Create partitions 318 | # 319 | 320 | logging.info( 321 | "Checking if all partitions for the past {} months exist".format( 322 | NUM_MONTHS_FOR_PARTITIONS 323 | ) 324 | ) 325 | 326 | # Get list of current partitions 327 | query = "SHOW PARTITIONS {table_name}".format(table_name=self.table_name) 328 | partition_list = self.query_athena(query, skip_header=False) 329 | 330 | partition_set = set() 331 | for partition in partition_list: 332 | partition_set.add(partition[0]) 333 | 334 | # Get region list. Using ec2 here just because it exists in all regions.
335 | regions = boto3.session.Session().get_available_regions("ec2") 336 | 337 | queries_to_make = set() 338 | 339 | # Iterate over every month for the past year and build queries to run to create partitions 340 | for num_months_ago in range(0, NUM_MONTHS_FOR_PARTITIONS): 341 | date_of_interest = datetime.datetime.now() - relativedelta( 342 | months=num_months_ago 343 | ) 344 | year = date_of_interest.year 345 | month = "{:0>2}".format(date_of_interest.month) 346 | 347 | query = "" 348 | 349 | for region in regions: 350 | if ( 351 | "region={region}/year={year}/month={month}".format( 352 | region=region, year=year, month=month 353 | ) 354 | in partition_set 355 | ): 356 | continue 357 | 358 | query += "PARTITION (region='{region}',year='{year}',month='{month}') location '{cloudtrail_log_path}/{region}/{year}/{month}/'\n".format( 359 | region=region, 360 | year=year, 361 | month=month, 362 | cloudtrail_log_path=cloudtrail_log_path, 363 | ) 364 | if query != "": 365 | queries_to_make.add( 366 | "ALTER TABLE {table_name} ADD ".format(table_name=self.table_name) 367 | + query 368 | ) 369 | 370 | # Run the queries 371 | query_count = len(queries_to_make) 372 | for query in queries_to_make: 373 | logging.info("Partition groups remaining to create: {}".format(query_count)) 374 | self.query_athena(query) 375 | query_count -= 1 376 | 377 | def get_performed_users(self): 378 | """ 379 | Returns the users that performed actions within the search filters 380 | """ 381 | query = "select distinct userIdentity.userName from {table_name} where {search_filter}".format( 382 | table_name=self.table_name, search_filter=self.search_filter 383 | ) 384 | response = self.query_athena(query) 385 | 386 | user_names = {} 387 | for row in response: 388 | user_name = row[0] 389 | if user_name == "HIDDEN_DUE_TO_SECURITY_REASONS": 390 | # This happens when a user logs in with the wrong username 391 | continue 392 | user_names[user_name] = True 393 | return user_names 394 | 395 | def get_performed_roles(self): 396 | """ 397 | Returns the roles that performed actions within the search filters 398 | """ 399 | query = "select distinct userIdentity.sessionContext.sessionIssuer.userName from {table_name} where {search_filter}".format( 400 | table_name=self.table_name, search_filter=self.search_filter 401 | ) 402 | response = self.query_athena(query) 403 | 404 | role_names = {} 405 | for row in response: 406 | role = row[0] 407 | role_names[role] = True 408 | return role_names 409 | 410 | def get_search_query(self): 411 | # Athena doesn't use this call, but needs to support it being called 412 | return None 413 | 414 | def get_events_from_search(self, searchresults): 415 | """ 416 | Given the results of a query for events, return these in a more usable fashion 417 | """ 418 | event_names = {} 419 | 420 | for event in searchresults: 421 | event = event[0] 422 | # event is now a string like "{field0=s3.amazonaws.com, field1=GetBucketAcl}" 423 | # I parse out the field manually 424 | # TODO Find a smarter way to parse this data 425 | 426 | # Remove the '{' and '}' 427 | event = event[1 : len(event) - 1] 428 | 429 | # Split into 'field0=s3.amazonaws.com' and 'field1=GetBucketAcl' 430 | event = event.split(", ") 431 | # Get the eventsource 's3.amazonaws.com' 432 | service = event[0].split("=")[1] 433 | # Get the service 's3' 434 | service = service.split(".")[0] 435 | 436 | # Get the eventname 'GetBucketAcl' 437 | eventname = event[1].split("=")[1] 438 | 439 | event_names[normalize_api_call(service, eventname)] = True 440 | 441 | 
return event_names 442 | 443 | def get_performed_event_names_by_user(self, _, user_iam): 444 | """For a user, return all performed events""" 445 | 446 | query = "select distinct (eventsource, eventname) from {table_name} where (userIdentity.arn = '{identity}') and {search_filter}".format( 447 | table_name=self.table_name, 448 | identity=user_iam["Arn"], 449 | search_filter=self.search_filter, 450 | ) 451 | response = self.query_athena(query) 452 | 453 | return self.get_events_from_search(response) 454 | 455 | def get_performed_event_names_by_role(self, _, role_iam): 456 | """For a role, return all performed events""" 457 | 458 | query = "select distinct (eventsource, eventname) from {table_name} where (userIdentity.sessionContext.sessionIssuer.arn = '{identity}') and {search_filter}".format( 459 | table_name=self.table_name, 460 | identity=role_iam["Arn"], 461 | search_filter=self.search_filter, 462 | ) 463 | response = self.query_athena(query) 464 | 465 | return self.get_events_from_search(response) 466 | 467 | def get_performed_event_names_by_user_in_role( 468 | self, searchquery, user_iam, role_iam 469 | ): 470 | """For a user that has assumed into another role, return all performed events""" 471 | raise Exception("Not implemented") 472 | sessionquery = ( 473 | searchquery.query(self.get_query_match("eventName", "AssumeRole")) 474 | .query(self.get_query_match("userIdentity.arn", user_iam["Arn"])) 475 | .query(self.get_query_match("requestParameters.roleArn", role_iam["Arn"])) 476 | ) 477 | 478 | event_names = {} 479 | for roleAssumption in sessionquery.scan(): 480 | sessionKey = roleAssumption.responseElements.credentials.accessKeyId 481 | # I assume the session key is unique enough to use for identifying role assumptions 482 | # TODO: I should also be using sharedEventID as explained in: 483 | # https://aws.amazon.com/blogs/security/aws-cloudtrail-now-tracks-cross-account-activity-to-its-origin/ 484 | # I could also use the timings of these events. 485 | innerquery = searchquery.query( 486 | self.get_query_match("userIdentity.accessKeyId", sessionKey) 487 | ).query( 488 | self.get_query_match( 489 | "userIdentity.sessionContext.sessionIssuer.arn", role_iam["Arn"] 490 | ) 491 | ) 492 | 493 | event_names.update(self.get_events_from_search(innerquery)) 494 | 495 | return event_names 496 | 497 | def get_performed_event_names_by_role_in_role( 498 | self, searchquery, role_iam, dest_role_iam 499 | ): 500 | """For a role that has assumed into another role, return all performed events""" 501 | raise Exception("Not implemented") 502 | sessionquery = ( 503 | searchquery.query(self.get_query_match("eventName", "AssumeRole")) 504 | .query( 505 | self.get_query_match( 506 | "userIdentity.sessionContext.sessionIssuer.arn", role_iam["Arn"] 507 | ) 508 | ) 509 | .query( 510 | self.get_query_match("requestParameters.roleArn", dest_role_iam["Arn"]) 511 | ) 512 | ) 513 | 514 | # TODO I should get a count of the number of role assumptions, since this can be millions 515 | 516 | event_names = {} 517 | count = 0 518 | for roleAssumption in sessionquery.scan(): 519 | count += 1 520 | if count % 1000 == 0: 521 | # This is just info level information, for cases where many role assumptions have happened 522 | # I should advise the user to just look at the final role, especially for cases where the same role 523 | # is continuously assuming into another role and that is the only thing assuming into it. 
524 | print("{} role assumptions scanned so far...".format(count)) 525 | sessionKey = roleAssumption.responseElements.credentials.accessKeyId 526 | innerquery = searchquery.query( 527 | self.get_query_match("userIdentity.accessKeyId", sessionKey) 528 | ).query( 529 | self.get_query_match( 530 | "userIdentity.sessionContext.sessionIssuer.arn", 531 | dest_role_iam["Arn"], 532 | ) 533 | ) 534 | 535 | event_names.update(self.get_events_from_search(innerquery)) 536 | 537 | return event_names 538 | -------------------------------------------------------------------------------- /cloudtracker/datasources/es.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 Duo Security 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 5 | following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 8 | disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 11 | following disclaimer in the documentation and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 14 | products derived from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 17 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 22 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | --------------------------------------------------------------------------- 24 | """ 25 | 26 | from elasticsearch import Elasticsearch 27 | from elasticsearch_dsl import Search, Q 28 | from cloudtracker import normalize_api_call 29 | 30 | 31 | class ElasticSearch(object): 32 | es = None 33 | index = "cloudtrail" 34 | key_prefix = "" 35 | 36 | # Create search filters 37 | searchfilter = None 38 | 39 | def __init__(self, config, start, end): 40 | # Open connection to ElasticSearch 41 | self.es = Elasticsearch([config], timeout=900) 42 | self.searchfilter = {} 43 | self.index = config.get("index", "cloudtrail") 44 | self.key_prefix = config.get("key_prefix", "") 45 | if self.key_prefix != "": 46 | self.key_prefix += "." 
47 | self.timestamp_field = config.get("timestamp_field", "eventTime") 48 | 49 | # Used to make elasticsearch query language semantics dynamically based on version 50 | self.es_version = int(self.es.info()["version"]["number"].split(".")[0]) 51 | 52 | # Filter errors 53 | # https://www.elastic.co/guide/en/elasticsearch/reference/2.0/breaking_20_query_dsl_changes.html 54 | # http://www.dlxedu.com/askdetail/3/0620e1124992fb281da93c7efe53b97f.html 55 | if self.es_version < 2: 56 | error_filter = {"exists": {"field": self.get_field_name("errorCode")}} 57 | self.searchfilter["filter_errors"] = ~Q("filtered", filter=error_filter) 58 | else: 59 | self.searchfilter["filter_errors"] = ~Q( 60 | "exists", field=self.get_field_name("errorCode") 61 | ) 62 | 63 | # Filter dates 64 | if start: 65 | self.searchfilter["start_date_filter"] = Q( 66 | "range", **{self.timestamp_field: {"gte": start}} 67 | ) 68 | if end: 69 | self.searchfilter["end_date_filter"] = Q( 70 | "range", **{self.timestamp_field: {"lte": end}} 71 | ) 72 | 73 | def get_field_name(self, field): 74 | return self.key_prefix + field + self.get_field_suffix() 75 | 76 | def get_field_suffix(self): 77 | # The .keyword and .raw suffix only apply to indices whose names match logstash-* 78 | # https://discuss.elastic.co/t/no-raw-field/49342/4 79 | # However, based on our suggested mapping, our fields should have a .keyword suffix 80 | 81 | # https://www.elastic.co/guide/en/logstash/5.0/breaking-changes.html 82 | if self.es_version < 5: 83 | return ".raw" 84 | else: 85 | return ".keyword" 86 | 87 | def get_query_match(self, field, value): 88 | field = self.get_field_name(field) 89 | return {"match": {field: value}} 90 | 91 | def get_performed_users(self): 92 | """ 93 | Returns the users that performed actions within the search filters 94 | """ 95 | search = Search(using=self.es, index=self.index) 96 | for query in self.searchfilter.values(): 97 | search = search.query(query) 98 | 99 | search.aggs.bucket( 100 | "user_names", 101 | "terms", 102 | field=self.get_field_name("userIdentity.userName"), 103 | size=5000, 104 | ) 105 | response = search.execute() 106 | 107 | user_names = {} 108 | for user in response.aggregations.user_names.buckets: 109 | if user.key == "HIDDEN_DUE_TO_SECURITY_REASONS": 110 | # This happens when a user logs in with the wrong username 111 | continue 112 | user_names[user.key] = True 113 | return user_names 114 | 115 | def get_performed_roles(self): 116 | """ 117 | Returns the roles that performed actions within the search filters 118 | """ 119 | search = Search(using=self.es, index=self.index) 120 | for query in self.searchfilter.values(): 121 | search = search.query(query) 122 | 123 | userName_field = self.get_field_name( 124 | "userIdentity.sessionContext.sessionIssuer.userName" 125 | ) 126 | search.aggs.bucket("role_names", "terms", field=userName_field, size=5000) 127 | response = search.execute() 128 | 129 | role_names = {} 130 | for role in response.aggregations.role_names.buckets: 131 | role_names[role.key] = True 132 | return role_names 133 | 134 | def get_search_query(self): 135 | """ 136 | Opens a connection to ElasticSearch and applies the initial filters 137 | """ 138 | search = Search(using=self.es, index=self.index) 139 | for query in self.searchfilter.values(): 140 | search = search.query(query) 141 | 142 | return search 143 | 144 | def get_events_from_search(self, searchquery): 145 | """ 146 | Given a started elasticsearch query, apply the remaining search filters, and 147 | return the API calls that exist 
for this query. 148 | s: search query 149 | """ 150 | searchquery.aggs.bucket( 151 | "event_names", "terms", field=self.get_field_name("eventName"), size=5000 152 | ).bucket( 153 | "service_names", 154 | "terms", 155 | field=self.get_field_name("eventSource"), 156 | size=5000, 157 | ) 158 | response = searchquery.execute() 159 | 160 | event_names = {} 161 | 162 | for event in response.aggregations.event_names.buckets: 163 | service = event.service_names.buckets[0].key 164 | service = service.split(".")[0] 165 | 166 | event_names[normalize_api_call(service, event.key)] = True 167 | 168 | return event_names 169 | 170 | def get_performed_event_names_by_user(self, searchquery, user_iam): 171 | """For a user, return all performed events""" 172 | searchquery = searchquery.query( 173 | self.get_query_match("userIdentity.arn", user_iam["Arn"]) 174 | ) 175 | return self.get_events_from_search(searchquery) 176 | 177 | def get_performed_event_names_by_role(self, searchquery, role_iam): 178 | """For a role, return all performed events""" 179 | field = "userIdentity.sessionContext.sessionIssuer.arn" 180 | searchquery = searchquery.query(self.get_query_match(field, role_iam["Arn"])) 181 | return self.get_events_from_search(searchquery) 182 | 183 | def get_performed_event_names_by_user_in_role( 184 | self, searchquery, user_iam, role_iam 185 | ): 186 | """For a user that has assumed into another role, return all performed events""" 187 | sessionquery = ( 188 | searchquery.query(self.get_query_match("eventName", "AssumeRole")) 189 | .query(self.get_query_match("userIdentity.arn", user_iam["Arn"])) 190 | .query(self.get_query_match("requestParameters.roleArn", role_iam["Arn"])) 191 | ) 192 | 193 | event_names = {} 194 | for roleAssumption in sessionquery.scan(): 195 | sessionKey = roleAssumption.responseElements.credentials.accessKeyId 196 | # I assume the session key is unique enough to use for identifying role assumptions 197 | # TODO: I should also be using sharedEventID as explained in: 198 | # https://aws.amazon.com/blogs/security/aws-cloudtrail-now-tracks-cross-account-activity-to-its-origin/ 199 | # I could also use the timings of these events. 
200 | innerquery = searchquery.query( 201 | self.get_query_match("userIdentity.accessKeyId", sessionKey) 202 | ).query( 203 | self.get_query_match( 204 | "userIdentity.sessionContext.sessionIssuer.arn", role_iam["Arn"] 205 | ) 206 | ) 207 | 208 | event_names.update(self.get_events_from_search(innerquery)) 209 | 210 | return event_names 211 | 212 | def get_performed_event_names_by_role_in_role( 213 | self, searchquery, role_iam, dest_role_iam 214 | ): 215 | """For a role that has assumed into another role, return all performed events""" 216 | sessionquery = ( 217 | searchquery.query(self.get_query_match("eventName", "AssumeRole")) 218 | .query( 219 | self.get_query_match( 220 | "userIdentity.sessionContext.sessionIssuer.arn", role_iam["Arn"] 221 | ) 222 | ) 223 | .query( 224 | self.get_query_match("requestParameters.roleArn", dest_role_iam["Arn"]) 225 | ) 226 | ) 227 | 228 | # TODO I should get a count of the number of role assumptions, since this can be millions 229 | 230 | event_names = {} 231 | count = 0 232 | for roleAssumption in sessionquery.scan(): 233 | count += 1 234 | if count % 1000 == 0: 235 | # This is just info level information, for cases where many role assumptions have happened 236 | # I should advise the user to just look at the final role, especially for cases where the same role 237 | # is continuously assuming into another role and that is the only thing assuming into it. 238 | print("{} role assumptions scanned so far...".format(count)) 239 | sessionKey = roleAssumption.responseElements.credentials.accessKeyId 240 | innerquery = searchquery.query( 241 | self.get_query_match("userIdentity.accessKeyId", sessionKey) 242 | ).query( 243 | self.get_query_match( 244 | "userIdentity.sessionContext.sessionIssuer.arn", 245 | dest_role_iam["Arn"], 246 | ) 247 | ) 248 | 249 | event_names.update(self.get_events_from_search(innerquery)) 250 | 251 | return event_names 252 | -------------------------------------------------------------------------------- /config.yaml.demo: -------------------------------------------------------------------------------- 1 | # Config 2 | elasticsearch: 3 | host: localhost 4 | port: 9200 5 | index: "cloudtrail" 6 | key_prefix: "" 7 | timestamp_field: "eventTime" 8 | accounts: 9 | - name: demo 10 | id: 111111111111 11 | iam: account-data/demo_iam.json 12 | - name: demo2 13 | id: 222222222222 14 | iam: account-data/demo2_iam.json 15 | -------------------------------------------------------------------------------- /docs/elasticsearch.md: -------------------------------------------------------------------------------- 1 | This document describes how to use CloudTracker with ElasticSearch. 2 | 3 | Requirements 4 | ============ 5 | * CloudTrail logs must be loaded into ElasticSearch. For instructions on setting up ElasticSearch and ingesting an archive of CloudTrail logs into it see below. 6 | * ElasticSearch 6.x is supported, but there are reports of ElasticSearch 1.x being used successfully. 
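CloudTracker keys its query syntax off the cluster's major version (at startup it reads `version.number` from the root endpoint), so it's worth confirming what you're running first. A quick check, assuming your cluster is reachable at `localhost:9200` (substitute your own endpoint):
```
curl -s http://localhost:9200/ | jq -r '.version.number'
```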
7 | 8 | Installation 9 | ============ 10 | 11 | ### Step 1 12 | Install the Python libraries with pip, using the extra that matches your ElasticSearch version: 13 | 14 | For elasticsearch v6.x: 15 | ``` 16 | python3 -m venv ./venv && source venv/bin/activate 17 | pip install git+https://github.com/duo-labs/cloudtracker.git#egg=cloudtracker[es6] 18 | ``` 19 | 20 | For older versions, such as elasticsearch v1.x: 21 | ``` 22 | python3 -m venv ./venv && source venv/bin/activate 23 | pip install git+https://github.com/duo-labs/cloudtracker.git#egg=cloudtracker[es1] 24 | ``` 25 | 26 | ### Step 2 27 | Get the IAM data of the account: 28 | 29 | ``` 30 | aws iam get-account-authorization-details > account-data/demo_iam.json 31 | ``` 32 | 33 | ### Step 3 34 | Edit the `config.yaml`. You need to specify how to connect to the ElasticSearch cluster, what index the CloudTrail logs are stored in, and information about your AWS account, including the location of the IAM file created in Step 2. 35 | 36 | Example `config.yaml` file: 37 | ``` 38 | elasticsearch: 39 | host: localhost 40 | port: 9200 41 | index: "cloudtrail" 42 | key_prefix: "" 43 | timestamp_field: "eventTime" 44 | accounts: 45 | - name: demo 46 | id: 123456789012 47 | iam: account-data/demo_iam.json 48 | ``` 49 | 50 | The ElasticSearch configuration section accepts the same options as the ElasticSearch Python library, documented here: http://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch 51 | 52 | Additionally, you can configure: 53 | 54 | - `index`: The index you loaded your logs into. 55 | - `key_prefix`: Any prefix you have to your CloudTrail records. For example, if your `eventName` is queryable via `my_cloudtrail_data.eventName`, then the `key_prefix` would be `my_cloudtrail_data`. 56 | 57 | 58 | 59 | Install ElasticSearch 60 | ===================== 61 | 62 | You can use an AWS managed ElasticSearch cluster or one that you manage, including one running locally on a VM on your laptop. However, if your logs exceed a few dozen GB or roughly 100M records, you'll likely run into issues running locally. You'll also want to install Kibana to look at the loaded logs. 63 | 64 | Configure the ElasticSearch mapping 65 | ----------------------------------- 66 | Using Kibana and clicking on "Dev Tools" you can send commands to ElasticSearch. You can also do this using `curl`. Run the following to set up a `cloudtrail` index and increase its total-fields limit to 5000. If you don't increase that limit, records will be silently dropped.
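After sending the commands below, you can verify that the limit took effect; `localhost:9200` is an assumption here, so substitute your own cluster endpoint:
```
curl -s http://localhost:9200/cloudtrail/_settings | jq '.cloudtrail.settings.index.mapping.total_fields.limit'
```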
67 | 68 | 69 | The commands to send 70 | ``` 71 | PUT /cloudtrail 72 | { 73 | "mappings": { 74 | "doc": { 75 | "properties": { 76 | "@timestamp": { 77 | "type": "date" 78 | }, 79 | "@version": { 80 | "type": "text", 81 | "fields": { 82 | "keyword": { 83 | "type": "keyword", 84 | "ignore_above": 256 85 | } 86 | } 87 | }, 88 | "host": { 89 | "type": "text", 90 | "fields": { 91 | "keyword": { 92 | "type": "keyword", 93 | "ignore_above": 256 94 | } 95 | } 96 | }, 97 | "message": { 98 | "properties": { 99 | "additionalEventData": { 100 | "properties": { 101 | "Note": { 102 | "type": "text", 103 | "fields": { 104 | "keyword": { 105 | "type": "keyword", 106 | "ignore_above": 256 107 | } 108 | } 109 | } 110 | } 111 | }, 112 | "apiVersion": { 113 | "type": "text" 114 | } 115 | } 116 | } 117 | } 118 | } 119 | } 120 | } 121 | ``` 122 | 123 | You can save the above file as `cloudtrail_mapping.json` and then send it to your ElasticSearch cluster using: 124 | 125 | ``` 126 | curl -X PUT https://YOUR_ES_SERVER/cloudtrail -T cloudtrail_mapping.json -H "Content-Type: application/json" 127 | ``` 128 | 129 | 130 | Do the same for: 131 | ``` 132 | PUT /cloudtrail/_settings 133 | { 134 | "index.mapping.total_fields.limit": 5000 135 | } 136 | ``` 137 | 138 | You can save that to a file named `cloudtrail_settings.json` and then run: 139 | ``` 140 | curl -X PUT https://YOUR_ES_SERVER/cloudtrail/_settings -T cloudtrail_settings.json -H "Content-Type: application/json" 141 | ``` 142 | 143 | 144 | Ingest CloudTrail logs into ElasticSearch using Hindsight 145 | ========================================================= 146 | 147 | Copy your CloudTrail logs locally and convert them to a single flat file. 148 | 149 | ``` 150 | # Replace YOUR_BUCKET and YOUR_ACCOUNT_ID in the following command 151 | aws s3 sync s3://YOUR_BUCKET/AWSLogs/YOUR_ACCOUNT_ID/CloudTrail/ . 152 | find . -name "*.json.gz" -exec gunzip -c {} \; | jq -cr '.Records[] | del(.responseElements.endpoint)' >> ../cloudtrail.json 153 | ``` 154 | 155 | I'm deleting `.responseElements.endpoint` because different API calls return an object or a string for that value and ElasticSearch can't handle mixed types, so I just ignore that value since it is of little use. 156 | 157 | 158 | Install Hindsight 159 | ----------------- 160 | Hindsight is hard to install as it has a number of dependencies. The project is at https://github.com/mozilla-services/hindsight 161 | 162 | Here are some notes, but you'll still probably run into trouble. Help in improving the installation of those projects would be good. 163 | 164 | For the dependencies: 165 | ``` 166 | sudo yum install -y libcurl-devel autoconf automake libtool cmake 167 | 168 | git clone https://github.com/mozilla-services/lua_sandbox.git 169 | cd lua_sandbox 170 | mkdir release 171 | cd release 172 | 173 | cmake -DCMAKE_BUILD_TYPE=release .. 174 | make 175 | sudo make install 176 | 177 | cd ../.. 178 | 179 | git clone https://github.com/mozilla-services/lua_sandbox_extensions.git 180 | cd lua_sandbox_extensions 181 | mkdir release 182 | cd release 183 | # Disable a bunch of extensions when we build this to avoid further dependencies 184 | cmake -DCMAKE_BUILD_TYPE=release -DEXT_aws=off -DEXT_kafka=off -DEXT_parquet=off -DEXT_jose=off -DEXT_postgres=off -DEXT_systemd=off -DEXT_snappy=off -DCPACK_GENERATOR=RPM .. 
185 | make 186 | make packages 187 | sudo make install 188 | # In my experience I needed to manually install files, or copy or link them, as you should have files named 189 | # `rjson.so` and `ltn12.lua` at `/usr/local/lib/luasandbox/io_modules/`. 190 | ``` 191 | 192 | Now install Hindsight from https://github.com/mozilla-services/hindsight 193 | 194 | 195 | Run a proxy 196 | ----------- 197 | This may not be needed, but it's helpful, especially when using an AWS managed ElasticSearch cluster. 198 | 199 | ``` 200 | var http = require('http'), 201 | httpProxy = require('http-proxy'); 202 | 203 | var proxy = httpProxy.createProxyServer({}); 204 | 205 | proxy.on('proxyReq', function(proxyReq, req, res, options) { 206 | console.log("> Proxying: ", req.url); 207 | proxyReq.setHeader('content-type', 'application/json'); 208 | }); 209 | 210 | proxy.on('proxyRes', function (proxyRes, req, res) { 211 | console.log("< ", proxyRes.statusCode); 212 | }); 213 | 214 | var server = http.createServer(function(req, res) { 215 | proxy.web(req, res, { 216 | target: 'https://MY_ES_INSTANCE.us-west-2.es.amazonaws.com', secure: false 217 | }); 218 | }); 219 | 220 | console.log("listening on port 9201") 221 | server.listen(9201); 222 | ``` 223 | 224 | Here you can see I am ignoring any cert errors when making the TLS connection, so you'll need to decide if that is acceptable for your use case. 225 | 226 | Run this with: 227 | ``` 228 | node proxy.js 229 | ``` 230 | 231 | 232 | Configure Hindsight 233 | ------------------- 234 | This repo includes a `hindsight/run` directory. Copy the `run` directory to your hindsight repo. 235 | 236 | Replace `YOUR_FILE` in `run/input/file.cfg` with the full path to your `cloudtrail.json` file. 237 | 238 | Replace `127.0.0.1` and the port `9200` in `run/output/elasticsearch_bulk_api.cfg` if you are not running ElasticSearch on your localhost. 239 | 240 | 241 | Run hindsight 242 | ------------- 243 | To run hindsight use: 244 | 245 | ``` 246 | hindsight_cli hindsight.cfg 247 | ``` 248 | 249 | You will also want to run `rm -rf output/*` in between runs to clear out the cached files. 250 | You may need to modify `hindsight.cfg` to tell it the `io_lua_path` and other paths are in `/usr/local/lib/` not `/usr/lib/` 251 | 252 | -------------------------------------------------------------------------------- /hindsight/run/analysis/counter.cfg: -------------------------------------------------------------------------------- 1 | filename = "counter.lua" 2 | message_matcher = "TRUE" 3 | ticker_interval = 5 4 | -------------------------------------------------------------------------------- /hindsight/run/analysis/counter.lua: -------------------------------------------------------------------------------- 1 | require "string" 2 | msgcount = 0 3 | 4 | function process_message() 5 | msgcount = msgcount + 1 6 | return 0 7 | end 8 | 9 | function timer_event() 10 | inject_payload("txt", "count", string.format("%d message analysed", msgcount)) 11 | end 12 | -------------------------------------------------------------------------------- /hindsight/run/input/file.cfg: -------------------------------------------------------------------------------- 1 | filename = "file.lua" 2 | 3 | -- Name of the input file (nil for stdin) 4 | -- Default: 5 | input_filename = "YOUR_FILE" 6 | 7 | -- Heka message table containing the default header values to use, if they are 8 | -- not populated by the decoder. 
If 'Fields' is specified it should be in the 9 | -- hashed based format see: http://mozilla-services.github.io/lua_sandbox/heka/message.html 10 | -- Default: 11 | -- default_headers = nil 12 | 13 | -- Specifies a module that will decode the raw data and inject the resulting message. 14 | -- Default: 15 | -- decoder_module = "decoders.heka.json" 16 | 17 | -- Boolean, if true, any decode failure will inject a message of Type "error", 18 | -- with the Payload containing the error. 19 | -- Default: 20 | -- send_decode_failures = false 21 | -------------------------------------------------------------------------------- /hindsight/run/input/file.lua: -------------------------------------------------------------------------------- 1 | -- This Source Code Form is subject to the terms of the Mozilla Public 2 | -- License, v. 2.0. If a copy of the MPL was not distributed with this 3 | -- file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | --[[ 6 | # Single File Input (new line delimited) 7 | todo: when more than line splitting is needed the file should be read in chunks 8 | and passed to a generic splitter buffer with a token/match specification and a 9 | find function similar to the Heka stream reader. 10 | ## Sample Configuration 11 | ```lua 12 | filename = "file.lua" 13 | -- Name of the input file (nil for stdin) 14 | -- Default: 15 | -- input_filename = nil 16 | -- Heka message table containing the default header values to use, if they are 17 | -- not populated by the decoder. If 'Fields' is specified it should be in the 18 | -- hashed based format see: http://mozilla-services.github.io/lua_sandbox/heka/message.html 19 | -- Default: 20 | -- default_headers = nil 21 | -- Specifies a module that will decode the raw data and inject the resulting message. 22 | -- Default: 23 | -- decoder_module = "decoders.payload" 24 | -- Boolean, if true, any decode failure will inject a message of Type "error", 25 | -- with the Payload containing the error. 26 | -- Default: 27 | -- send_decode_failures = false 28 | ``` 29 | --]] 30 | require "io" 31 | require "string" 32 | 33 | local input_filename = read_config("input_filename") 34 | local default_headers = read_config("default_headers") 35 | assert(default_headers == nil or type(default_headers) == "table", "invalid default_headers cfg") 36 | 37 | local decoder_module = read_config("decoder_module") or "decoders.payload" 38 | local decode = require(decoder_module).decode 39 | if not decode then 40 | error(decoder_module .. 
" does not provide a decode function") 41 | end 42 | local send_decode_failures = read_config("send_decode_failures") 43 | 44 | local err_msg = { 45 | Type = "error", 46 | Payload = nil, 47 | } 48 | 49 | function process_message(checkpoint) 50 | local fh = io.stdin 51 | if input_filename then 52 | fh = assert(io.open(input_filename, "rb")) -- closed on plugin shutdown 53 | if checkpoint then 54 | fh:seek("set", checkpoint) 55 | else 56 | checkpoint = 0 57 | end 58 | end 59 | 60 | local cnt = 0 61 | for data in fh:lines() do 62 | local ok, err = pcall(decode, data, default_headers) 63 | if (not ok or err) and send_decode_failures then 64 | err_msg.Payload = err 65 | pcall(inject_message, err_msg) 66 | end 67 | 68 | if input_filename then 69 | checkpoint = checkpoint + #data + 1 70 | inject_message(nil, checkpoint) 71 | end 72 | cnt = cnt + 1 73 | end 74 | return 0, string.format("processed %d lines", cnt) 75 | end 76 | -------------------------------------------------------------------------------- /hindsight/run/input/json.lua: -------------------------------------------------------------------------------- 1 | -- This Source Code Form is subject to the terms of the Mozilla Public 2 | -- License, v. 2.0. If a copy of the MPL was not distributed with this 3 | -- file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | --[[ 6 | # Heka JSON Message Decoder Module 7 | https://wiki.mozilla.org/Firefox/Services/Logging 8 | 9 | The above link describes the Heka message format with a JSON schema. The JSON 10 | will be decoded and passed directly to inject_message so it needs to decode into 11 | a Heka message table described here: 12 | https://mozilla-services.github.io/lua_sandbox/heka/message.html 13 | 14 | ## Decoder Configuration Table 15 | * none 16 | 17 | ## Functions 18 | 19 | ### decode 20 | 21 | Decode and inject the resulting message 22 | 23 | *Arguments* 24 | - data (string) - JSON message with a Heka schema 25 | 26 | *Return* 27 | - nil - throws an error on an invalid data type, JSON parse error, 28 | inject_message failure etc. 29 | 30 | --]] 31 | 32 | -- Imports 33 | local cjson = require "cjson" 34 | 35 | local inject_message = inject_message 36 | 37 | local M = {} 38 | setfenv(1, M) -- Remove external access to contain everything in the module 39 | 40 | function decode(data) 41 | inject_message(cjson.decode(data)) 42 | end 43 | 44 | return M 45 | -------------------------------------------------------------------------------- /hindsight/run/output/elasticsearch_bulk_api.cfg: -------------------------------------------------------------------------------- 1 | filename = "elasticsearch_bulk_api.lua" 2 | message_matcher = "TRUE" 3 | ticker_interval = 10 -- flush every 10 seconds or flush_count (50000) messages 4 | memory_limit = 200e6 5 | 6 | address = "127.0.0.1" 7 | port = 9200 8 | timeout = 10 -- socket timeout 9 | flush_count = 500 -- 50000 10 | flush_on_shutdown = true 11 | preserve_data = false -- there is no state maintained in this plugin 12 | max_retry = 1 -- number of seconds (retries once per second) 13 | discard_on_error = false -- discard the batch after max_retry + 1 failed attempts to send the batch 14 | abort_on_error = false -- stop this plugin after max_retry + 1 failed attempts to send the batch 15 | -- when setting abort_on_error = true, consider also settings shutdown_on_terminate or remove_checkpoints_on_terminate 16 | 17 | -- See the elasticsearch module directory for the various encoders and configuration documentation. 
18 | encoder_module = "encoders.elasticsearch.payload" 19 | encoders_elasticsearch_common = { 20 | es_index_from_timestamp = true, 21 | index = "cloudtrail", 22 | type_name = "doc", 23 | } 24 | -------------------------------------------------------------------------------- /hindsight/run/output/elasticsearch_bulk_api.lua: -------------------------------------------------------------------------------- 1 | -- This Source Code Form is subject to the terms of the Mozilla Public 2 | -- License, v. 2.0. If a copy of the MPL was not distributed with this 3 | -- file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | --[[ 6 | # Elasticsearch Bulk API Output 7 | 8 | ## Sample Configuration 9 | ```lua 10 | filename = "elasticsearch_bulk_api.lua" 11 | message_matcher = "Type == 'nginx'" 12 | ticker_interval = 10 -- flush every 10 seconds or flush_count (50000) messages 13 | memory_limit = 200e6 14 | 15 | address = "127.0.0.1" 16 | port = 9200 17 | timeout = 10 -- socket timeout 18 | flush_count = 50000 19 | flush_on_shutdown = false 20 | preserve_data = false -- there is no state maintained in this plugin 21 | max_retry = 0 -- number of seconds (retries once per second) 22 | discard_on_error = false -- discard the batch after max_retry + 1 failed attempts to send the batch 23 | abort_on_error = false -- stop this plugin after max_retry + 1 failed attempts to send the batch 24 | -- when setting abort_on_error = true, consider also settings shutdown_on_terminate or remove_checkpoints_on_terminate 25 | 26 | -- See the elasticsearch module directory for the various encoders and configuration documentation. 27 | encoder_module = "encoders.elasticsearch.payload" 28 | encoders_elasticsearch_common = { 29 | es_index_from_timestamp = true, 30 | index = "%{Logger}-%{%Y.%m.%d}", 31 | type_name = "%{Type}-%{Hostname}", 32 | } 33 | ``` 34 | --]] 35 | 36 | require "table" 37 | require "rjson" 38 | require "string" 39 | local ltn12 = require "ltn12" 40 | local time = require "os".time 41 | local socket = require "socket" 42 | local http = require("socket.http") 43 | local address = read_config("address") or "127.0.0.1" 44 | local port = read_config("port") or 9200 45 | local timeout = read_config("timeout") or 10 46 | local discard = read_config("discard_on_error") 47 | local abort = read_config("abort_on_error") 48 | local max_retry = read_config("max_retry") or 0 49 | assert(not (abort and discard), "abort_on_error and discard_on_error are mutually exclusive") 50 | 51 | local encoder_module = read_config("encoder_module") or "encoders.elasticsearch.payload" 52 | local encode = require(encoder_module).encode 53 | if not encode then 54 | error(encoder_module .. 
" does not provide an encode function") 55 | end 56 | 57 | local batch_file = string.format("%s/%s.batch", read_config("output_path"), read_config("Logger")) 58 | local flush_on_shutdown = read_config("flush_on_shutdown") 59 | local ticker_interval = read_config("ticker_interval") 60 | local flush_count = read_config("flush_count") or 50000 61 | assert(flush_count > 0, "flush_count must be greater than zero") 62 | 63 | local client 64 | local function create_client() 65 | local client = http.open(address, port) 66 | client.c:setoption("tcp-nodelay", true) 67 | client.c:setoption("keepalive", true) 68 | client.c:settimeout(timeout) 69 | return client 70 | end 71 | local pcreate_client = socket.protect(create_client); 72 | 73 | 74 | local req_headers = { 75 | ["user-agent"] = http.USERAGENT, 76 | ["content-type"] = "application/x-ndjson", 77 | ["content-length"] = 0, 78 | ["host"] = address .. ":" .. port, 79 | ["accept"] = "application/json", 80 | ["connection"] = "keep-alive", 81 | } 82 | 83 | local function send_request() -- hand coded since socket.http doesn't support keep-alive connections 84 | if not client then client, err = pcreate_client() end 85 | if err then print(err); return false; end 86 | 87 | local success = true 88 | local fh = assert(io.open(batch_file, "r")) 89 | req_headers["content-length"] = fh:seek("end") 90 | client:sendrequestline("POST", "/_bulk") 91 | client:sendheaders(req_headers) 92 | fh:seek("set") 93 | client:sendbody(req_headers, ltn12.source.file(fh, "invalid file handle")) 94 | local code = client:receivestatusline() 95 | local headers 96 | while code == 100 do -- ignore any 100-continue messages 97 | headers = client:receiveheaders() 98 | code = client:receivestatusline() 99 | end 100 | headers = client:receiveheaders() 101 | if code ~= 204 and code ~= 304 and not (code >= 100 and code < 200) then 102 | if code == 200 and string.match(headers["content-type"], "^application/json") then 103 | local body = {} 104 | local sink = ltn12.sink.table(body) 105 | client:receivebody(headers, sink) 106 | local response = table.concat(body) 107 | local ok, doc = pcall(rjson.parse, response) 108 | if ok then 109 | if doc:value(doc:find("errors")) then 110 | print(string.format("ElasticSearch server reported errors processing the submission, not all messages were indexed")) 111 | -- todo track partial batch failure counts https://github.com/mozilla-services/lua_sandbox_extensions/issues/89 112 | -- the partial failure is most likely due to bad input, so no retry is attempted as it would just fail again 113 | end 114 | else 115 | print(string.format("HTTP response didn't contain valid JSON. err: %s", doc)) 116 | end 117 | else 118 | client:receivebody(headers, ltn12.sink.null()) 119 | end 120 | 121 | if code > 304 then 122 | success = false 123 | print(string.format("HTTP response error. 
Status: %d", code)) 124 | end 125 | end 126 | 127 | if headers.connection == "close" then 128 | client:close() 129 | client = nil 130 | end 131 | 132 | return success 133 | end 134 | local psend_request = socket.protect(function() return send_request() end) 135 | 136 | 137 | local send_on_start = false 138 | local last_flush = time() 139 | local batch_count = 0 140 | local retry_count = 0 141 | local batch = assert(io.open(batch_file, "a+")) 142 | for _ in io.lines(batch_file) do -- ensure we have a correct count when resuming after an abort 143 | batch_count = batch_count + 1 144 | end 145 | batch_count = batch_count / 2 146 | if batch_count >= flush_count then 147 | send_on_start = true 148 | end 149 | 150 | local function finalize_batch() 151 | last_flush = time() 152 | batch_count = 0 153 | retry_count = 0 154 | batch:close() 155 | batch = assert(io.open(batch_file, "w")) 156 | end 157 | 158 | local function send_batch() 159 | batch:flush() 160 | local ok, err = psend_request() 161 | if not ok then 162 | if err then print(err) end 163 | client = nil 164 | retry_count = retry_count + 1 165 | if discard and retry_count > max_retry then 166 | print(string.format("discarded %d messages", batch_count)) 167 | finalize_batch() 168 | return true 169 | elseif abort and retry_count > max_retry then 170 | error(string.format("Abort sending %d messages after %d attempts", batch_count, retry_count)) 171 | end 172 | return false 173 | end 174 | finalize_batch() 175 | return true 176 | end 177 | 178 | 179 | function process_message() 180 | if batch_count >= flush_count then -- attempt to transmit a failed batch before accepting new data 181 | if not send_batch() then 182 | return -3 -- retry until successful or it errors out 183 | end 184 | if not send_on_start then 185 | return 0 -- break the retry loop and allow new data to start flowing again 186 | end 187 | send_on_start = false 188 | end 189 | 190 | local ok, data = pcall(encode) 191 | if not ok then return -1, data end 192 | if not data then return -2 end 193 | batch:write(data) 194 | batch_count = batch_count + 1 195 | 196 | if batch_count >= flush_count then 197 | send_batch() 198 | end 199 | return 0 200 | end 201 | 202 | 203 | function timer_event(ns, shutdown) 204 | local timedout = (ns / 1e9 - last_flush) >= ticker_interval 205 | if (timedout or (shutdown and flush_on_shutdown)) and batch_count > 0 then 206 | send_batch() 207 | end 208 | end 209 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | astroid==2.4.2 2 | autoflake==1.3.1 3 | autopep8==1.5.4 4 | coverage==5.2.1 5 | invoke==1.4.1 6 | isort==4.3.21 7 | lazy-object-proxy==1.4.3 8 | mccabe==0.6.1 9 | nose==1.3.7 10 | pycodestyle==2.6.0 11 | pyflakes==2.2.0 12 | pylint==2.5.3 13 | six==1.15.0 14 | toml==0.10.1 15 | wrapt==1.12.1 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | boto3==1.5.32 3 | botocore==1.12.97 4 | docutils==0.16 5 | jmespath==0.9.3 6 | python-dateutil==2.8.1 7 | PyYAML==5.4 8 | s3transfer==0.1.13 9 | six==1.15.0 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | with-coverage=1 3 | cover-erase=1 4 | 
/requirements-dev.txt:
--------------------------------------------------------------------------------
 1 | astroid==2.4.2
 2 | autoflake==1.3.1
 3 | autopep8==1.5.4
 4 | coverage==5.2.1
 5 | invoke==1.4.1
 6 | isort==4.3.21
 7 | lazy-object-proxy==1.4.3
 8 | mccabe==0.6.1
 9 | nose==1.3.7
10 | pycodestyle==2.6.0
11 | pyflakes==2.2.0
12 | pylint==2.5.3
13 | six==1.15.0
14 | toml==0.10.1
15 | wrapt==1.12.1
16 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | ansicolors==1.1.8
 2 | boto3==1.5.32
 3 | botocore==1.12.97
 4 | docutils==0.16
 5 | jmespath==0.9.3
 6 | python-dateutil==2.8.1
 7 | PyYAML==5.4
 8 | s3transfer==0.1.13
 9 | six==1.15.0
10 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [nosetests]
 2 | with-coverage=1
 3 | cover-erase=1
 4 | cover-package=cloudtracker
 5 | cover-html=1
 6 | cover-html-dir=htmlcov
 7 | tests = tests/unit
 8 | 
 9 | [aliases]
10 | test=nosetests
11 | 
12 | # Exclude: __pycache__ / .pyc
13 | [coverage:run]
14 | omit =
15 |     # omit anything in a .local directory anywhere
16 |     */.local/*
17 |     utils/*
18 |     */virtualenv/*
19 |     */venv/*
20 |     */.venv/*
--------------------------------------------------------------------------------
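The `[nosetests]` section keys map one-to-one onto nose command-line flags, and the `[aliases]` entry routes `python setup.py test` through them. A sketch of the equivalent programmatic run (nose is pinned in requirements-dev.txt; the paths and flags are the ones from the config above):

```python
# A sketch of what `python setup.py test` / `nosetests` resolves to given the
# setup.cfg options above; each flag mirrors a config key one-to-one.
import nose

nose.run(argv=[
    "nosetests", "tests/unit",
    "--with-coverage", "--cover-erase",
    "--cover-package=cloudtracker",
    "--cover-html", "--cover-html-dir=htmlcov",
])
```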
26 | c.run("python setup.py -q sdist bdist_wheel") 27 | 28 | 29 | @task(pre=[build_package]) 30 | def install_package(c): 31 | """Install the package built from the current directory contents (not PyPi)""" 32 | c.run("pip3 install -q dist/cloudtracker-*.tar.gz") 33 | 34 | 35 | @task 36 | def uninstall_package(c): 37 | """Uninstall the package""" 38 | c.run('echo "y" | pip3 uninstall cloudtracker', pty=True) 39 | c.run("rm -rf dist/*", pty=True) 40 | 41 | 42 | @task(pre=[install_package]) 43 | def help_check(c): 44 | """Print the version to make sure the package installation didn't irrationally break""" 45 | try: 46 | c.run("./bin/cloudtracker --help", pty=True) 47 | except UnexpectedExit as u_e: 48 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 49 | sys.exit(1) 50 | except Failure as f_e: 51 | logger.critical(f"FAIL: Failure: {f_e}") 52 | sys.exit(1) 53 | 54 | 55 | # TEST - format 56 | @task 57 | def fmt(c): 58 | """Auto format code with Python autopep8""" 59 | try: 60 | c.run("autopep8 cloudtracker/") 61 | except UnexpectedExit as u_e: 62 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 63 | sys.exit(1) 64 | except Failure as f_e: 65 | logger.critical(f"FAIL: Failure: {f_e}") 66 | sys.exit(1) 67 | 68 | 69 | # TEST - LINT 70 | @task 71 | def run_linter(c): 72 | """Lint the code""" 73 | try: 74 | c.run("pylint cloudtracker/", warn=False) 75 | except UnexpectedExit as u_e: 76 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 77 | sys.exit(1) 78 | except Failure as f_e: 79 | logger.critical(f"FAIL: Failure: {f_e}") 80 | sys.exit(1) 81 | 82 | 83 | # TEST - SECURITY 84 | @task 85 | def security_scan(c): 86 | """Runs `bandit` and `safety check`""" 87 | try: 88 | c.run("bandit -r cloudtracker/") 89 | # c.run("safety check") 90 | except UnexpectedExit as u_e: 91 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 92 | sys.exit(1) 93 | except Failure as f_e: 94 | logger.critical(f"FAIL: Failure: {f_e}") 95 | sys.exit(1) 96 | 97 | 98 | # UNIT TESTING 99 | @task 100 | def run_nosetests(c): 101 | """Unit testing: Runs unit tests using `nosetests`""" 102 | c.run('echo "Running Unit tests"') 103 | try: 104 | c.run("nosetests -v --logging-level=CRITICAL") 105 | except UnexpectedExit as u_e: 106 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 107 | sys.exit(1) 108 | except Failure as f_e: 109 | logger.critical(f"FAIL: Failure: {f_e}") 110 | sys.exit(1) 111 | 112 | 113 | @task 114 | def run_pytest(c): 115 | """Unit testing: Runs unit tests with pytest and coverage""" 116 | c.run('echo "Running Unit tests"') 117 | try: 118 | c.run("python -m coverage run -m pytest -v") 119 | c.run("python -m coverage report -m") 120 | except UnexpectedExit as u_e: 121 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 122 | sys.exit(1) 123 | except Failure as f_e: 124 | logger.critical(f"FAIL: Failure: {f_e}") 125 | sys.exit(1) 126 | 127 | 128 | build.add_task(build_package, "build") 129 | build.add_task(install_package, "install") 130 | build.add_task(uninstall_package, "uninstall") 131 | 132 | unit.add_task(run_nosetests, "nose") 133 | unit.add_task(run_pytest, "pytest") 134 | 135 | test.add_task(run_linter, "lint") 136 | test.add_task(fmt, "format") 137 | test.add_task(security_scan, "security") 138 | 139 | test.add_task(help_check, "help") 140 | -------------------------------------------------------------------------------- /tests/scripts/pylint.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | echo 'Starting pylint script' 3 | find . 
/tasks.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import sys
  3 | import os
  4 | import logging
  5 | from invoke import task, Collection, UnexpectedExit, Failure
  6 | 
  7 | logger = logging.getLogger(__name__)
  8 | # Create the necessary collections (namespaces)
  9 | ns = Collection()
 10 | 
 11 | test = Collection("test")
 12 | ns.add_collection(test)
 13 | 
 14 | unit = Collection("unit")
 15 | ns.add_collection(unit)
 16 | 
 17 | build = Collection("build")
 18 | ns.add_collection(build)
 19 | 
 20 | 
 21 | # Build
 22 | @task
 23 | def build_package(c):
 24 |     """Build the package from the current directory contents for use with PyPI"""
 25 |     c.run("python -m pip install --upgrade setuptools wheel")
 26 |     c.run("python setup.py -q sdist bdist_wheel")
 27 | 
 28 | 
 29 | @task(pre=[build_package])
 30 | def install_package(c):
 31 |     """Install the package built from the current directory contents (not PyPI)"""
 32 |     c.run("pip3 install -q dist/cloudtracker-*.tar.gz")
 33 | 
 34 | 
 35 | @task
 36 | def uninstall_package(c):
 37 |     """Uninstall the package"""
 38 |     c.run('echo "y" | pip3 uninstall cloudtracker', pty=True)
 39 |     c.run("rm -rf dist/*", pty=True)
 40 | 
 41 | 
 42 | @task(pre=[install_package])
 43 | def help_check(c):
 44 |     """Run the installed CLI's --help to make sure the package installation didn't break the entry point"""
 45 |     try:
 46 |         c.run("./bin/cloudtracker --help", pty=True)
 47 |     except UnexpectedExit as u_e:
 48 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
 49 |         sys.exit(1)
 50 |     except Failure as f_e:
 51 |         logger.critical(f"FAIL: Failure: {f_e}")
 52 |         sys.exit(1)
 53 | 
 54 | 
 55 | # TEST - format
 56 | @task
 57 | def fmt(c):
 58 |     """Auto-format code with autopep8"""
 59 |     try:
 60 |         c.run("autopep8 --in-place --recursive cloudtracker/")
 61 |     except UnexpectedExit as u_e:
 62 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
 63 |         sys.exit(1)
 64 |     except Failure as f_e:
 65 |         logger.critical(f"FAIL: Failure: {f_e}")
 66 |         sys.exit(1)
 67 | 
 68 | 
 69 | # TEST - LINT
 70 | @task
 71 | def run_linter(c):
 72 |     """Lint the code"""
 73 |     try:
 74 |         c.run("pylint cloudtracker/", warn=False)
 75 |     except UnexpectedExit as u_e:
 76 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
 77 |         sys.exit(1)
 78 |     except Failure as f_e:
 79 |         logger.critical(f"FAIL: Failure: {f_e}")
 80 |         sys.exit(1)
 81 | 
 82 | 
 83 | # TEST - SECURITY
 84 | @task
 85 | def security_scan(c):
 86 |     """Run `bandit` (and, when enabled, `safety check`)"""
 87 |     try:
 88 |         c.run("bandit -r cloudtracker/")
 89 |         # c.run("safety check")
 90 |     except UnexpectedExit as u_e:
 91 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
 92 |         sys.exit(1)
 93 |     except Failure as f_e:
 94 |         logger.critical(f"FAIL: Failure: {f_e}")
 95 |         sys.exit(1)
 96 | 
 97 | 
 98 | # UNIT TESTING
 99 | @task
100 | def run_nosetests(c):
101 |     """Unit testing: Runs unit tests using `nosetests`"""
102 |     c.run('echo "Running Unit tests"')
103 |     try:
104 |         c.run("nosetests -v --logging-level=CRITICAL")
105 |     except UnexpectedExit as u_e:
106 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
107 |         sys.exit(1)
108 |     except Failure as f_e:
109 |         logger.critical(f"FAIL: Failure: {f_e}")
110 |         sys.exit(1)
111 | 
112 | 
113 | @task
114 | def run_pytest(c):
115 |     """Unit testing: Runs unit tests with pytest and coverage"""
116 |     c.run('echo "Running Unit tests"')
117 |     try:
118 |         c.run("python -m coverage run -m pytest -v")
119 |         c.run("python -m coverage report -m")
120 |     except UnexpectedExit as u_e:
121 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
122 |         sys.exit(1)
123 |     except Failure as f_e:
124 |         logger.critical(f"FAIL: Failure: {f_e}")
125 |         sys.exit(1)
126 | 
127 | 
128 | build.add_task(build_package, "build")
129 | build.add_task(install_package, "install")
130 | build.add_task(uninstall_package, "uninstall")
131 | 
132 | unit.add_task(run_nosetests, "nose")
133 | unit.add_task(run_pytest, "pytest")
134 | 
135 | test.add_task(run_linter, "lint")
136 | test.add_task(fmt, "format")
137 | test.add_task(security_scan, "security")
138 | 
139 | test.add_task(help_check, "help")
140 | 
--------------------------------------------------------------------------------
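The Collection wiring at the bottom is what turns these functions into the dotted task names the CI workflow calls (`invoke test.help`, `invoke unit.nose`): the collection name plus the `add_task` alias form the CLI name. A stripped-down sketch of the same pattern:

```python
# A stripped-down sketch of the namespace wiring in tasks.py; `nose` here is
# a stand-in for run_nosetests.
from invoke import task, Collection

@task
def nose(c):
    """Stand-in for run_nosetests."""
    c.run("nosetests -v --logging-level=CRITICAL")

ns = Collection()           # invoke discovers the module-level `ns`
unit = Collection("unit")
ns.add_collection(unit)
unit.add_task(nose, "nose")

print(sorted(ns.task_names))  # ['unit.nose'] -> invoked as `invoke unit.nose`
```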
/tests/scripts/pylint.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | echo 'Starting pylint script'
 3 | find . -name '*.py' -not -path './docs/source/*' -not -path './venv/*' -exec pylint '{}' +
 4 | 
--------------------------------------------------------------------------------
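The same sweep can be done without the shell via pylint's programmatic entry point; a sketch, assuming the pinned pylint (2.5.3), where `lint.Run` accepts an `exit=` keyword:

```python
# A sketch of tests/scripts/pylint.sh in Python: lint every .py file,
# skipping docs/source and venv paths as the find command does.
from pathlib import Path
from pylint import lint  # pinned in requirements-dev.txt (pylint==2.5.3)

files = [
    str(p) for p in Path(".").rglob("*.py")
    if "docs/source" not in p.as_posix() and "venv" not in p.parts
]
lint.Run(files, exit=False)  # exit=False keeps Run from calling sys.exit()
```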
104 | """ 105 | privileges = Privileges(self.aws_api_list) 106 | policy = [ 107 | { 108 | "Action": "s3:*", 109 | "Effect": "Allow", 110 | "Resource": "*" 111 | }, 112 | { 113 | "Action": "s3:CreateBucket", 114 | "Effect": "Deny", 115 | "Resource": "*" 116 | }, 117 | { 118 | "Action": "s3:*", 119 | "Effect": "Deny", 120 | "Resource": [ 121 | "arn:aws:s3:::super-sensitive-bucket", 122 | "arn:aws:s3:::super-sensitive-bucket/*" 123 | ] 124 | } 125 | ] 126 | for stmt in policy: 127 | privileges.add_stmt(stmt) 128 | self.assertTrue('s3:deletebucket' in privileges.determine_allowed()) 129 | self.assertTrue('s3:createbucket' not in privileges.determine_allowed()) 130 | 131 | 132 | def test_get_actions_from_statement_with_array_of_resources(self): 133 | """ 134 | Test array of resources 135 | """ 136 | privileges = Privileges(self.aws_api_list) 137 | policy = [ 138 | { 139 | "Action": "s3:*", 140 | "Effect": "Allow", 141 | "Resource": "*" 142 | }, 143 | { 144 | "Action": "s3:CreateBucket", 145 | "Effect": "Deny", 146 | "Resource": ["arn:aws:s3:::super-sensitive-bucket", "*"] 147 | } 148 | ] 149 | for stmt in policy: 150 | privileges.add_stmt(stmt) 151 | self.assertTrue('s3:deletebucket' in privileges.determine_allowed()) 152 | self.assertTrue('s3:createbucket' not in privileges.determine_allowed()) 153 | 154 | 155 | def test_get_actions_from_statement_with_conditions(self): 156 | """ 157 | Test that even when we are denied access based on a condition, 158 | the actions are still marked as allowed. 159 | """ 160 | privileges = Privileges(self.aws_api_list) 161 | policy = [ 162 | { 163 | "Sid": "AllowAllActionsForEC2", 164 | "Effect": "Allow", 165 | "Action": "ec2:*", 166 | "Resource": "*" 167 | }, 168 | { 169 | "Sid": "DenyStopAndTerminateWhenMFAIsNotPresent", 170 | "Effect": "Deny", 171 | "Action": [ 172 | "ec2:StopInstances", 173 | "ec2:TerminateInstances" 174 | ], 175 | "Resource": "*", 176 | "Condition": {"BoolIfExists": {"aws:MultiFactorAuthPresent": False}} 177 | } 178 | ] 179 | for stmt in policy: 180 | privileges.add_stmt(stmt) 181 | self.assertTrue('ec2:startinstances' in privileges.determine_allowed()) 182 | self.assertTrue('ec2:stopinstances' in privileges.determine_allowed()) 183 | 184 | 185 | def test_normalize_api_call(self): 186 | """Test normalize_api_call""" 187 | # Ensure the numbers at the end are removed 188 | self.assertEquals(normalize_api_call('lambda', 'ListTags20170331'), 'lambda:listtags') 189 | # Ensure service renaming occurs 190 | self.assertEquals(normalize_api_call('monitoring', 'DescribeAlarms'), 'cloudwatch:describealarms') 191 | 192 | 193 | def test_print_actor_diff(self): 194 | """Test print_actor_diff""" 195 | with capture(print_actor_diff, [], [], False) as output: 196 | self.assertEquals('', output) 197 | 198 | # Test output when you have 3 configured users, but only two actually did anything 199 | with capture(print_actor_diff, ['alice', 'bob'], ['alice', 'bob', 'charlie'], False) as output: 200 | self.assertEquals(' alice\n bob\n- charlie\n', output) 201 | 202 | 203 | def test_print_diff(self): 204 | """Test print_diff""" 205 | 206 | with capture(print_diff, [], [], {}, False) as output: 207 | self.assertEquals('', output) 208 | 209 | def mocked_is_recorded_by_cloudtrail(action): 210 | """Instead of reading the whole file, just cherry pick this one action used in the tests""" 211 | if action == 's3:putobject': 212 | return False 213 | return True 214 | 215 | # One action allowed, and performed, and should be shown 216 | with 
100 |     def test_get_actions_from_statement_with_resources(self):
101 |         """
102 |         Test that even when we are denied access to one resource,
103 |         the actions are still marked as allowed.
104 |         """
105 |         privileges = Privileges(self.aws_api_list)
106 |         policy = [
107 |             {
108 |                 "Action": "s3:*",
109 |                 "Effect": "Allow",
110 |                 "Resource": "*"
111 |             },
112 |             {
113 |                 "Action": "s3:CreateBucket",
114 |                 "Effect": "Deny",
115 |                 "Resource": "*"
116 |             },
117 |             {
118 |                 "Action": "s3:*",
119 |                 "Effect": "Deny",
120 |                 "Resource": [
121 |                     "arn:aws:s3:::super-sensitive-bucket",
122 |                     "arn:aws:s3:::super-sensitive-bucket/*"
123 |                 ]
124 |             }
125 |         ]
126 |         for stmt in policy:
127 |             privileges.add_stmt(stmt)
128 |         self.assertTrue('s3:deletebucket' in privileges.determine_allowed())
129 |         self.assertTrue('s3:createbucket' not in privileges.determine_allowed())
130 | 
131 | 
132 |     def test_get_actions_from_statement_with_array_of_resources(self):
133 |         """
134 |         Test array of resources
135 |         """
136 |         privileges = Privileges(self.aws_api_list)
137 |         policy = [
138 |             {
139 |                 "Action": "s3:*",
140 |                 "Effect": "Allow",
141 |                 "Resource": "*"
142 |             },
143 |             {
144 |                 "Action": "s3:CreateBucket",
145 |                 "Effect": "Deny",
146 |                 "Resource": ["arn:aws:s3:::super-sensitive-bucket", "*"]
147 |             }
148 |         ]
149 |         for stmt in policy:
150 |             privileges.add_stmt(stmt)
151 |         self.assertTrue('s3:deletebucket' in privileges.determine_allowed())
152 |         self.assertTrue('s3:createbucket' not in privileges.determine_allowed())
153 | 
154 | 
155 |     def test_get_actions_from_statement_with_conditions(self):
156 |         """
157 |         Test that even when we are denied access based on a condition,
158 |         the actions are still marked as allowed.
159 |         """
160 |         privileges = Privileges(self.aws_api_list)
161 |         policy = [
162 |             {
163 |                 "Sid": "AllowAllActionsForEC2",
164 |                 "Effect": "Allow",
165 |                 "Action": "ec2:*",
166 |                 "Resource": "*"
167 |             },
168 |             {
169 |                 "Sid": "DenyStopAndTerminateWhenMFAIsNotPresent",
170 |                 "Effect": "Deny",
171 |                 "Action": [
172 |                     "ec2:StopInstances",
173 |                     "ec2:TerminateInstances"
174 |                 ],
175 |                 "Resource": "*",
176 |                 "Condition": {"BoolIfExists": {"aws:MultiFactorAuthPresent": False}}
177 |             }
178 |         ]
179 |         for stmt in policy:
180 |             privileges.add_stmt(stmt)
181 |         self.assertTrue('ec2:startinstances' in privileges.determine_allowed())
182 |         self.assertTrue('ec2:stopinstances' in privileges.determine_allowed())
183 | 
184 | 
185 |     def test_normalize_api_call(self):
186 |         """Test normalize_api_call"""
187 |         # Ensure the numbers at the end are removed
188 |         self.assertEqual(normalize_api_call('lambda', 'ListTags20170331'), 'lambda:listtags')
189 |         # Ensure service renaming occurs
190 |         self.assertEqual(normalize_api_call('monitoring', 'DescribeAlarms'), 'cloudwatch:describealarms')
191 | 
192 | 
193 |     def test_print_actor_diff(self):
194 |         """Test print_actor_diff"""
195 |         with capture(print_actor_diff, [], [], False) as output:
196 |             self.assertEqual('', output)
197 | 
198 |         # Test output when you have 3 configured users, but only two actually did anything
199 |         with capture(print_actor_diff, ['alice', 'bob'], ['alice', 'bob', 'charlie'], False) as output:
200 |             self.assertEqual(' alice\n bob\n- charlie\n', output)
201 | 
202 | 
203 |     def test_print_diff(self):
204 |         """Test print_diff"""
205 | 
206 |         with capture(print_diff, [], [], {}, False) as output:
207 |             self.assertEqual('', output)
208 | 
209 |         def mocked_is_recorded_by_cloudtrail(action):
210 |             """Instead of reading the whole file, just cherry pick this one action used in the tests"""
211 |             if action == 's3:putobject':
212 |                 return False
213 |             return True
214 | 
215 |         # One action allowed, and performed, and should be shown
216 |         with patch('cloudtracker.is_recorded_by_cloudtrail', side_effect=mocked_is_recorded_by_cloudtrail):
217 |             with capture(print_diff,
218 |                          ['s3:createbucket'],  # performed
219 |                          ['s3:createbucket'],  # allowed
220 |                          {'show_benign': True, 'show_used': False, 'show_unknown': True}, False) as output:
221 |                 self.assertEqual(' s3:createbucket\n', output)
222 | 
223 |         # 3 actions allowed, one is used, one is unused, and one is unknown; show all
224 |         with patch('cloudtracker.is_recorded_by_cloudtrail', side_effect=mocked_is_recorded_by_cloudtrail):
225 |             with capture(print_diff,
226 |                          ['s3:createbucket', 'sts:getcalleridentity'],  # performed
227 |                          ['s3:createbucket', 's3:putobject', 's3:deletebucket'],  # allowed
228 |                          {'show_benign': True, 'show_used': False, 'show_unknown': True}, False) as output:
229 |                 self.assertEqual(' s3:createbucket\n- s3:deletebucket\n? s3:putobject\n', output)
230 | 
231 |         # Same as above, but only show the used one
232 |         with patch('cloudtracker.is_recorded_by_cloudtrail', side_effect=mocked_is_recorded_by_cloudtrail):
233 |             with capture(print_diff,
234 |                          ['s3:createbucket', 'sts:getcalleridentity'],  # performed
235 |                          ['s3:createbucket', 's3:putobject', 's3:deletebucket'],  # allowed
236 |                          {'show_benign': True, 'show_used': True, 'show_unknown': True}, False) as output:
237 |                 self.assertEqual(' s3:createbucket\n', output)
238 | 
239 |         # Hide the unknown
240 |         with patch('cloudtracker.is_recorded_by_cloudtrail', side_effect=mocked_is_recorded_by_cloudtrail):
241 |             with capture(print_diff,
242 |                          ['s3:createbucket', 'sts:getcalleridentity'],  # performed
243 |                          ['s3:createbucket', 's3:putobject', 's3:deletebucket'],  # allowed
244 |                          {'show_benign': True, 'show_used': False, 'show_unknown': False}, False) as output:
245 |                 self.assertEqual(' s3:createbucket\n- s3:deletebucket\n', output)
246 | 
247 |     # Role IAM policy to be used in different tests
248 |     role_iam = {
249 |         "AssumeRolePolicyDocument": {},
250 |         "RoleId": "AROA00000000000000000",
251 |         "CreateDate": "2017-01-01T00:00:00Z",
252 |         "InstanceProfileList": [],
253 |         "RoleName": "test_role",
254 |         "Path": "/",
255 |         "AttachedManagedPolicies": [],
256 |         "RolePolicyList": [
257 |             {
258 |                 "PolicyName": "KmsDecryptSecrets",
259 |                 "PolicyDocument": {
260 |                     "Version": "2012-10-17",
261 |                     "Statement": [
262 |                         {
263 |                             "Action": [
264 |                                 "kms:DescribeKey",
265 |                                 "kms:Decrypt"
266 |                             ],
267 |                             "Resource": "*",
268 |                             "Effect": "Allow",
269 |                             "Sid": ""
270 |                         }
271 |                     ]
272 |                 }
273 |             },
274 |             {
275 |                 "PolicyName": "S3PutObject",
276 |                 "PolicyDocument": {
277 |                     "Version": "2012-10-17",
278 |                     "Statement": [
279 |                         {
280 |                             "Action": [
281 |                                 "s3:PutObject",
282 |                                 "s3:PutObjectAcl",
283 |                                 "s3:ListBucket"
284 |                             ],
285 |                             "Resource": "*",
286 |                             "Effect": "Allow"
287 |                         }
288 |                     ]
289 |                 }
290 |             }
291 |         ],
292 |         "Arn": "arn:aws:iam::111111111111:role/test_role"
293 |     }
294 | 
295 |     def test_get_role_iam(self):
296 |         """Test get_role_iam"""
297 |         account_iam = {
298 |             "RoleDetailList": [self.role_iam],
299 |             "UserDetailList": [],
300 |             "GroupDetailList": [],
301 |             "Policies": []
302 |         }
303 | 
304 |         self.assertEqual(self.role_iam, get_role_iam("test_role", account_iam))
305 | 
306 | 
307 |     def test_get_role_allowed_actions(self):
308 |         """Test get_role_allowed_actions"""
309 |         account_iam = {
310 |             "RoleDetailList": [self.role_iam],
311 |             "UserDetailList": [],
312 |             "GroupDetailList": [],
313 |             "Policies": []
314 |         }
315 | 
316 |         aws_api_list = read_aws_api_list()
317 |         self.assertEqual(sorted(['s3:putobject', 'kms:describekey', 'kms:decrypt', 's3:putobjectacl']),
318 |                          sorted(get_role_allowed_actions(aws_api_list, self.role_iam, account_iam)))
319 | 
--------------------------------------------------------------------------------
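Outside of unittest, the helpers these tests exercise compose directly: `Privileges` expands wildcard actions against the bundled AWS API list, applies Deny statements, and reports what remains allowed. A compact sketch (the policy statements are invented, but every call mirrors the tests above):

```python
# A compact sketch reusing the helpers exercised above, outside a TestCase.
# The two policy statements are invented for illustration.
from cloudtracker import Privileges, normalize_api_call, read_aws_api_list

privileges = Privileges(read_aws_api_list())
privileges.add_stmt({"Action": ["s3:*ObjectT*"], "Resource": "*", "Effect": "Allow"})
privileges.add_stmt({"Action": ["s3:GetObjectTagging"], "Resource": "*", "Effect": "Deny"})

# The allowed set is the wildcard expansion minus the explicit Deny, so it
# should include s3:putobjecttagging but not s3:getobjecttagging.
print(sorted(privileges.determine_allowed()))

# CloudTrail event names normalize the same way the tests show:
print(normalize_api_call("monitoring", "DescribeAlarms"))  # cloudwatch:describealarms
```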