├── .github
│   └── workflows
│       ├── publish.yml
│       └── test.yml
├── .gitignore
├── .pylintrc
├── LICENSE
├── MANIFEST.in
├── README.md
├── account-data
│   └── empty
├── bin
│   └── cloudtracker
├── cloudtracker
│   ├── __init__.py
│   ├── cli.py
│   ├── data
│   │   ├── aws_api_list.txt
│   │   └── cloudtrail_supported_actions.txt
│   └── datasources
│       ├── __init__.py
│       ├── athena.py
│       └── es.py
├── config.yaml.demo
├── docs
│   └── elasticsearch.md
├── hindsight
│   └── run
│       ├── analysis
│       │   ├── counter.cfg
│       │   └── counter.lua
│       ├── input
│       │   ├── file.cfg
│       │   ├── file.lua
│       │   └── json.lua
│       └── output
│           ├── elasticsearch_bulk_api.cfg
│           └── elasticsearch_bulk_api.lua
├── requirements-dev.txt
├── requirements.txt
├── setup.cfg
├── setup.py
├── tasks.py
└── tests
    ├── scripts
    │   └── pylint.sh
    └── unit
        └── test_cloudtracker.py

/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 | 
4 | name: Upload CloudTracker to PyPI
5 | 
6 | on:
7 |   release:
8 |     types: [created]
9 | 
10 | jobs:
11 |   deploy:
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v2
17 |     - name: Set up Python
18 |       uses: actions/setup-python@v1
19 |       with:
20 |         python-version: '3.x'
21 |     - name: Install dependencies
22 |       run: |
23 |         python -m pip install --upgrade pip
24 |         pip install setuptools wheel twine
25 |     - name: Build and publish
26 |       env:
27 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |       run: |
30 |         python setup.py sdist bdist_wheel
31 |         twine upload dist/*
32 | 
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | # .github/workflows/test.yml
2 | 
3 | name: Test
4 | 
5 | on: [push, pull_request]
6 | 
7 | jobs:
8 |   test:
9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - uses: actions/checkout@v2
12 | 
13 |       - name: Setup Python
14 |         uses: actions/setup-python@v1
15 |         with:
16 |           python-version: 3.7
17 | 
18 |       - name: Install dependencies
19 |         run: |
20 |           pip install -r requirements.txt
21 |           pip install -r requirements-dev.txt
22 | 
23 |       # - run: invoke build.install-package
24 |       - run: invoke test.help
25 |       # - run: invoke test.security
26 |       - run: invoke unit.nose
27 |       # - run: invoke test.lint
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.pyc
3 | *.egg-info
4 | .eggs
5 | venv/
6 | .coverage
7 | htmlcov/
8 | config.yaml
9 | account-data/
10 | my_account_iam.json
11 | 
12 | # IDEs
13 | .idea
14 | .vscode
15 | 
16 | # Working directory
17 | tmp/*
18 | 
19 | ########## Python ##########
20 | ### From gitignore.io
21 | # Byte-compiled / optimized / DLL files
22 | __pycache__/
23 | *.py[cod]
24 | *$py.class
25 | 
26 | # C extensions
27 | *.so
28 | 
29 | # Distribution / packaging
30 | .Python
31 | build/
32 | develop-eggs/
33 | dist/
34 | downloads/
35 | eggs/
36 | .eggs/
37 | lib/
38 | lib64/
39 | parts/
40 | sdist/
41 | var/
42 | wheels/
43 | pip-wheel-metadata/
44 | share/python-wheels/
45 | *.egg-info/
46 | .installed.cfg
47 | *.egg
48 | MANIFEST
49 | 
50 | # PyInstaller
51 | # Usually these files are written by a python
script from a template 52 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 53 | *.manifest 54 | *.spec 55 | 56 | # Installer logs 57 | pip-log.txt 58 | pip-delete-this-directory.txt 59 | 60 | # Unit test / coverage reports 61 | htmlcov/ 62 | .tox/ 63 | .nox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *.cover 70 | .hypothesis/ 71 | .pytest_cache/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # celery beat schedule file 97 | celerybeat-schedule 98 | 99 | # SageMath parsed files 100 | *.sage.py 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # Mr Developer 110 | .mr.developer.cfg 111 | .project 112 | .pydevproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code 6 | extension-pkg-whitelist= 7 | 8 | # Add files or directories to the blacklist. They should be base names, not 9 | # paths. 10 | ignore=CVS 11 | 12 | # Add files or directories matching the regex patterns to the blacklist. The 13 | # regex matches against base names, not paths. 14 | ignore-patterns= 15 | 16 | # Python code to execute, usually for sys.path manipulation such as 17 | # pygtk.require(). 18 | #init-hook= 19 | 20 | # Use multiple processes to speed up Pylint. 21 | jobs=4 22 | 23 | # List of plugins (as comma separated values of python modules names) to load, 24 | # usually to register additional checkers. 25 | load-plugins= 26 | 27 | # Pickle collected data for later comparisons. 28 | persistent=yes 29 | 30 | # Specify a configuration file. 31 | #rcfile= 32 | 33 | # Allow loading of arbitrary C extensions. Extensions are imported into the 34 | # active Python interpreter and may run arbitrary code. 35 | unsafe-load-any-extension=no 36 | 37 | 38 | [MESSAGES CONTROL] 39 | 40 | # Only show warnings with the listed confidence levels. Leave empty to show 41 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 42 | confidence= 43 | 44 | # Disable the message, report, category or checker with the given id(s). You 45 | # can either give multiple identifiers separated by comma (,) or put this 46 | # option multiple times (only on the command line, not in the configuration 47 | # file where it should appear only once).You can also use "--disable=all" to 48 | # disable everything first and then reenable specific checks. 
For example, if 49 | # you want to run only the similarities checker, you can use "--disable=all 50 | # --enable=similarities". If you want to run only the classes checker, but have 51 | # no Warning level messages displayed, use"--disable=all --enable=classes 52 | # --disable=W" 53 | disable=fixme,I0011,E1102,R0912,C0103,C0111,R1702,R0915,C0325,R0914,W0703,R1705,W0603,W0406 54 | 55 | # Enable the message, report, category or checker with the given id(s). You can 56 | # either give multiple identifier separated by comma (,) or put this option 57 | # multiple time (only on the command line, not in the configuration file where 58 | # it should appear only once). See also the "--disable" option for examples. 59 | enable= 60 | 61 | 62 | [REPORTS] 63 | 64 | # Python expression which should return a note less than 10 (10 is the highest 65 | # note). You have access to the variables errors warning, statement which 66 | # respectively contain the number of errors / warnings messages and the total 67 | # number of statements analyzed. This is used by the global evaluation report 68 | # (RP0004). 69 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 70 | 71 | # Template used to display messages. This is a python new-style format string 72 | # used to format the message information. See doc for all details 73 | #msg-template= 74 | 75 | # Set the output format. Available formats are text, parseable, colorized, json 76 | # and msvs (visual studio).You can also give a reporter class, eg 77 | # mypackage.mymodule.MyReporterClass. 78 | output-format=text 79 | 80 | # Tells whether to display a full report or only the messages 81 | reports=no 82 | 83 | # Activate the evaluation score. 84 | score=yes 85 | 86 | 87 | [REFACTORING] 88 | 89 | # Maximum number of nested blocks for function / method body 90 | max-nested-blocks=5 91 | 92 | 93 | [BASIC] 94 | 95 | # Naming hint for argument names 96 | argument-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 97 | 98 | # Regular expression matching correct argument names 99 | argument-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 100 | 101 | # Naming hint for attribute names 102 | attr-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 103 | 104 | # Regular expression matching correct attribute names 105 | attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 106 | 107 | # Bad variable names which should always be refused, separated by a comma 108 | bad-names=foo,bar,baz,toto,tutu,tata 109 | 110 | # Naming hint for class attribute names 111 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 112 | 113 | # Regular expression matching correct class attribute names 114 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 115 | 116 | # Naming hint for class names 117 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 118 | 119 | # Regular expression matching correct class names 120 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 121 | 122 | # Naming hint for constant names 123 | const-name-hint=(([a-zA-Z_][a-zA-Z0-9_]*)|(__.*__))$ 124 | 125 | # Regular expression matching correct constant names 126 | const-rgx=(([a-zA-Z_][a-zA-Z0-9_]*)|(__.*__))$ 127 | 128 | # Minimum line length for functions/classes that require docstrings, shorter 129 | # ones are exempt. 
130 | docstring-min-length=2 131 | 132 | # Naming hint for function names 133 | function-name-hint=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ 134 | 135 | # Regular expression matching correct function names 136 | function-rgx=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ 137 | 138 | # Good variable names which should always be accepted, separated by a comma 139 | good-names=e,f,i,j,k,ex,Run,_ 140 | 141 | # Include a hint for the correct naming format with invalid-name 142 | include-naming-hint=no 143 | 144 | # Naming hint for inline iteration names 145 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 146 | 147 | # Regular expression matching correct inline iteration names 148 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 149 | 150 | # Naming hint for method names 151 | method-name-hint=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ 152 | 153 | # Regular expression matching correct method names 154 | method-rgx=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ 155 | 156 | # Naming hint for module names 157 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 158 | 159 | # Regular expression matching correct module names 160 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 161 | 162 | # Colon-delimited sets of names that determine each other's naming style when 163 | # the name regexes allow several styles. 164 | name-group= 165 | 166 | # Regular expression which should only match function or class names that do 167 | # not require a docstring. 168 | no-docstring-rgx=^_ 169 | 170 | # List of decorators that produce properties, such as abc.abstractproperty. Add 171 | # to this list to register other decorators that produce valid properties. 172 | property-classes=abc.abstractproperty 173 | 174 | # Naming hint for variable names 175 | variable-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 176 | 177 | # Regular expression matching correct variable names 178 | variable-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ 179 | 180 | 181 | [FORMAT] 182 | 183 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 184 | expected-line-ending-format= 185 | 186 | # Regexp for a line that is allowed to be longer than the limit. 187 | ignore-long-lines=^\s*(# )??$ 188 | 189 | # Number of spaces of indent required inside a hanging or continued line. 190 | indent-after-paren=4 191 | 192 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 193 | # tab). 194 | indent-string=' ' 195 | 196 | # Maximum number of characters on a single line. 197 | max-line-length=120 198 | 199 | # Maximum number of lines in a module 200 | max-module-lines=1000 201 | 202 | # List of optional constructs for which whitespace checking is disabled. `dict- 203 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 204 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 205 | # `empty-line` allows space-only lines. 206 | no-space-check=trailing-comma,dict-separator 207 | 208 | # Allow the body of a class to be on the same line as the declaration if body 209 | # contains single statement. 210 | single-line-class-stmt=no 211 | 212 | # Allow the body of an if to be on the same line as the test if there is no 213 | # else. 214 | single-line-if-stmt=no 215 | 216 | 217 | [LOGGING] 218 | 219 | # Logging modules to check that the string format arguments are in logging 220 | # function parameter format 221 | logging-modules=logging 222 | 223 | 224 | [MISCELLANEOUS] 225 | 226 | # List of note tags to take in consideration, separated by a comma. 
227 | notes=FIXME,XXX,TODO 228 | 229 | 230 | [SIMILARITIES] 231 | 232 | # Ignore comments when computing similarities. 233 | ignore-comments=yes 234 | 235 | # Ignore docstrings when computing similarities. 236 | ignore-docstrings=yes 237 | 238 | # Ignore imports when computing similarities. 239 | ignore-imports=no 240 | 241 | # Minimum lines number of a similarity. 242 | min-similarity-lines=4 243 | 244 | 245 | [SPELLING] 246 | 247 | # Spelling dictionary name. Available dictionaries: none. To make it working 248 | # install python-enchant package. 249 | spelling-dict= 250 | 251 | # List of comma separated words that should not be checked. 252 | spelling-ignore-words= 253 | 254 | # A path to a file that contains private dictionary; one word per line. 255 | spelling-private-dict-file= 256 | 257 | # Tells whether to store unknown words to indicated private dictionary in 258 | # --spelling-private-dict-file option instead of raising a message. 259 | spelling-store-unknown-words=no 260 | 261 | 262 | [TYPECHECK] 263 | 264 | # List of decorators that produce context managers, such as 265 | # contextlib.contextmanager. Add to this list to register other decorators that 266 | # produce valid context managers. 267 | contextmanager-decorators=contextlib.contextmanager 268 | 269 | # List of members which are set dynamically and missed by pylint inference 270 | # system, and so shouldn't trigger E1101 when accessed. Python regular 271 | # expressions are accepted. 272 | generated-members= 273 | 274 | # Tells whether missing members accessed in mixin class should be ignored. A 275 | # mixin class is detected if its name ends with "mixin" (case insensitive). 276 | ignore-mixin-members=yes 277 | 278 | # This flag controls whether pylint should warn about no-member and similar 279 | # checks whenever an opaque object is returned when inferring. The inference 280 | # can return multiple potential results while evaluating a Python object, but 281 | # some branches might not be evaluated, which results in partial inference. In 282 | # that case, it might be useful to still emit no-member and other checks for 283 | # the rest of the inferred objects. 284 | ignore-on-opaque-inference=yes 285 | 286 | # List of class names for which member attributes should not be checked (useful 287 | # for classes with dynamically set attributes). This supports the use of 288 | # qualified names. 289 | ignored-classes=optparse.Values,thread._local,_thread._local 290 | 291 | # List of module names for which member attributes should not be checked 292 | # (useful for modules/projects where namespaces are manipulated during runtime 293 | # and thus existing member attributes cannot be deduced by static analysis. It 294 | # supports qualified module names, as well as Unix pattern matching. 295 | ignored-modules= 296 | 297 | # Show a hint with possible names when a member name was not found. The aspect 298 | # of finding the hint is based on edit distance. 299 | missing-member-hint=yes 300 | 301 | # The minimum edit distance a name should have in order to be considered a 302 | # similar match for a missing member name. 303 | missing-member-hint-distance=1 304 | 305 | # The total number of similar names that should be taken in consideration when 306 | # showing a hint for a missing member. 307 | missing-member-max-choices=1 308 | 309 | 310 | [VARIABLES] 311 | 312 | # List of additional names supposed to be defined in builtins. Remember that 313 | # you should avoid to define new builtins when possible. 
314 | additional-builtins= 315 | 316 | # Tells whether unused global variables should be treated as a violation. 317 | allow-global-unused-variables=yes 318 | 319 | # List of strings which can identify a callback function by name. A callback 320 | # name must start or end with one of those strings. 321 | callbacks=cb_,_cb 322 | 323 | # A regular expression matching the name of dummy variables (i.e. expectedly 324 | # not used). 325 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 326 | 327 | # Argument names that match this expression will be ignored. Default to name 328 | # with leading underscore 329 | ignored-argument-names=_.*|^ignored_|^unused_ 330 | 331 | # Tells whether we should check for unused import in __init__ files. 332 | init-import=no 333 | 334 | # List of qualified module names which can have objects that can redefine 335 | # builtins. 336 | redefining-builtins-modules=six.moves,future.builtins 337 | 338 | 339 | [CLASSES] 340 | 341 | # List of method names used to declare (i.e. assign) instance attributes. 342 | defining-attr-methods=__init__,__new__,setUp 343 | 344 | # List of member names, which should be excluded from the protected access 345 | # warning. 346 | exclude-protected=_asdict,_fields,_replace,_source,_make 347 | 348 | # List of valid names for the first argument in a class method. 349 | valid-classmethod-first-arg=cls 350 | 351 | # List of valid names for the first argument in a metaclass class method. 352 | valid-metaclass-classmethod-first-arg=mcs 353 | 354 | 355 | [DESIGN] 356 | 357 | # Maximum number of arguments for function / method 358 | max-args=5 359 | 360 | # Maximum number of attributes for a class (see R0902). 361 | max-attributes=10 362 | 363 | # Maximum number of boolean expressions in a if statement 364 | max-bool-expr=5 365 | 366 | # Maximum number of branch for function / method body 367 | max-branches=25 368 | 369 | # Maximum number of locals for function / method body 370 | max-locals=25 371 | 372 | # Maximum number of parents for a class (see R0901). 373 | max-parents=7 374 | 375 | # Maximum number of public methods for a class (see R0904). 376 | max-public-methods=30 377 | 378 | # Maximum number of return / yield for function / method body 379 | max-returns=10 380 | 381 | # Maximum number of statements in function / method body 382 | max-statements=50 383 | 384 | # Minimum number of public methods for a class (see R0903). 385 | min-public-methods=0 386 | 387 | 388 | [IMPORTS] 389 | 390 | # Allow wildcard imports from modules that define __all__. 391 | allow-wildcard-with-all=no 392 | 393 | # Analyse import fallback blocks. This can be used to support both Python 2 and 394 | # 3 compatible code, which means that the block might have code that exists 395 | # only in one or another interpreter, leading to false positives when analysed. 396 | analyse-fallback-blocks=no 397 | 398 | # Deprecated modules which should not be used, separated by a comma 399 | deprecated-modules=optparse,tkinter.tix 400 | 401 | # Create a graph of external dependencies in the given file (report RP0402 must 402 | # not be disabled) 403 | ext-import-graph= 404 | 405 | # Create a graph of every (i.e. 
internal and external) dependencies in the 406 | # given file (report RP0402 must not be disabled) 407 | import-graph= 408 | 409 | # Create a graph of internal dependencies in the given file (report RP0402 must 410 | # not be disabled) 411 | int-import-graph= 412 | 413 | # Force import order to recognize a module as part of the standard 414 | # compatibility libraries. 415 | known-standard-library= 416 | 417 | # Force import order to recognize a module as part of a third party library. 418 | known-third-party=enchant 419 | 420 | 421 | [EXCEPTIONS] 422 | 423 | # Exceptions that will emit a warning when being caught. Defaults to 424 | # "Exception" 425 | overgeneral-exceptions=Exception -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Duo Security 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CloudTracker helps you find over-privileged IAM users and roles by comparing CloudTrail logs with current IAM policies. 2 | 3 | *Intro post: https://duo.com/blog/introducing-cloudtracker-an-aws-cloudtrail-log-analyzer* 4 | 5 | 6 | This document will describe the setup that uses Athena and how to use the tool. CloudTracker no longer requires ElasticSearch, but if you'd like to use CloudTracker with ElasticSearch please see [ElasticSearch installation and ingestion](docs/elasticsearch.md). 7 | 8 | Setup 9 | ===== 10 | 11 | ### Step 1: Setup CloudTracker 12 | 13 | ``` 14 | python3 -m venv ./venv && source venv/bin/activate 15 | pip install cloudtracker 16 | ``` 17 | 18 | Note: To install with ElasticSearch support, see the [ElasticSearch docs](docs/elasticsearch.md). 
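Once installed, a quick sanity check (assuming the virtualenv created above is still active) is to print the CLI's usage text:

```
cloudtracker --help
```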
19 | 
20 | ### Step 2: Download your IAM data
21 | Download a copy of the IAM data of an account using the AWS CLI:
22 | 
23 | ```
24 | mkdir -p account-data
25 | aws iam get-account-authorization-details > account-data/demo_iam.json
26 | ```
27 | 
28 | ### Step 3: Configure CloudTracker
29 | 
30 | Create a `config.yaml` file with contents similar to:
31 | 
32 | ```
33 | athena:
34 |   s3_bucket: my_log_bucket
35 |   path: my_prefix
36 | accounts:
37 |   - name: demo
38 |     id: 111111111111
39 |     iam: account-data/demo_iam.json
40 | ```
41 | 
42 | This assumes your CloudTrail logs are at `s3://my_log_bucket/my_prefix/AWSLogs/111111111111/CloudTrail/`.
43 | Set `my_prefix` to `''` if you have no prefix.
44 | 
45 | If your CloudTrail is managed through an organization, you can configure this in the `athena` section:
46 | 
47 | ```
48 | athena:
49 |   s3_bucket: my_log_bucket
50 |   path: my_prefix
51 |   org_id: o-myid123
52 | ```
53 | 
54 | ### Step 4: Run CloudTracker
55 | 
56 | CloudTracker uses boto3 and assumes it has access to AWS credentials in environment variables; one way to provide these is [aws-vault](https://github.com/99designs/aws-vault).
57 | 
58 | You will need the managed policy `arn:aws:iam::aws:policy/AmazonAthenaFullAccess`, plus `s3:GetObject` and `s3:ListBucket` privileges for the S3 bucket containing the CloudTrail logs.
59 | 
60 | Once you're running in an aws-vault environment (or otherwise have your environment variables set up for an AWS session), you can run:
61 | 
62 | ```
63 | cloudtracker --account demo --list users
64 | ```
65 | 
66 | This will perform all of the initial setup, which takes about a minute. Subsequent calls will be faster.
67 | 
68 | 
69 | Clean-up
70 | --------
71 | 
72 | CloudTracker does not currently clean up after itself, so query results are left behind in the default bucket `aws-athena-query-results-ACCOUNT_ID-REGION`.
73 | 
74 | If you want to remove all signs of CloudTracker, delete the query results from that bucket and, in Athena, run `DROP DATABASE cloudtracker CASCADE`.
75 | 
76 | 
77 | Example usage
78 | =============
79 | 
80 | Listing actors
81 | --------------
82 | CloudTracker provides command line options to list the users and roles in an account. For example:
83 | ```
84 | $ cloudtracker --account demo --list users --start 2018-01-01
85 |   alice
86 | - bob
87 |   charlie
88 | ```
89 | 
90 | In this example, the list of users was obtained from the IAM information, and the CloudTrail logs show no record of the user "bob" being used since January 1, 2018; CloudTracker therefore advises removing the user by prefixing it with a "-".
91 | 
92 | Note that not all AWS activities are stored in CloudTrail logs; in particular, data-level events such as reading and writing S3 objects or putting CloudWatch metrics are not recorded. It is therefore possible that "bob" has been active, but only with actions that are not recorded in CloudTrail. Note also that you may have inactive users or roles that you still wish to keep around; for example, a role that is only used once a year during an annual task. You should therefore use this output as guidance, but not always as instructions.
93 | 
94 | You can also list roles.
95 | 
96 | ```
97 | $ cloudtracker --account demo --list roles --start 2018-01-01
98 |   admin
99 | ```
100 | 
101 | Listing actions of actors
102 | -------------------------
103 | The main purpose of CloudTracker is to look at the API calls made by actors (users and roles).
Let's assume `alice` has `SecurityAuditor` privileges for her user, which grant her the ability to `List` and `Describe` metadata for resources, plus the ability to `AssumeRole` into the `admin` role. We can see her actions:
104 | 
105 | ```
106 | cloudtracker --account demo --user alice
107 | ...
108 |   cloudwatch:describealarmhistory
109 |   cloudwatch:describealarms
110 | - cloudwatch:describealarmsformetric
111 | - cloudwatch:getdashboard
112 | ? cloudwatch:getmetricdata
113 | ...
114 | + s3:createbucket
115 | ...
116 | ```
117 | 
118 | Many actions will be shown, most of them unused, as there are over a thousand AWS APIs and most people use only a few. In the snippet above, we can see that she has called `DescribeAlarmHistory` and `DescribeAlarms`. She has never called `DescribeAlarmsForMetric` or `GetDashboard` even though she has those privileges, and it is unknown whether she has called `GetMetricData`, as that call is not recorded in CloudTrail. Further down, there is a call to `CreateBucket` that she made but does not have privileges for. This can happen if the actor previously had privileges for an action and used them, but those privileges were later taken away. Errors are filtered out, so if the actor made a call but was denied, it does not show up as used.
119 | 
120 | As there may be a lot of unused or unknown actions, we can filter things down:
121 | ```
122 | cloudtracker --account demo --user alice --show-used
123 | Getting info on alice, user created 2017-09-02T18:02:14Z
124 |   cloudwatch:describealarmhistory
125 |   cloudwatch:describealarms
126 | + s3:createbucket
127 |   sts:assumerole
128 | ```
129 | 
130 | We can do the same thing for roles. For example:
131 | ```
132 | cloudtracker --account demo --role admin --show-used
133 | Getting info for role admin
134 |   s3:createbucket
135 |   iam:createuser
136 | ```
137 | 
138 | ### Output explanation
139 | CloudTracker shows a diff of the privileges granted vs. used. The symbols mean the following:
140 | 
141 | - ` ` No symbol means this privilege is used, so leave it as is.
142 | - `-` A minus sign means the privilege was granted, but not used, so you should remove it.
143 | - `?` A question mark means the privilege was granted, but it is unknown whether it was used because it is not recorded in CloudTrail.
144 | - `+` A plus sign means the privilege was not granted, but was used. The only way this is possible is if the privilege was previously granted, used, and then removed, so you may want to add that privilege back.
145 | 
146 | 
147 | Advanced functionality (only supported with ElasticSearch currently)
148 | ----------------------
149 | This functionality is not yet supported with the Athena configuration of CloudTracker.
150 | 
151 | You may know that `alice` can assume into the `admin` role, so let's look at what she did there using the `--destrole` argument:
152 | ```
153 | cloudtracker --account demo --user alice --destrole admin --show-used
154 | Getting info on alice, user created 2017-09-02T18:02:14Z
155 | Getting info for AssumeRole into admin
156 |   s3:createbucket
157 |   iam:createuser
158 | ```
159 | 
160 | You may also know that `charlie` can assume into the `admin` role, so let's look at what he did there:
161 | ```
162 | cloudtracker --account demo --user charlie --destrole admin --show-used
163 | Getting info on charlie, user created 2017-10-01T01:01:01Z
164 | Getting info for AssumeRole into admin
165 |   s3:createbucket
166 | ```
167 | 
168 | In this example we can see that `charlie` has only ever created an S3 bucket as `admin`, so we may want to remove `charlie`'s ability to assume into this role, or create another role that lacks the ability to create IAM users, which we saw `alice` use. This is the key feature of CloudTracker: identifying which users actually make use of the roles they can assume into, and which actions they use there, is difficult without a tool like this.
169 | 
170 | ### Working with multiple accounts
171 | 
172 | Amazon has advocated the use of multiple AWS accounts in much of its recent guidance, as this helps reduce the blast radius of incidents, among other benefits. Once you start using multiple accounts, though, you may need to rethink how you access them all. One common pattern has users assuming roles into different accounts. We can analyze the role assumptions of users into a different account the same way we did previously for a single account, except that this time you need to ensure the CloudTrail logs from both accounts of interest are loaded into ElasticSearch.
173 | 
174 | 
175 | ```
176 | cloudtracker --account demo --user charlie --destaccount backup --destrole admin --show-used
177 | Getting info on charlie, user created 2017-10-01T01:01:01Z
178 | Getting info for AssumeRole into admin
179 |   s3:createbucket
180 | ```
181 | 
182 | In this example, we used the `--destaccount` option to specify the destination account.
183 | 
184 | 
185 | Data files
186 | ==========
187 | CloudTracker ships with two long text files that tell it what actions exist.
188 | 
189 | aws_api_list.txt
190 | ----------------
191 | This file contains all possible AWS API calls that can be made. One use of this is to identify the privileges granted by an IAM policy when a wildcard such as `s3:*` needs to be expanded.
192 | 
193 | This file was created by running:
194 | ```
195 | git clone --depth 1 -b master https://github.com/boto/botocore.git
196 | find botocore/botocore/data -name '*.json' | xargs cat | jq -r 'select(.operations != null) as $parent | .operations | keys | .[] | $parent.metadata.endpointPrefix +":"+.' | sort | uniq > aws_api_list.txt
197 | ```
198 | 
199 | cloudtrail_supported_actions.txt
200 | --------------------------------
201 | This file contains the AWS API calls that are recorded in CloudTrail logs. This is used to identify when the status of a privilege is "unknown" (i.e., it is not known whether it has been used).
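To make the "unknown" status concrete, here is a minimal, simplified sketch of how a service:event list like this can be loaded and checked. The `load_action_list` helper below is hypothetical; CloudTracker's real loader (`read_aws_api_list` in `cloudtracker/__init__.py`) additionally normalizes service renames and date-suffixed event names:

```
def load_action_list(path):
    """Read service:event pairs, one per line, into a lowercased lookup dict."""
    actions = {}
    with open(path) as f:
        for line in f:
            if not line.strip():
                continue  # skip blank lines
            service, event = line.rstrip().split(":")
            actions["{}:{}".format(service.lower(), event.lower())] = True
    return actions

# Example: can a call's usage be determined from CloudTrail at all?
supported = load_action_list("cloudtracker/data/cloudtrail_supported_actions.txt")
print("s3:createbucket" in supported)           # recorded by CloudTrail
print("cloudwatch:getmetricdata" in supported)  # not recorded, hence shown as "?"
```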
202 | 
203 | This file was created by copying aws_api_list.txt and manually removing events, based on the "CloudTrail Supported Services" section of the CloudTrail user guide (https://docs.aws.amazon.com/awscloudtrail/latest/userguide/awscloudtrail-ug.pdf), following the links to the various services and reading through what is and isn't supported.
204 | 
--------------------------------------------------------------------------------
/account-data/empty:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duo-labs/cloudtracker/822ef553266aca2b1d54fc44e09c230f6df77a8a/account-data/empty
--------------------------------------------------------------------------------
/bin/cloudtracker:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from cloudtracker.cli import main
3 | main()
4 | 
--------------------------------------------------------------------------------
/cloudtracker/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright 2018 Duo Security
3 | 
4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
5 | following conditions are met:
6 | 
7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
8 | disclaimer.
9 | 
10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
11 | following disclaimer in the documentation and/or other materials provided with the distribution.
12 | 
13 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
14 | products derived from this software without specific prior written permission.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
17 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
22 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 | ---------------------------------------------------------------------------
24 | """
25 | __version__ = "2.1.5"
26 | 
27 | import json
28 | import logging
29 | import pkg_resources
30 | import re
31 | 
32 | from colors import color
33 | import jmespath
34 | 
35 | cloudtrail_supported_actions = None
36 | 
37 | logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
38 | 
39 | # Translate CloudTrail name -> IAM name
40 | # Pulled from: http://bit.ly/2txbx1L
41 | # but some of the names there seem reversed
42 | SERVICE_RENAMES = {
43 |     "monitoring": "cloudwatch",
44 |     "email": "ses",
45 | }
46 | 
47 | # Translate IAM name -> CloudTrail name (SOAP API name)
48 | # Pulled from https://docs.aws.amazon.com/AmazonS3/latest/dev/cloudtrail-logging.html
49 | # I think S3 is the only service where IAM names differ from the API calls.
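# Note: these renames are applied in both directions below:
# get_actions_from_statement() rewrites CloudTrail-style names to IAM-style
# names while expanding policy wildcards, and print_diff() does the same for
# events read from the logs, so granted and performed actions are compared
# under a single naming scheme.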
50 | EVENT_RENAMES = {
51 |     "s3:listallmybuckets": "s3:listbuckets",
52 |     "s3:getbucketaccesscontrolpolicy": "s3:getbucketacl",
53 |     "s3:setbucketaccesscontrolpolicy": "s3:putbucketacl",
54 |     "s3:getbucketloggingstatus": "s3:getbucketlogging",
55 |     "s3:setbucketloggingstatus": "s3:putbucketlogging",
56 | }
57 | 
58 | # List of actions seen in CloudTrail logs for which no IAM policies exist.
59 | # These are allowed by default.
60 | NO_IAM = {
61 |     "sts:getcalleridentity": True,
62 |     "sts:getsessiontoken": True,
63 |     "signin:consolelogin": True,
64 |     "signin:checkmfa": True,
65 |     "signin:exitrole": True,
66 |     "signin:renewrole": True,
67 |     "signin:switchrole": True,
68 | }
69 | 
70 | 
71 | class Privileges(object):
72 |     """Keep track of privileges an actor has been granted"""
73 | 
74 |     stmts = None
75 |     roles = None
76 |     aws_api_list = None
77 | 
78 |     def __init__(self, aws_api_list):
79 |         self.stmts = []
80 |         self.roles = []
81 |         self.aws_api_list = aws_api_list
82 | 
83 |     def add_stmt(self, stmt):
84 |         """Adds a statement from an IAM policy"""
85 |         if "Action" not in stmt:
86 |             # TODO Implement NotAction
87 |             return
88 |         self.stmts.append(stmt)
89 | 
90 |     def get_actions_from_statement(self, stmt):
91 |         """Figures out what API calls have been granted from a statement"""
92 |         actions = {}
93 | 
94 |         for action in make_list(stmt["Action"]):
95 |             # Normalize it
96 |             action = action.lower()
97 |             # Convert its globbing to a regex
98 |             action = "^" + action.replace("*", ".*") + "$"
99 | 
100 |             for possible_action in self.aws_api_list:
101 |                 for iam_name, cloudtrail_name in EVENT_RENAMES.items():
102 |                     if possible_action == cloudtrail_name:
103 |                         possible_action = iam_name
104 |                 if re.match(action, possible_action):
105 |                     actions[possible_action] = True
106 | 
107 |         return actions
108 | 
109 |     def determine_allowed(self):
110 |         """After statements have been added from IAM policies, find all the allowed API calls"""
111 |         actions = {}
112 | 
113 |         # Look at allows first
114 |         for stmt in self.stmts:
115 |             if stmt["Effect"] == "Allow":
116 |                 stmt_actions = self.get_actions_from_statement(stmt)
117 |                 for action in stmt_actions:
118 |                     if action not in actions:
119 |                         actions[action] = [stmt]
120 |                     else:
121 |                         actions[action].append(stmt)
122 | 
123 |         # Look at denies
124 |         for stmt in self.stmts:
125 |             if (
126 |                 stmt["Effect"] == "Deny"
127 |                 and "*" in make_list(stmt.get("Resource", None))
128 |                 and stmt.get("Condition", None) is None
129 |             ):
130 | 
131 |                 stmt_actions = self.get_actions_from_statement(stmt)
132 |                 for action in stmt_actions:
133 |                     if action in actions:
134 |                         del actions[action]
135 | 
136 |         return list(actions)
137 | 
138 | 
139 | def make_list(obj):
140 |     """Convert an object to a list if it is not already"""
141 |     if isinstance(obj, list):
142 |         return obj
143 |     return [obj]
144 | 
145 | 
146 | def normalize_api_call(service, eventName):
147 |     """Translate API calls to a common representation"""
148 |     service = service.lower()
149 |     eventName = eventName.lower()
150 | 
151 |     # Remove the dates from event names, such as createdistribution2015_07_27
152 |     eventName = eventName.split("20")[0]
153 | 
154 |     # Rename the service
155 |     if service in SERVICE_RENAMES:
156 |         service = SERVICE_RENAMES[service]
157 | 
158 |     return "{}:{}".format(service, eventName)
159 | 
160 | 
161 | def get_account_iam(account):
162 |     """Given account data from the config file, open the IAM file for the account"""
163 |     return json.load(open(account["iam"]))
164 | 
165 | 
166 | def get_allowed_users(account_iam):
167 |
"""Return all the users in an IAM file""" 168 | return jmespath.search("UserDetailList[].UserName", account_iam) 169 | 170 | 171 | def get_allowed_roles(account_iam): 172 | """Return all the roles in an IAM file""" 173 | return jmespath.search("RoleDetailList[].RoleName", account_iam) 174 | 175 | 176 | def print_actor_diff(performed_actors, allowed_actors, use_color): 177 | """ 178 | Given a list of actors that have performed actions, and a list that exist in the account, 179 | print the actors and whether they are still active. 180 | """ 181 | PERFORMED_AND_ALLOWED = 1 182 | PERFORMED_BUT_NOT_ALLOWED = 2 183 | ALLOWED_BUT_NOT_PERFORMED = 3 184 | 185 | actors = {} 186 | for actor in performed_actors: 187 | if actor in allowed_actors: 188 | actors[actor] = PERFORMED_AND_ALLOWED 189 | else: 190 | actors[actor] = PERFORMED_BUT_NOT_ALLOWED 191 | 192 | for actor in allowed_actors: 193 | if actor not in actors: 194 | actors[actor] = ALLOWED_BUT_NOT_PERFORMED 195 | 196 | for actor in sorted(actors.keys()): 197 | if actors[actor] == PERFORMED_AND_ALLOWED: 198 | colored_print(" {}".format(actor), use_color, "white") 199 | elif actors[actor] == PERFORMED_BUT_NOT_ALLOWED: 200 | # Don't show users that existed but have since been deleted 201 | continue 202 | elif actors[actor] == ALLOWED_BUT_NOT_PERFORMED: 203 | colored_print("- {}".format(actor), use_color, "red") 204 | else: 205 | raise Exception("Unknown constant") 206 | 207 | 208 | def get_user_iam(username, account_iam): 209 | """Given the IAM of an account, and a username, return the IAM data for the user""" 210 | user_iam = jmespath.search( 211 | "UserDetailList[] | [?UserName == `{}`] | [0]".format(username), account_iam 212 | ) 213 | if user_iam is None: 214 | exit("ERROR: Unknown user named {}".format(username)) 215 | return user_iam 216 | 217 | 218 | def get_role_iam(rolename, account_iam): 219 | """Given the IAM of an account, and a role name, return the IAM data for the role""" 220 | role_iam = jmespath.search( 221 | "RoleDetailList[] | [?RoleName == `{}`] | [0]".format(rolename), account_iam 222 | ) 223 | if role_iam is None: 224 | raise Exception("Unknown role named {}".format(rolename)) 225 | return role_iam 226 | 227 | 228 | def get_user_allowed_actions(aws_api_list, user_iam, account_iam): 229 | """Return the privileges granted to a user by IAM""" 230 | groups = user_iam["GroupList"] 231 | managed_policies = user_iam["AttachedManagedPolicies"] 232 | 233 | privileges = Privileges(aws_api_list) 234 | 235 | # Get permissions from groups 236 | for group in groups: 237 | group_iam = jmespath.search( 238 | "GroupDetailList[] | [?GroupName == `{}`] | [0]".format(group), account_iam 239 | ) 240 | if group_iam is None: 241 | continue 242 | # Get privileges from managed policies attached to the group 243 | for managed_policy in group_iam["AttachedManagedPolicies"]: 244 | policy_filter = "Policies[?Arn == `{}`].PolicyVersionList[?IsDefaultVersion == true] | [0][0].Document" 245 | policy = jmespath.search( 246 | policy_filter.format(managed_policy["PolicyArn"]), account_iam 247 | ) 248 | if policy is None: 249 | continue 250 | for stmt in make_list(policy["Statement"]): 251 | privileges.add_stmt(stmt) 252 | 253 | # Get privileges from in-line policies attached to the group 254 | for inline_policy in group_iam["GroupPolicyList"]: 255 | policy = inline_policy["PolicyDocument"] 256 | for stmt in make_list(policy["Statement"]): 257 | privileges.add_stmt(stmt) 258 | 259 | # Get privileges from managed policies attached to the user 260 | for 
managed_policy in managed_policies:
261 |         policy_filter = "Policies[?Arn == `{}`].PolicyVersionList[?IsDefaultVersion == true] | [0][0].Document"
262 |         policy = jmespath.search(
263 |             policy_filter.format(managed_policy["PolicyArn"]), account_iam
264 |         )
265 |         if policy is None:
266 |             continue
267 |         for stmt in make_list(policy["Statement"]):
268 |             privileges.add_stmt(stmt)
269 | 
270 |     # Get privileges from inline policies attached to the user
271 |     for stmt in (
272 |         jmespath.search("UserPolicyList[].PolicyDocument.Statement", user_iam) or []
273 |     ):
274 |         privileges.add_stmt(stmt)
275 | 
276 |     return privileges.determine_allowed()
277 | 
278 | 
279 | def get_role_allowed_actions(aws_api_list, role_iam, account_iam):
280 |     """Return the privileges granted to a role by IAM"""
281 |     privileges = Privileges(aws_api_list)
282 | 
283 |     # Get privileges from managed policies
284 |     for managed_policy in role_iam["AttachedManagedPolicies"]:
285 |         policy_filter = "Policies[?Arn == `{}`].PolicyVersionList[?IsDefaultVersion == true] | [0][0].Document"
286 |         policy = jmespath.search(
287 |             policy_filter.format(managed_policy["PolicyArn"]), account_iam
288 |         )
289 |         if policy is None:
290 |             continue
291 |         for stmt in make_list(policy["Statement"]):
292 |             privileges.add_stmt(stmt)
293 | 
294 |     # Get privileges from inline policies attached to the role
295 |     for policy in role_iam["RolePolicyList"]:
296 |         for stmt in make_list(policy["PolicyDocument"]["Statement"]):
297 |             privileges.add_stmt(stmt)
298 | 
299 |     return privileges.determine_allowed()
300 | 
301 | 
302 | def is_recorded_by_cloudtrail(action):
303 |     """Given an action, return True if it would be logged by CloudTrail"""
304 |     if action in cloudtrail_supported_actions:
305 |         return True
306 |     return False
307 | 
308 | 
309 | def colored_print(text, use_color=True, color_name="white"):
310 |     """Print with or without color codes"""
311 |     if use_color:
312 |         print(color(text, fg=color_name))
313 |     else:
314 |         print(text)
315 | 
316 | 
317 | def print_diff(performed_actions, allowed_actions, printfilter, use_color):
318 |     """
319 |     For an actor, given the actions they performed, and the privileges they were granted,
320 |     print what they were allowed to do but did not, and other differences.
321 | """ 322 | PERFORMED_AND_ALLOWED = 1 323 | PERFORMED_BUT_NOT_ALLOWED = 2 324 | ALLOWED_BUT_NOT_PERFORMED = 3 325 | ALLOWED_BUT_NOT_KNOWN_IF_PERFORMED = 4 326 | 327 | actions = {} 328 | 329 | for action in performed_actions: 330 | # Convert to IAM names 331 | for iam_name, cloudtrail_name in EVENT_RENAMES.items(): 332 | if action == cloudtrail_name: 333 | action = iam_name 334 | 335 | # See if this was allowed or not 336 | if action in allowed_actions: 337 | actions[action] = PERFORMED_AND_ALLOWED 338 | else: 339 | if action in NO_IAM: 340 | # Ignore actions in cloudtrail such as sts:getcalleridentity that are allowed 341 | # whether or not they are in IAM 342 | continue 343 | actions[action] = PERFORMED_BUT_NOT_ALLOWED 344 | 345 | # Find actions that were allowed, but there is no record of them being used 346 | for action in allowed_actions: 347 | if action not in actions: 348 | if not is_recorded_by_cloudtrail(action): 349 | actions[action] = ALLOWED_BUT_NOT_KNOWN_IF_PERFORMED 350 | else: 351 | actions[action] = ALLOWED_BUT_NOT_PERFORMED 352 | 353 | for action in sorted(actions.keys()): 354 | # Convert CloudTrail name back to IAM name 355 | display_name = action 356 | 357 | if not printfilter.get("show_benign", True): 358 | # Ignore actions that won't exfil or modify resources 359 | if ":list" in display_name or ":describe" in display_name: 360 | continue 361 | 362 | if actions[action] == PERFORMED_AND_ALLOWED: 363 | colored_print(" {}".format(display_name), use_color, "white") 364 | elif actions[action] == PERFORMED_BUT_NOT_ALLOWED: 365 | colored_print("+ {}".format(display_name), use_color, "green") 366 | elif actions[action] == ALLOWED_BUT_NOT_PERFORMED: 367 | if printfilter.get("show_used", True): 368 | # Ignore this as it wasn't used 369 | continue 370 | colored_print("- {}".format(display_name), use_color, "red") 371 | elif actions[action] == ALLOWED_BUT_NOT_KNOWN_IF_PERFORMED: 372 | if printfilter.get("show_used", True): 373 | # Ignore this as it wasn't used 374 | continue 375 | if printfilter.get("show_unknown", True): 376 | colored_print("? 
{}".format(display_name), use_color, "yellow")
377 |         else:
378 |             raise Exception("Unknown constant")
379 | 
380 | 
381 | def get_account(accounts, account_name):
382 |     """
383 |     Gets the account struct from the config file, for the account name specified
384 | 
385 |     accounts: array of accounts from the config file
386 |     account_name: name to search for (or ID)
387 |     """
388 |     for account in accounts:
389 |         if account_name == account["name"] or account_name == str(account["id"]):
390 |             # Sanity check all values exist
391 |             if "name" not in account or "id" not in account or "iam" not in account:
392 |                 exit(
393 |                     "ERROR: Account {} does not specify an id or iam in the config file".format(
394 |                         account_name
395 |                     )
396 |                 )
397 | 
398 |             # Sanity check account ID (must be exactly 12 digits)
399 |             if not re.search(r"^[0-9]{12}$", str(account["id"])):
400 |                 exit("ERROR: {} is not a 12-digit account id".format(account["id"]))
401 | 
402 |             return account
403 |     exit("ERROR: Account name {} not found in config".format(account_name))
404 |     return None
405 | 
406 | 
407 | def read_aws_api_list(aws_api_list_file="aws_api_list.txt"):
408 |     """Read in the list of all known AWS API calls"""
409 |     api_list_path = pkg_resources.resource_filename(
410 |         __name__, "data/{}".format(aws_api_list_file)
411 |     )
412 |     aws_api_list = {}
413 |     with open(api_list_path) as f:
414 |         lines = f.readlines()
415 |         for line in lines:
416 |             service, event = line.rstrip().split(":")
417 |             aws_api_list[normalize_api_call(service, event)] = True
418 |     return aws_api_list
419 | 
420 | 
421 | def run(args, config, start, end):
422 |     """Perform the requested command"""
423 |     use_color = args.use_color
424 | 
425 |     account = get_account(config["accounts"], args.account)
426 | 
427 |     if "elasticsearch" in config:
428 |         try:
429 |             from cloudtracker.datasources.es import ElasticSearch
430 |         except ImportError:
431 |             exit(
432 |                 "Elasticsearch support not installed.
Install with support via " 433 | "'pip install git+https://github.com/duo-labs/cloudtracker.git#egg=cloudtracker[es1]' for " 434 | "elasticsearch 1 support, or " 435 | "'pip install git+https://github.com/duo-labs/cloudtracker.git#egg=cloudtracker[es6]' for " 436 | "elasticsearch 6 support" 437 | ) 438 | datasource = ElasticSearch(config["elasticsearch"], start, end) 439 | else: 440 | logging.debug("Using Athena") 441 | from cloudtracker.datasources.athena import Athena 442 | 443 | datasource = Athena(config["athena"], account, start, end, args) 444 | 445 | # Read AWS actions 446 | aws_api_list = read_aws_api_list() 447 | 448 | # Read cloudtrail_supported_events 449 | global cloudtrail_supported_actions 450 | ct_actions_path = pkg_resources.resource_filename( 451 | __name__, "data/{}".format("cloudtrail_supported_actions.txt") 452 | ) 453 | cloudtrail_supported_actions = {} 454 | with open(ct_actions_path) as f: 455 | lines = f.readlines() 456 | for line in lines: 457 | (service, event) = line.rstrip().split(":") 458 | cloudtrail_supported_actions[normalize_api_call(service, event)] = True 459 | 460 | account_iam = get_account_iam(account) 461 | 462 | if args.list: 463 | actor_type = args.list 464 | 465 | if actor_type == "users": 466 | allowed_actors = get_allowed_users(account_iam) 467 | performed_actors = datasource.get_performed_users() 468 | elif actor_type == "roles": 469 | allowed_actors = get_allowed_roles(account_iam) 470 | performed_actors = datasource.get_performed_roles() 471 | else: 472 | exit("ERROR: --list argument must be one of 'users' or 'roles'") 473 | 474 | print_actor_diff(performed_actors, allowed_actors, use_color) 475 | 476 | else: 477 | if args.destaccount: 478 | destination_account = get_account(config["accounts"], args.destaccount) 479 | else: 480 | destination_account = account 481 | 482 | destination_iam = get_account_iam(destination_account) 483 | 484 | search_query = datasource.get_search_query() 485 | 486 | if args.user: 487 | username = args.user 488 | 489 | user_iam = get_user_iam(username, account_iam) 490 | print( 491 | "Getting info on {}, user created {}".format( 492 | args.user, user_iam["CreateDate"] 493 | ) 494 | ) 495 | 496 | if args.destrole: 497 | dest_role_iam = get_role_iam(args.destrole, destination_iam) 498 | print("Getting info for AssumeRole into {}".format(args.destrole)) 499 | 500 | allowed_actions = get_role_allowed_actions( 501 | aws_api_list, dest_role_iam, destination_iam 502 | ) 503 | performed_actions = datasource.get_performed_event_names_by_user_in_role( 504 | search_query, user_iam, dest_role_iam 505 | ) 506 | else: 507 | allowed_actions = get_user_allowed_actions( 508 | aws_api_list, user_iam, account_iam 509 | ) 510 | performed_actions = datasource.get_performed_event_names_by_user( 511 | search_query, user_iam 512 | ) 513 | elif args.role: 514 | rolename = args.role 515 | role_iam = get_role_iam(rolename, account_iam) 516 | print("Getting info for role {}".format(rolename)) 517 | 518 | if args.destrole: 519 | dest_role_iam = get_role_iam(args.destrole, destination_iam) 520 | print("Getting info for AssumeRole into {}".format(args.destrole)) 521 | 522 | allowed_actions = get_role_allowed_actions( 523 | aws_api_list, dest_role_iam, destination_iam 524 | ) 525 | performed_actions = datasource.get_performed_event_names_by_role_in_role( 526 | search_query, role_iam, dest_role_iam 527 | ) 528 | else: 529 | allowed_actions = get_role_allowed_actions( 530 | aws_api_list, role_iam, account_iam 531 | ) 532 | performed_actions = 
datasource.get_performed_event_names_by_role( 533 | search_query, role_iam 534 | ) 535 | else: 536 | exit("ERROR: Must specify a user or a role") 537 | 538 | printfilter = {} 539 | printfilter["show_unknown"] = args.show_unknown 540 | printfilter["show_benign"] = args.show_benign 541 | printfilter["show_used"] = args.show_used 542 | 543 | print_diff(performed_actions, allowed_actions, printfilter, use_color) 544 | -------------------------------------------------------------------------------- /cloudtracker/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Copyright 2018 Duo Security 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 6 | following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 9 | disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 12 | following disclaimer in the documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 15 | products derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 18 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 23 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | --------------------------------------------------------------------------- 25 | """ 26 | 27 | import argparse 28 | import datetime 29 | 30 | import yaml 31 | 32 | from . import run 33 | 34 | 35 | def main(): 36 | now = datetime.datetime.now() 37 | parser = argparse.ArgumentParser() 38 | 39 | # Add mutually exclusive arguments for --list, --user, and --role 40 | action_group = parser.add_mutually_exclusive_group(required=True) 41 | action_group.add_argument( 42 | "--list", 43 | help="List 'users' or 'roles' that have been active", 44 | choices=["users", "roles"], 45 | ) 46 | action_group.add_argument("--user", help="User to investigate", type=str) 47 | action_group.add_argument("--role", help="Role to investigate", type=str) 48 | 49 | parser.add_argument( 50 | "--config", 51 | help="Config file name (default: config.yaml)", 52 | required=False, 53 | default="config.yaml", 54 | type=argparse.FileType("r"), 55 | ) 56 | parser.add_argument( 57 | "--iam", 58 | dest="iam_file", 59 | help="IAM output from running `aws iam get-account-authorization-details`", 60 | required=False, 61 | default="./data/get-account-authorization-details.json", 62 | type=str, 63 | ) 64 | parser.add_argument("--account", help="Account name", required=True, type=str) 65 | parser.add_argument( 66 | "--start", 67 | help="Start of date range (ex. 2018-01-21). 
Defaults to one year ago.", 68 | default=(now - datetime.timedelta(days=365)).date().isoformat(), 69 | required=False, 70 | type=str, 71 | ) 72 | parser.add_argument( 73 | "--end", 74 | help="End of date range (ex. 2018-01-21). Defaults to today.", 75 | default=now.date().isoformat(), 76 | required=False, 77 | type=str, 78 | ) 79 | parser.add_argument( 80 | "--destrole", help="Role assumed into", required=False, default=None, type=str 81 | ) 82 | parser.add_argument( 83 | "--destaccount", 84 | help="Account assumed into (if different)", 85 | required=False, 86 | default=None, 87 | type=str, 88 | ) 89 | parser.add_argument( 90 | "--show-used", 91 | dest="show_used", 92 | help="Only show privileges that were used", 93 | required=False, 94 | action="store_true", 95 | ) 96 | parser.add_argument( 97 | "--ignore-benign", 98 | dest="show_benign", 99 | help="Don't show actions that aren't likely to be sensitive, " 100 | "such as ones that won't exfil data or modify resources", 101 | required=False, 102 | action="store_false", 103 | ) 104 | parser.add_argument( 105 | "--ignore-unknown", 106 | dest="show_unknown", 107 | help="Don't show granted privileges that aren't recorded in CloudTrail, " 108 | "as we don't know if they are used", 109 | required=False, 110 | action="store_false", 111 | ) 112 | parser.add_argument( 113 | "--no-color", 114 | dest="use_color", 115 | help="Don't use color codes in output", 116 | required=False, 117 | action="store_false", 118 | ) 119 | parser.add_argument( 120 | "--skip-setup", 121 | dest="skip_setup", 122 | help="For Athena, don't create or test for the tables", 123 | required=False, 124 | action="store_true", 125 | default=False, 126 | ) 127 | 128 | args = parser.parse_args() 129 | 130 | # Read config safely (plain yaml.load without a Loader is deprecated and unsafe) 131 | try: 132 | config = yaml.safe_load(args.config) 133 | except yaml.YAMLError as e: 134 | raise argparse.ArgumentError( 135 | None, 136 | "ERROR: Could not load yaml from config file {}\n{}".format( 137 | args.config.name, e 138 | ), 139 | ) 140 | 141 | run(args, config, args.start, args.end) 142 | -------------------------------------------------------------------------------- /cloudtracker/datasources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duo-labs/cloudtracker/822ef553266aca2b1d54fc44e09c230f6df77a8a/cloudtracker/datasources/__init__.py -------------------------------------------------------------------------------- /cloudtracker/datasources/athena.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 Summit Route 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 5 | following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 8 | disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 11 | following disclaimer in the documentation and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 14 | products derived from this software without specific prior written permission.
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 17 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 22 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | --------------------------------------------------------------------------- 24 | """ 25 | 26 | import logging 27 | import boto3 28 | import time 29 | import json 30 | import datetime 31 | from dateutil.relativedelta import relativedelta 32 | 33 | from cloudtracker import normalize_api_call 34 | 35 | # Much thanks to Alex Smolen (https://twitter.com/alsmola) 36 | # for his post "Partitioning CloudTrail Logs in Athena" 37 | # https://medium.com/@alsmola/partitioning-cloudtrail-logs-in-athena-29add93ee070 38 | 39 | # TODO Delete result objects from S3 40 | # TODO Add ability to skip setup 41 | # TODO Add teardown to remove all the athena tables, partitions, and views 42 | 43 | 44 | NUM_MONTHS_FOR_PARTITIONS = 12 45 | 46 | 47 | class Athena(object): 48 | athena = None 49 | s3 = None 50 | database = "cloudtracker" 51 | output_bucket = "aws-athena-query-results-ACCOUNT_ID-REGION" 52 | search_filter = "" 53 | table_name = "" 54 | workgroup = 'primary' 55 | 56 | def query_athena( 57 | self, query, context={"Database": database}, do_not_wait=False, skip_header=True 58 | ): 59 | logging.debug("Making query {}".format(query)) 60 | 61 | # Make query request dependent on whether the context is None or not 62 | if context is None: 63 | response = self.athena.start_query_execution( 64 | QueryString=query, 65 | ResultConfiguration={"OutputLocation": self.output_bucket}, 66 | WorkGroup=self.workgroup 67 | ) 68 | else: 69 | response = self.athena.start_query_execution( 70 | QueryString=query, 71 | QueryExecutionContext=context, 72 | ResultConfiguration={"OutputLocation": self.output_bucket}, 73 | WorkGroup=self.workgroup 74 | ) 75 | 76 | if do_not_wait: 77 | return response["QueryExecutionId"] 78 | 79 | self.wait_for_query_to_complete(response["QueryExecutionId"]) 80 | 81 | # Paginate results and combine them 82 | rows = [] 83 | paginator = self.athena.get_paginator("get_query_results") 84 | response_iterator = paginator.paginate( 85 | QueryExecutionId=response["QueryExecutionId"] 86 | ) 87 | row_count = 0 88 | for response in response_iterator: 89 | for row in response["ResultSet"]["Rows"]: 90 | row_count += 1 91 | if row_count == 1: 92 | if skip_header: 93 | # Skip header 94 | continue 95 | rows.append(self.extract_response_values(row)) 96 | return rows 97 | 98 | def extract_response_values(self, row): 99 | result = [] 100 | for column in row["Data"]: 101 | result.append(column.get("VarCharValue", "")) 102 | return result 103 | 104 | def wait_for_query_to_complete(self, queryExecutionId): 105 | """ 106 | Returns when the query completes successfully, or raises an exception if it fails or is canceled. 107 | Waits until the query finishes running. 
108 | """ 109 | 110 | while True: 111 | response = self.athena.get_query_execution( 112 | QueryExecutionId=queryExecutionId 113 | ) 114 | state = response["QueryExecution"]["Status"]["State"] 115 | if state == "SUCCEEDED": 116 | return True 117 | if state == "FAILED" or state == "CANCELLED": 118 | raise Exception( 119 | "Query entered state {state} with reason {reason}".format( 120 | state=state, 121 | reason=response["QueryExecution"]["Status"][ 122 | "StateChangeReason" 123 | ], 124 | ) 125 | ) 126 | logging.debug( 127 | "Sleeping 1 second while query {} completes".format(queryExecutionId) 128 | ) 129 | time.sleep(1) 130 | 131 | def wait_for_query_batch_to_complete(self, queryExecutionIds): 132 | """ 133 | Returns when the query completes successfully, or raises an exception if it fails or is canceled. 134 | Waits until the query finishes running. 135 | """ 136 | 137 | while len(queryExecutionIds) > 0: 138 | response = self.athena.batch_get_query_execution( 139 | QueryExecutionIds=list(queryExecutionIds) 140 | ) 141 | for query_execution in response["QueryExecutions"]: 142 | state = query_execution["Status"]["State"] 143 | if state == "SUCCEEDED": 144 | queryExecutionIds.remove(query_execution["QueryExecutionId"]) 145 | if state == "FAILED" or state == "CANCELLED": 146 | raise Exception( 147 | "Query entered state {state} with reason {reason}".format( 148 | state=state, 149 | reason=response["QueryExecution"]["Status"][ 150 | "StateChangeReason" 151 | ], 152 | ) 153 | ) 154 | 155 | if len(queryExecutionIds) == 0: 156 | return 157 | logging.debug( 158 | "Sleeping 1 second while {} queries complete".format( 159 | len(queryExecutionIds) 160 | ) 161 | ) 162 | time.sleep(1) 163 | 164 | def __init__(self, config, account, start, end, args): 165 | # Mute boto except errors 166 | logging.getLogger("botocore").setLevel(logging.WARN) 167 | logging.info( 168 | "Source of CloudTrail logs: s3://{bucket}/{path}".format( 169 | bucket=config["s3_bucket"], path=config["path"] 170 | ) 171 | ) 172 | 173 | # Check start date is not older than a year, as we only create partitions for that far back 174 | if ( 175 | datetime.datetime.now() - datetime.datetime.strptime(start, "%Y-%m-%d") 176 | ).days > 365: 177 | raise Exception( 178 | "Start date is over a year old. CloudTracker does not create or use partitions over a year old." 
179 | ) 180 | 181 | # 182 | # Create date filtering 183 | # 184 | month_restrictions = set() 185 | start = start.split("-") 186 | end = end.split("-") 187 | 188 | if start[0] == end[0]: 189 | for month in range(int(start[1]), int(end[1]) + 1): 190 | month_restrictions.add( 191 | "(year = '{:0>2}' and month = '{:0>2}')".format(start[0], month) 192 | ) 193 | else: 194 | # Add restrictions for months in start year 195 | for month in range(int(start[1]), 12 + 1): 196 | month_restrictions.add( 197 | "(year = '{:0>2}' and month = '{:0>2}')".format(start[0], month) 198 | ) 199 | # Add restrictions for months in middle years 200 | for year in range(int(start[0]), int(end[0])): 201 | for month in range(1, 12 + 1): 202 | month_restrictions.add( 203 | "(year = '{:0>2}' and month = '{:0>2}')".format(year, month) 204 | ) 205 | # Add restrictions for months in final year 206 | for month in range(1, int(end[1]) + 1): 207 | month_restrictions.add( 208 | "(year = '{:0>2}' and month = '{:0>2}')".format(end[0], month) 209 | ) 210 | 211 | # Combine date filters and add error filter 212 | self.search_filter = ( 213 | "((" + " or ".join(month_restrictions) + ") and errorcode IS NULL)" 214 | ) 215 | 216 | self.table_name = "cloudtrail_logs_{}".format(account["id"]) 217 | 218 | # 219 | # Display the AWS identity (doubles as a check that boto creds are set up) 220 | # 221 | sts = boto3.client("sts") 222 | identity = sts.get_caller_identity() 223 | logging.info("Using AWS identity: {}".format(identity["Arn"])) 224 | current_account_id = identity["Account"] 225 | region = boto3.session.Session().region_name 226 | 227 | if "output_s3_bucket" in config: 228 | self.output_bucket = config["output_s3_bucket"] 229 | else: 230 | self.output_bucket = "s3://aws-athena-query-results-{}-{}".format( 231 | current_account_id, region 232 | ) 233 | logging.info("Using output bucket: {}".format(self.output_bucket)) 234 | 235 | if "workgroup" in config: 236 | self.workgroup = config["workgroup"] 237 | logging.info("Using workgroup: {}".format(self.workgroup)) 238 | 239 | if not config.get('org_id'): 240 | cloudtrail_log_path = "s3://{bucket}/{path}/AWSLogs/{account_id}/CloudTrail".format( 241 | bucket=config["s3_bucket"], path=config["path"], account_id=account["id"] 242 | ) 243 | else: 244 | cloudtrail_log_path = "s3://{bucket}/{path}/AWSLogs/{org_id}/{account_id}/CloudTrail".format( 245 | bucket=config["s3_bucket"], path=config["path"], org_id=config["org_id"], account_id=account["id"] 246 | ) 247 | 248 | logging.info("Account cloudtrail log path: {}".format(cloudtrail_log_path)) 249 | 250 | # Open connections to needed AWS services 251 | self.athena = boto3.client("athena") 252 | self.s3 = boto3.client("s3") 253 | 254 | if args.skip_setup: 255 | logging.info("Skipping initial table creation") 256 | return 257 | 258 | # Check we can access the S3 bucket 259 | resp = self.s3.list_objects_v2( 260 | Bucket=config["s3_bucket"], Prefix=config["path"], MaxKeys=1 261 | ) 262 | if "Contents" not in resp or len(resp["Contents"]) == 0: 263 | exit( 264 | "ERROR: S3 bucket has no contents.
Ensure you have logs at s3://{bucket}/{path}".format( 265 | bucket=config["s3_bucket"], path=config["path"] 266 | ) 267 | ) 268 | 269 | # Ensure our database exists 270 | self.query_athena( 271 | "CREATE DATABASE IF NOT EXISTS {db} {comment}".format( 272 | db=self.database, comment="COMMENT 'Created by CloudTracker'" 273 | ), 274 | context=None, 275 | ) 276 | 277 | # 278 | # Set up table 279 | # 280 | query = """CREATE EXTERNAL TABLE IF NOT EXISTS `{table_name}` ( 281 | `eventversion` string COMMENT 'from deserializer', 282 | `useridentity` struct<type:string,principalid:string,arn:string,accountid:string,invokedby:string,accesskeyid:string,username:string,sessioncontext:struct<attributes:struct<mfaauthenticated:string,creationdate:string>,sessionissuer:struct<type:string,principalid:string,arn:string,accountid:string,username:string>>> COMMENT 'from deserializer', 283 | `eventtime` string COMMENT 'from deserializer', 284 | `eventsource` string COMMENT 'from deserializer', 285 | `eventname` string COMMENT 'from deserializer', 286 | `awsregion` string COMMENT 'from deserializer', 287 | `sourceipaddress` string COMMENT 'from deserializer', 288 | `useragent` string COMMENT 'from deserializer', 289 | `errorcode` string COMMENT 'from deserializer', 290 | `errormessage` string COMMENT 'from deserializer', 291 | `requestparameters` string COMMENT 'from deserializer', 292 | `responseelements` string COMMENT 'from deserializer', 293 | `additionaleventdata` string COMMENT 'from deserializer', 294 | `requestid` string COMMENT 'from deserializer', 295 | `eventid` string COMMENT 'from deserializer', 296 | `resources` array<struct<arn:string,accountid:string,type:string>> COMMENT 'from deserializer', 297 | `eventtype` string COMMENT 'from deserializer', 298 | `apiversion` string COMMENT 'from deserializer', 299 | `readonly` string COMMENT 'from deserializer', 300 | `recipientaccountid` string COMMENT 'from deserializer', 301 | `serviceeventdetails` string COMMENT 'from deserializer', 302 | `sharedeventid` string COMMENT 'from deserializer', 303 | `vpcendpointid` string COMMENT 'from deserializer') 304 | PARTITIONED BY (region string, year string, month string) 305 | ROW FORMAT SERDE 306 | 'com.amazon.emr.hive.serde.CloudTrailSerde' 307 | STORED AS INPUTFORMAT 308 | 'com.amazon.emr.cloudtrail.CloudTrailInputFormat' 309 | OUTPUTFORMAT 310 | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' 311 | LOCATION '{cloudtrail_log_path}'""".format( 312 | table_name=self.table_name, cloudtrail_log_path=cloudtrail_log_path 313 | ) 314 | self.query_athena(query) 315 | 316 | # 317 | # Create partitions 318 | # 319 | 320 | logging.info( 321 | "Checking if all partitions for the past {} months exist".format( 322 | NUM_MONTHS_FOR_PARTITIONS 323 | ) 324 | ) 325 | 326 | # Get list of current partitions 327 | query = "SHOW PARTITIONS {table_name}".format(table_name=self.table_name) 328 | partition_list = self.query_athena(query, skip_header=False) 329 | 330 | partition_set = set() 331 | for partition in partition_list: 332 | partition_set.add(partition[0]) 333 | 334 | # Get region list. Using ec2 here just because it exists in all regions.
335 | regions = boto3.session.Session().get_available_regions("ec2") 336 | 337 | queries_to_make = set() 338 | 339 | # Iterate over every month for the past year and build queries to run to create partitions 340 | for num_months_ago in range(0, NUM_MONTHS_FOR_PARTITIONS): 341 | date_of_interest = datetime.datetime.now() - relativedelta( 342 | months=num_months_ago 343 | ) 344 | year = date_of_interest.year 345 | month = "{:0>2}".format(date_of_interest.month) 346 | 347 | query = "" 348 | 349 | for region in regions: 350 | if ( 351 | "region={region}/year={year}/month={month}".format( 352 | region=region, year=year, month=month 353 | ) 354 | in partition_set 355 | ): 356 | continue 357 | 358 | query += "PARTITION (region='{region}',year='{year}',month='{month}') location '{cloudtrail_log_path}/{region}/{year}/{month}/'\n".format( 359 | region=region, 360 | year=year, 361 | month=month, 362 | cloudtrail_log_path=cloudtrail_log_path, 363 | ) 364 | if query != "": 365 | queries_to_make.add( 366 | "ALTER TABLE {table_name} ADD ".format(table_name=self.table_name) 367 | + query 368 | ) 369 | 370 | # Run the queries 371 | query_count = len(queries_to_make) 372 | for query in queries_to_make: 373 | logging.info("Partition groups remaining to create: {}".format(query_count)) 374 | self.query_athena(query) 375 | query_count -= 1 376 | 377 | def get_performed_users(self): 378 | """ 379 | Returns the users that performed actions within the search filters 380 | """ 381 | query = "select distinct userIdentity.userName from {table_name} where {search_filter}".format( 382 | table_name=self.table_name, search_filter=self.search_filter 383 | ) 384 | response = self.query_athena(query) 385 | 386 | user_names = {} 387 | for row in response: 388 | user_name = row[0] 389 | if user_name == "HIDDEN_DUE_TO_SECURITY_REASONS": 390 | # This happens when a user logs in with the wrong username 391 | continue 392 | user_names[user_name] = True 393 | return user_names 394 | 395 | def get_performed_roles(self): 396 | """ 397 | Returns the roles that performed actions within the search filters 398 | """ 399 | query = "select distinct userIdentity.sessionContext.sessionIssuer.userName from {table_name} where {search_filter}".format( 400 | table_name=self.table_name, search_filter=self.search_filter 401 | ) 402 | response = self.query_athena(query) 403 | 404 | role_names = {} 405 | for row in response: 406 | role = row[0] 407 | role_names[role] = True 408 | return role_names 409 | 410 | def get_search_query(self): 411 | # Athena doesn't use this call, but needs to support it being called 412 | return None 413 | 414 | def get_events_from_search(self, searchresults): 415 | """ 416 | Given the results of a query for events, return these in a more usable fashion 417 | """ 418 | event_names = {} 419 | 420 | for event in searchresults: 421 | event = event[0] 422 | # event is now a string like "{field0=s3.amazonaws.com, field1=GetBucketAcl}" 423 | # I parse out the field manually 424 | # TODO Find a smarter way to parse this data 425 | 426 | # Remove the '{' and '}' 427 | event = event[1 : len(event) - 1] 428 | 429 | # Split into 'field0=s3.amazonaws.com' and 'field1=GetBucketAcl' 430 | event = event.split(", ") 431 | # Get the eventsource 's3.amazonaws.com' 432 | service = event[0].split("=")[1] 433 | # Get the service 's3' 434 | service = service.split(".")[0] 435 | 436 | # Get the eventname 'GetBucketAcl' 437 | eventname = event[1].split("=")[1] 438 | 439 | event_names[normalize_api_call(service, eventname)] = True 440 | 441 | 
return event_names 442 | 443 | def get_performed_event_names_by_user(self, _, user_iam): 444 | """For a user, return all performed events""" 445 | 446 | query = "select distinct (eventsource, eventname) from {table_name} where (userIdentity.arn = '{identity}') and {search_filter}".format( 447 | table_name=self.table_name, 448 | identity=user_iam["Arn"], 449 | search_filter=self.search_filter, 450 | ) 451 | response = self.query_athena(query) 452 | 453 | return self.get_events_from_search(response) 454 | 455 | def get_performed_event_names_by_role(self, _, role_iam): 456 | """For a role, return all performed events""" 457 | 458 | query = "select distinct (eventsource, eventname) from {table_name} where (userIdentity.sessionContext.sessionIssuer.arn = '{identity}') and {search_filter}".format( 459 | table_name=self.table_name, 460 | identity=role_iam["Arn"], 461 | search_filter=self.search_filter, 462 | ) 463 | response = self.query_athena(query) 464 | 465 | return self.get_events_from_search(response) 466 | 467 | def get_performed_event_names_by_user_in_role( 468 | self, searchquery, user_iam, role_iam 469 | ): 470 | """For a user that has assumed into another role, return all performed events""" 471 | raise Exception("Not implemented") 472 | sessionquery = ( 473 | searchquery.query(self.get_query_match("eventName", "AssumeRole")) 474 | .query(self.get_query_match("userIdentity.arn", user_iam["Arn"])) 475 | .query(self.get_query_match("requestParameters.roleArn", role_iam["Arn"])) 476 | ) 477 | 478 | event_names = {} 479 | for roleAssumption in sessionquery.scan(): 480 | sessionKey = roleAssumption.responseElements.credentials.accessKeyId 481 | # I assume the session key is unique enough to use for identifying role assumptions 482 | # TODO: I should also be using sharedEventID as explained in: 483 | # https://aws.amazon.com/blogs/security/aws-cloudtrail-now-tracks-cross-account-activity-to-its-origin/ 484 | # I could also use the timings of these events. 485 | innerquery = searchquery.query( 486 | self.get_query_match("userIdentity.accessKeyId", sessionKey) 487 | ).query( 488 | self.get_query_match( 489 | "userIdentity.sessionContext.sessionIssuer.arn", role_iam["Arn"] 490 | ) 491 | ) 492 | 493 | event_names.update(self.get_events_from_search(innerquery)) 494 | 495 | return event_names 496 | 497 | def get_performed_event_names_by_role_in_role( 498 | self, searchquery, role_iam, dest_role_iam 499 | ): 500 | """For a role that has assumed into another role, return all performed events""" 501 | raise Exception("Not implemented") 502 | sessionquery = ( 503 | searchquery.query(self.get_query_match("eventName", "AssumeRole")) 504 | .query( 505 | self.get_query_match( 506 | "userIdentity.sessionContext.sessionIssuer.arn", role_iam["Arn"] 507 | ) 508 | ) 509 | .query( 510 | self.get_query_match("requestParameters.roleArn", dest_role_iam["Arn"]) 511 | ) 512 | ) 513 | 514 | # TODO I should get a count of the number of role assumptions, since this can be millions 515 | 516 | event_names = {} 517 | count = 0 518 | for roleAssumption in sessionquery.scan(): 519 | count += 1 520 | if count % 1000 == 0: 521 | # This is just info level information, for cases where many role assumptions have happened 522 | # I should advise the user to just look at the final role, especially for cases where the same role 523 | # is continuously assuming into another role and that is the only thing assuming into it. 
524 | print("{} role assumptions scanned so far...".format(count)) 525 | sessionKey = roleAssumption.responseElements.credentials.accessKeyId 526 | innerquery = searchquery.query( 527 | self.get_query_match("userIdentity.accessKeyId", sessionKey) 528 | ).query( 529 | self.get_query_match( 530 | "userIdentity.sessionContext.sessionIssuer.arn", 531 | dest_role_iam["Arn"], 532 | ) 533 | ) 534 | 535 | event_names.update(self.get_events_from_search(innerquery)) 536 | 537 | return event_names 538 | -------------------------------------------------------------------------------- /cloudtracker/datasources/es.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 Duo Security 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 5 | following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 8 | disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 11 | following disclaimer in the documentation and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 14 | products derived from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 17 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 22 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | --------------------------------------------------------------------------- 24 | """ 25 | 26 | from elasticsearch import Elasticsearch 27 | from elasticsearch_dsl import Search, Q 28 | from cloudtracker import normalize_api_call 29 | 30 | 31 | class ElasticSearch(object): 32 | es = None 33 | index = "cloudtrail" 34 | key_prefix = "" 35 | 36 | # Create search filters 37 | searchfilter = None 38 | 39 | def __init__(self, config, start, end): 40 | # Open connection to ElasticSearch 41 | self.es = Elasticsearch([config], timeout=900) 42 | self.searchfilter = {} 43 | self.index = config.get("index", "cloudtrail") 44 | self.key_prefix = config.get("key_prefix", "") 45 | if self.key_prefix != "": 46 | self.key_prefix += "." 
47 | self.timestamp_field = config.get("timestamp_field", "eventTime") 48 | 49 | # Used to make elasticsearch query language semantics dynamically based on version 50 | self.es_version = int(self.es.info()["version"]["number"].split(".")[0]) 51 | 52 | # Filter errors 53 | # https://www.elastic.co/guide/en/elasticsearch/reference/2.0/breaking_20_query_dsl_changes.html 54 | # http://www.dlxedu.com/askdetail/3/0620e1124992fb281da93c7efe53b97f.html 55 | if self.es_version < 2: 56 | error_filter = {"exists": {"field": self.get_field_name("errorCode")}} 57 | self.searchfilter["filter_errors"] = ~Q("filtered", filter=error_filter) 58 | else: 59 | self.searchfilter["filter_errors"] = ~Q( 60 | "exists", field=self.get_field_name("errorCode") 61 | ) 62 | 63 | # Filter dates 64 | if start: 65 | self.searchfilter["start_date_filter"] = Q( 66 | "range", **{self.timestamp_field: {"gte": start}} 67 | ) 68 | if end: 69 | self.searchfilter["end_date_filter"] = Q( 70 | "range", **{self.timestamp_field: {"lte": end}} 71 | ) 72 | 73 | def get_field_name(self, field): 74 | return self.key_prefix + field + self.get_field_suffix() 75 | 76 | def get_field_suffix(self): 77 | # The .keyword and .raw suffix only apply to indices whose names match logstash-* 78 | # https://discuss.elastic.co/t/no-raw-field/49342/4 79 | # However, based on our suggested mapping, our fields should have a .keyword suffix 80 | 81 | # https://www.elastic.co/guide/en/logstash/5.0/breaking-changes.html 82 | if self.es_version < 5: 83 | return ".raw" 84 | else: 85 | return ".keyword" 86 | 87 | def get_query_match(self, field, value): 88 | field = self.get_field_name(field) 89 | return {"match": {field: value}} 90 | 91 | def get_performed_users(self): 92 | """ 93 | Returns the users that performed actions within the search filters 94 | """ 95 | search = Search(using=self.es, index=self.index) 96 | for query in self.searchfilter.values(): 97 | search = search.query(query) 98 | 99 | search.aggs.bucket( 100 | "user_names", 101 | "terms", 102 | field=self.get_field_name("userIdentity.userName"), 103 | size=5000, 104 | ) 105 | response = search.execute() 106 | 107 | user_names = {} 108 | for user in response.aggregations.user_names.buckets: 109 | if user.key == "HIDDEN_DUE_TO_SECURITY_REASONS": 110 | # This happens when a user logs in with the wrong username 111 | continue 112 | user_names[user.key] = True 113 | return user_names 114 | 115 | def get_performed_roles(self): 116 | """ 117 | Returns the roles that performed actions within the search filters 118 | """ 119 | search = Search(using=self.es, index=self.index) 120 | for query in self.searchfilter.values(): 121 | search = search.query(query) 122 | 123 | userName_field = self.get_field_name( 124 | "userIdentity.sessionContext.sessionIssuer.userName" 125 | ) 126 | search.aggs.bucket("role_names", "terms", field=userName_field, size=5000) 127 | response = search.execute() 128 | 129 | role_names = {} 130 | for role in response.aggregations.role_names.buckets: 131 | role_names[role.key] = True 132 | return role_names 133 | 134 | def get_search_query(self): 135 | """ 136 | Opens a connection to ElasticSearch and applies the initial filters 137 | """ 138 | search = Search(using=self.es, index=self.index) 139 | for query in self.searchfilter.values(): 140 | search = search.query(query) 141 | 142 | return search 143 | 144 | def get_events_from_search(self, searchquery): 145 | """ 146 | Given a started elasticsearch query, apply the remaining search filters, and 147 | return the API calls that exist 
for this query. 148 | s: search query 149 | """ 150 | searchquery.aggs.bucket( 151 | "event_names", "terms", field=self.get_field_name("eventName"), size=5000 152 | ).bucket( 153 | "service_names", 154 | "terms", 155 | field=self.get_field_name("eventSource"), 156 | size=5000, 157 | ) 158 | response = searchquery.execute() 159 | 160 | event_names = {} 161 | 162 | for event in response.aggregations.event_names.buckets: 163 | service = event.service_names.buckets[0].key 164 | service = service.split(".")[0] 165 | 166 | event_names[normalize_api_call(service, event.key)] = True 167 | 168 | return event_names 169 | 170 | def get_performed_event_names_by_user(self, searchquery, user_iam): 171 | """For a user, return all performed events""" 172 | searchquery = searchquery.query( 173 | self.get_query_match("userIdentity.arn", user_iam["Arn"]) 174 | ) 175 | return self.get_events_from_search(searchquery) 176 | 177 | def get_performed_event_names_by_role(self, searchquery, role_iam): 178 | """For a role, return all performed events""" 179 | field = "userIdentity.sessionContext.sessionIssuer.arn" 180 | searchquery = searchquery.query(self.get_query_match(field, role_iam["Arn"])) 181 | return self.get_events_from_search(searchquery) 182 | 183 | def get_performed_event_names_by_user_in_role( 184 | self, searchquery, user_iam, role_iam 185 | ): 186 | """For a user that has assumed into another role, return all performed events""" 187 | sessionquery = ( 188 | searchquery.query(self.get_query_match("eventName", "AssumeRole")) 189 | .query(self.get_query_match("userIdentity.arn", user_iam["Arn"])) 190 | .query(self.get_query_match("requestParameters.roleArn", role_iam["Arn"])) 191 | ) 192 | 193 | event_names = {} 194 | for roleAssumption in sessionquery.scan(): 195 | sessionKey = roleAssumption.responseElements.credentials.accessKeyId 196 | # I assume the session key is unique enough to use for identifying role assumptions 197 | # TODO: I should also be using sharedEventID as explained in: 198 | # https://aws.amazon.com/blogs/security/aws-cloudtrail-now-tracks-cross-account-activity-to-its-origin/ 199 | # I could also use the timings of these events. 
200 | innerquery = searchquery.query( 201 | self.get_query_match("userIdentity.accessKeyId", sessionKey) 202 | ).query( 203 | self.get_query_match( 204 | "userIdentity.sessionContext.sessionIssuer.arn", role_iam["Arn"] 205 | ) 206 | ) 207 | 208 | event_names.update(self.get_events_from_search(innerquery)) 209 | 210 | return event_names 211 | 212 | def get_performed_event_names_by_role_in_role( 213 | self, searchquery, role_iam, dest_role_iam 214 | ): 215 | """For a role that has assumed into another role, return all performed events""" 216 | sessionquery = ( 217 | searchquery.query(self.get_query_match("eventName", "AssumeRole")) 218 | .query( 219 | self.get_query_match( 220 | "userIdentity.sessionContext.sessionIssuer.arn", role_iam["Arn"] 221 | ) 222 | ) 223 | .query( 224 | self.get_query_match("requestParameters.roleArn", dest_role_iam["Arn"]) 225 | ) 226 | ) 227 | 228 | # TODO I should get a count of the number of role assumptions, since this can be millions 229 | 230 | event_names = {} 231 | count = 0 232 | for roleAssumption in sessionquery.scan(): 233 | count += 1 234 | if count % 1000 == 0: 235 | # This is just info level information, for cases where many role assumptions have happened 236 | # I should advise the user to just look at the final role, especially for cases where the same role 237 | # is continuously assuming into another role and that is the only thing assuming into it. 238 | print("{} role assumptions scanned so far...".format(count)) 239 | sessionKey = roleAssumption.responseElements.credentials.accessKeyId 240 | innerquery = searchquery.query( 241 | self.get_query_match("userIdentity.accessKeyId", sessionKey) 242 | ).query( 243 | self.get_query_match( 244 | "userIdentity.sessionContext.sessionIssuer.arn", 245 | dest_role_iam["Arn"], 246 | ) 247 | ) 248 | 249 | event_names.update(self.get_events_from_search(innerquery)) 250 | 251 | return event_names 252 | -------------------------------------------------------------------------------- /config.yaml.demo: -------------------------------------------------------------------------------- 1 | # Config 2 | elasticsearch: 3 | host: localhost 4 | port: 9200 5 | index: "cloudtrail" 6 | key_prefix: "" 7 | timestamp_field: "eventTime" 8 | accounts: 9 | - name: demo 10 | id: 111111111111 11 | iam: account-data/demo_iam.json 12 | - name: demo2 13 | id: 222222222222 14 | iam: account-data/demo2_iam.json 15 | -------------------------------------------------------------------------------- /docs/elasticsearch.md: -------------------------------------------------------------------------------- 1 | This document describes how to use CloudTracker with ElasticSearch. 2 | 3 | Requirements 4 | ============ 5 | * CloudTrail logs must be loaded into ElasticSearch. For instructions on setting up ElasticSearch and ingesting an archive of CloudTrail logs into it see below. 6 | * ElasticSearch 6.x is supported, but there are reports of ElasticSearch 1.x being used successfully. 
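CloudTracker keys its query syntax off the cluster's major version (at startup it reads `version.number` from the root endpoint), so it's worth confirming what you're running first. A quick check, assuming your cluster is reachable at `localhost:9200` (substitute your own endpoint):
```
curl -s http://localhost:9200/ | jq -r '.version.number'
```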
7 | 8 | Installation 9 | ============ 10 | 11 | ### Step 1 12 | Install the Python libraries with pip, using the extra that matches your ElasticSearch version: 13 | 14 | For elasticsearch v6.x: 15 | ``` 16 | python3 -m venv ./venv && source venv/bin/activate 17 | pip install git+https://github.com/duo-labs/cloudtracker.git#egg=cloudtracker[es6] 18 | ``` 19 | 20 | For older versions, such as elasticsearch v1.x: 21 | ``` 22 | python3 -m venv ./venv && source venv/bin/activate 23 | pip install git+https://github.com/duo-labs/cloudtracker.git#egg=cloudtracker[es1] 24 | ``` 25 | 26 | ### Step 2 27 | Get the IAM data of the account: 28 | 29 | ``` 30 | aws iam get-account-authorization-details > account-data/demo_iam.json 31 | ``` 32 | 33 | ### Step 3 34 | Edit the `config.yaml`. You need to specify how to connect to the ElasticSearch cluster, what index the CloudTrail logs are stored in, and information about your AWS account, including the location of the IAM file created in Step 2. 35 | 36 | Example `config.yaml` file: 37 | ``` 38 | elasticsearch: 39 | host: localhost 40 | port: 9200 41 | index: "cloudtrail" 42 | key_prefix: "" 43 | timestamp_field: "eventTime" 44 | accounts: 45 | - name: demo 46 | id: 123456789012 47 | iam: account-data/demo_iam.json 48 | ``` 49 | 50 | The ElasticSearch configuration section accepts the same options as the ElasticSearch Python library, documented here: http://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch 51 | 52 | Additionally, you can configure: 53 | 54 | - `index`: The index you loaded your logs into. 55 | - `key_prefix`: Any prefix you have to your CloudTrail records. For example, if your `eventName` is queryable via `my_cloudtrail_data.eventName`, then the `key_prefix` would be `my_cloudtrail_data`. 56 | 57 | 58 | 59 | Install ElasticSearch 60 | ===================== 61 | 62 | You can use an AWS managed ElasticSearch cluster or one that you manage, including one running locally on a VM on your laptop. However, if your logs exceed a few dozen GB or roughly 100M records, you'll likely run into issues running locally. You'll also want to install Kibana to look at the loaded logs. 63 | 64 | Configure the ElasticSearch mapping 65 | ----------------------------------- 66 | Using Kibana and clicking on "Dev Tools" you can send commands to ElasticSearch. You can also do this using `curl`. Run the following to set up a `cloudtrail` index and increase its total-fields limit to 5000. If you don't increase that limit, records will be silently dropped.
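After sending the commands below, you can verify that the limit took effect; `localhost:9200` is an assumption here, so substitute your own cluster endpoint:
```
curl -s http://localhost:9200/cloudtrail/_settings | jq '.cloudtrail.settings.index.mapping.total_fields.limit'
```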
67 | 68 | 69 | The commands to send 70 | ``` 71 | PUT /cloudtrail 72 | { 73 | "mappings": { 74 | "doc": { 75 | "properties": { 76 | "@timestamp": { 77 | "type": "date" 78 | }, 79 | "@version": { 80 | "type": "text", 81 | "fields": { 82 | "keyword": { 83 | "type": "keyword", 84 | "ignore_above": 256 85 | } 86 | } 87 | }, 88 | "host": { 89 | "type": "text", 90 | "fields": { 91 | "keyword": { 92 | "type": "keyword", 93 | "ignore_above": 256 94 | } 95 | } 96 | }, 97 | "message": { 98 | "properties": { 99 | "additionalEventData": { 100 | "properties": { 101 | "Note": { 102 | "type": "text", 103 | "fields": { 104 | "keyword": { 105 | "type": "keyword", 106 | "ignore_above": 256 107 | } 108 | } 109 | } 110 | } 111 | }, 112 | "apiVersion": { 113 | "type": "text" 114 | } 115 | } 116 | } 117 | } 118 | } 119 | } 120 | } 121 | ``` 122 | 123 | You can save the above file as `cloudtrail_mapping.json` and then send it to your ElasticSearch cluster using: 124 | 125 | ``` 126 | curl -X PUT https://YOUR_ES_SERVER/cloudtrail -T cloudtrail_mapping.json -H "Content-Type: application/json" 127 | ``` 128 | 129 | 130 | Do the same for: 131 | ``` 132 | PUT /cloudtrail/_settings 133 | { 134 | "index.mapping.total_fields.limit": 5000 135 | } 136 | ``` 137 | 138 | You can save that to a file named `cloudtrail_settings.json` and then run: 139 | ``` 140 | curl -X PUT https://YOUR_ES_SERVER/cloudtrail/_settings -T cloudtrail_settings.json -H "Content-Type: application/json" 141 | ``` 142 | 143 | 144 | Ingest CloudTrail logs into ElasticSearch using Hindsight 145 | ========================================================= 146 | 147 | Copy your CloudTrail logs locally and convert them to a single flat file. 148 | 149 | ``` 150 | # Replace YOUR_BUCKET and YOUR_ACCOUNT_ID in the following command 151 | aws s3 sync s3://YOUR_BUCKET/AWSLogs/YOUR_ACCOUNT_ID/CloudTrail/ . 152 | find . -name "*.json.gz" -exec gunzip -c {} \; | jq -cr '.Records[] | del(.responseElements.endpoint)' >> ../cloudtrail.json 153 | ``` 154 | 155 | I'm deleting `.responseElements.endpoint` because different API calls return an object or a string for that value and ElasticSearch can't handle mixed types, so I just ignore that value since it is of little use. 156 | 157 | 158 | Install Hindsight 159 | ----------------- 160 | Hindsight is hard to install as it has a number of dependencies. The project is at https://github.com/mozilla-services/hindsight 161 | 162 | Here are some notes, but you'll still probably run into trouble. Help in improving the installation of those projects would be good. 163 | 164 | For the dependencies: 165 | ``` 166 | sudo yum install -y libcurl-devel autoconf automake libtool cmake 167 | 168 | git clone https://github.com/mozilla-services/lua_sandbox.git 169 | cd lua_sandbox 170 | mkdir release 171 | cd release 172 | 173 | cmake -DCMAKE_BUILD_TYPE=release .. 174 | make 175 | sudo make install 176 | 177 | cd ../.. 178 | 179 | git clone https://github.com/mozilla-services/lua_sandbox_extensions.git 180 | cd lua_sandbox_extensions 181 | mkdir release 182 | cd release 183 | # Disable a bunch of extensions when we build this to avoid further dependencies 184 | cmake -DCMAKE_BUILD_TYPE=release -DEXT_aws=off -DEXT_kafka=off -DEXT_parquet=off -DEXT_jose=off -DEXT_postgres=off -DEXT_systemd=off -DEXT_snappy=off -DCPACK_GENERATOR=RPM .. 
185 | make 186 | make packages 187 | sudo make install 188 | # In my experience I needed to manually install files, or copy or link them, as you should have files named 189 | # `rjson.so` and `ltn12.lua` at `/usr/local/lib/luasandbox/io_modules/`. 190 | ``` 191 | 192 | Now install Hindsight from https://github.com/mozilla-services/hindsight 193 | 194 | 195 | Run a proxy 196 | ----------- 197 | This may not be needed, but it's helpful, especially when using an AWS managed ElasticSearch cluster. 198 | 199 | ``` 200 | var http = require('http'), 201 | httpProxy = require('http-proxy'); 202 | 203 | var proxy = httpProxy.createProxyServer({}); 204 | 205 | proxy.on('proxyReq', function(proxyReq, req, res, options) { 206 | console.log("> Proxying: ", req.url); 207 | proxyReq.setHeader('content-type', 'application/json'); 208 | }); 209 | 210 | proxy.on('proxyRes', function (proxyRes, req, res) { 211 | console.log("< ", proxyRes.statusCode); 212 | }); 213 | 214 | var server = http.createServer(function(req, res) { 215 | proxy.web(req, res, { 216 | target: 'https://MY_ES_INSTANCE.us-west-2.es.amazonaws.com', secure: false 217 | }); 218 | }); 219 | 220 | console.log("listening on port 9201") 221 | server.listen(9201); 222 | ``` 223 | 224 | Here you can see I am ignoring any cert errors when making the TLS connection, so you'll need to decide if that is acceptable for your use case. 225 | 226 | Run this with: 227 | ``` 228 | node proxy.js 229 | ``` 230 | 231 | 232 | Configure Hindsight 233 | ------------------- 234 | This repo includes a `hindsight/run` directory. Copy the `run` directory to your hindsight repo. 235 | 236 | Replace `YOUR_FILE` in `run/input/file.cfg` with the full path to your `cloudtrail.json` file. 237 | 238 | Replace `127.0.0.1` and the port `9200` in `run/output/elasticsearch_bulk_api.cfg` if you are not running ElasticSearch on your localhost. 239 | 240 | 241 | Run hindsight 242 | ------------- 243 | To run hindsight use: 244 | 245 | ``` 246 | hindsight_cli hindsight.cfg 247 | ``` 248 | 249 | You will also want to run `rm -rf output/*` in between runs to clear out the cached files. 250 | You may need to modify `hindsight.cfg` to tell it the `io_lua_path` and other paths are in `/usr/local/lib/` not `/usr/lib/` 251 | 252 | -------------------------------------------------------------------------------- /hindsight/run/analysis/counter.cfg: -------------------------------------------------------------------------------- 1 | filename = "counter.lua" 2 | message_matcher = "TRUE" 3 | ticker_interval = 5 4 | -------------------------------------------------------------------------------- /hindsight/run/analysis/counter.lua: -------------------------------------------------------------------------------- 1 | require "string" 2 | msgcount = 0 3 | 4 | function process_message() 5 | msgcount = msgcount + 1 6 | return 0 7 | end 8 | 9 | function timer_event() 10 | inject_payload("txt", "count", string.format("%d message analysed", msgcount)) 11 | end 12 | -------------------------------------------------------------------------------- /hindsight/run/input/file.cfg: -------------------------------------------------------------------------------- 1 | filename = "file.lua" 2 | 3 | -- Name of the input file (nil for stdin) 4 | -- Default: 5 | input_filename = "YOUR_FILE" 6 | 7 | -- Heka message table containing the default header values to use, if they are 8 | -- not populated by the decoder. 
If 'Fields' is specified it should be in the 9 | -- hashed based format see: http://mozilla-services.github.io/lua_sandbox/heka/message.html 10 | -- Default: 11 | -- default_headers = nil 12 | 13 | -- Specifies a module that will decode the raw data and inject the resulting message. 14 | -- Default: 15 | -- decoder_module = "decoders.heka.json" 16 | 17 | -- Boolean, if true, any decode failure will inject a message of Type "error", 18 | -- with the Payload containing the error. 19 | -- Default: 20 | -- send_decode_failures = false 21 | -------------------------------------------------------------------------------- /hindsight/run/input/file.lua: -------------------------------------------------------------------------------- 1 | -- This Source Code Form is subject to the terms of the Mozilla Public 2 | -- License, v. 2.0. If a copy of the MPL was not distributed with this 3 | -- file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | --[[ 6 | # Single File Input (new line delimited) 7 | todo: when more than line splitting is needed the file should be read in chunks 8 | and passed to a generic splitter buffer with a token/match specification and a 9 | find function similar to the Heka stream reader. 10 | ## Sample Configuration 11 | ```lua 12 | filename = "file.lua" 13 | -- Name of the input file (nil for stdin) 14 | -- Default: 15 | -- input_filename = nil 16 | -- Heka message table containing the default header values to use, if they are 17 | -- not populated by the decoder. If 'Fields' is specified it should be in the 18 | -- hashed based format see: http://mozilla-services.github.io/lua_sandbox/heka/message.html 19 | -- Default: 20 | -- default_headers = nil 21 | -- Specifies a module that will decode the raw data and inject the resulting message. 22 | -- Default: 23 | -- decoder_module = "decoders.payload" 24 | -- Boolean, if true, any decode failure will inject a message of Type "error", 25 | -- with the Payload containing the error. 26 | -- Default: 27 | -- send_decode_failures = false 28 | ``` 29 | --]] 30 | require "io" 31 | require "string" 32 | 33 | local input_filename = read_config("input_filename") 34 | local default_headers = read_config("default_headers") 35 | assert(default_headers == nil or type(default_headers) == "table", "invalid default_headers cfg") 36 | 37 | local decoder_module = read_config("decoder_module") or "decoders.payload" 38 | local decode = require(decoder_module).decode 39 | if not decode then 40 | error(decoder_module .. 
" does not provide a decode function") 41 | end 42 | local send_decode_failures = read_config("send_decode_failures") 43 | 44 | local err_msg = { 45 | Type = "error", 46 | Payload = nil, 47 | } 48 | 49 | function process_message(checkpoint) 50 | local fh = io.stdin 51 | if input_filename then 52 | fh = assert(io.open(input_filename, "rb")) -- closed on plugin shutdown 53 | if checkpoint then 54 | fh:seek("set", checkpoint) 55 | else 56 | checkpoint = 0 57 | end 58 | end 59 | 60 | local cnt = 0 61 | for data in fh:lines() do 62 | local ok, err = pcall(decode, data, default_headers) 63 | if (not ok or err) and send_decode_failures then 64 | err_msg.Payload = err 65 | pcall(inject_message, err_msg) 66 | end 67 | 68 | if input_filename then 69 | checkpoint = checkpoint + #data + 1 70 | inject_message(nil, checkpoint) 71 | end 72 | cnt = cnt + 1 73 | end 74 | return 0, string.format("processed %d lines", cnt) 75 | end 76 | -------------------------------------------------------------------------------- /hindsight/run/input/json.lua: -------------------------------------------------------------------------------- 1 | -- This Source Code Form is subject to the terms of the Mozilla Public 2 | -- License, v. 2.0. If a copy of the MPL was not distributed with this 3 | -- file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | --[[ 6 | # Heka JSON Message Decoder Module 7 | https://wiki.mozilla.org/Firefox/Services/Logging 8 | 9 | The above link describes the Heka message format with a JSON schema. The JSON 10 | will be decoded and passed directly to inject_message so it needs to decode into 11 | a Heka message table described here: 12 | https://mozilla-services.github.io/lua_sandbox/heka/message.html 13 | 14 | ## Decoder Configuration Table 15 | * none 16 | 17 | ## Functions 18 | 19 | ### decode 20 | 21 | Decode and inject the resulting message 22 | 23 | *Arguments* 24 | - data (string) - JSON message with a Heka schema 25 | 26 | *Return* 27 | - nil - throws an error on an invalid data type, JSON parse error, 28 | inject_message failure etc. 29 | 30 | --]] 31 | 32 | -- Imports 33 | local cjson = require "cjson" 34 | 35 | local inject_message = inject_message 36 | 37 | local M = {} 38 | setfenv(1, M) -- Remove external access to contain everything in the module 39 | 40 | function decode(data) 41 | inject_message(cjson.decode(data)) 42 | end 43 | 44 | return M 45 | -------------------------------------------------------------------------------- /hindsight/run/output/elasticsearch_bulk_api.cfg: -------------------------------------------------------------------------------- 1 | filename = "elasticsearch_bulk_api.lua" 2 | message_matcher = "TRUE" 3 | ticker_interval = 10 -- flush every 10 seconds or flush_count (50000) messages 4 | memory_limit = 200e6 5 | 6 | address = "127.0.0.1" 7 | port = 9200 8 | timeout = 10 -- socket timeout 9 | flush_count = 500 -- 50000 10 | flush_on_shutdown = true 11 | preserve_data = false -- there is no state maintained in this plugin 12 | max_retry = 1 -- number of seconds (retries once per second) 13 | discard_on_error = false -- discard the batch after max_retry + 1 failed attempts to send the batch 14 | abort_on_error = false -- stop this plugin after max_retry + 1 failed attempts to send the batch 15 | -- when setting abort_on_error = true, consider also settings shutdown_on_terminate or remove_checkpoints_on_terminate 16 | 17 | -- See the elasticsearch module directory for the various encoders and configuration documentation. 
18 | encoder_module = "encoders.elasticsearch.payload" 19 | encoders_elasticsearch_common = { 20 | es_index_from_timestamp = true, 21 | index = "cloudtrail", 22 | type_name = "doc", 23 | } 24 | -------------------------------------------------------------------------------- /hindsight/run/output/elasticsearch_bulk_api.lua: -------------------------------------------------------------------------------- 1 | -- This Source Code Form is subject to the terms of the Mozilla Public 2 | -- License, v. 2.0. If a copy of the MPL was not distributed with this 3 | -- file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 | 5 | --[[ 6 | # Elasticsearch Bulk API Output 7 | 8 | ## Sample Configuration 9 | ```lua 10 | filename = "elasticsearch_bulk_api.lua" 11 | message_matcher = "Type == 'nginx'" 12 | ticker_interval = 10 -- flush every 10 seconds or flush_count (50000) messages 13 | memory_limit = 200e6 14 | 15 | address = "127.0.0.1" 16 | port = 9200 17 | timeout = 10 -- socket timeout 18 | flush_count = 50000 19 | flush_on_shutdown = false 20 | preserve_data = false -- there is no state maintained in this plugin 21 | max_retry = 0 -- number of seconds (retries once per second) 22 | discard_on_error = false -- discard the batch after max_retry + 1 failed attempts to send the batch 23 | abort_on_error = false -- stop this plugin after max_retry + 1 failed attempts to send the batch 24 | -- when setting abort_on_error = true, consider also settings shutdown_on_terminate or remove_checkpoints_on_terminate 25 | 26 | -- See the elasticsearch module directory for the various encoders and configuration documentation. 27 | encoder_module = "encoders.elasticsearch.payload" 28 | encoders_elasticsearch_common = { 29 | es_index_from_timestamp = true, 30 | index = "%{Logger}-%{%Y.%m.%d}", 31 | type_name = "%{Type}-%{Hostname}", 32 | } 33 | ``` 34 | --]] 35 | 36 | require "table" 37 | require "rjson" 38 | require "string" 39 | local ltn12 = require "ltn12" 40 | local time = require "os".time 41 | local socket = require "socket" 42 | local http = require("socket.http") 43 | local address = read_config("address") or "127.0.0.1" 44 | local port = read_config("port") or 9200 45 | local timeout = read_config("timeout") or 10 46 | local discard = read_config("discard_on_error") 47 | local abort = read_config("abort_on_error") 48 | local max_retry = read_config("max_retry") or 0 49 | assert(not (abort and discard), "abort_on_error and discard_on_error are mutually exclusive") 50 | 51 | local encoder_module = read_config("encoder_module") or "encoders.elasticsearch.payload" 52 | local encode = require(encoder_module).encode 53 | if not encode then 54 | error(encoder_module .. 
" does not provide an encode function") 55 | end 56 | 57 | local batch_file = string.format("%s/%s.batch", read_config("output_path"), read_config("Logger")) 58 | local flush_on_shutdown = read_config("flush_on_shutdown") 59 | local ticker_interval = read_config("ticker_interval") 60 | local flush_count = read_config("flush_count") or 50000 61 | assert(flush_count > 0, "flush_count must be greater than zero") 62 | 63 | local client 64 | local function create_client() 65 | local client = http.open(address, port) 66 | client.c:setoption("tcp-nodelay", true) 67 | client.c:setoption("keepalive", true) 68 | client.c:settimeout(timeout) 69 | return client 70 | end 71 | local pcreate_client = socket.protect(create_client); 72 | 73 | 74 | local req_headers = { 75 | ["user-agent"] = http.USERAGENT, 76 | ["content-type"] = "application/x-ndjson", 77 | ["content-length"] = 0, 78 | ["host"] = address .. ":" .. port, 79 | ["accept"] = "application/json", 80 | ["connection"] = "keep-alive", 81 | } 82 | 83 | local function send_request() -- hand coded since socket.http doesn't support keep-alive connections 84 | if not client then client, err = pcreate_client() end 85 | if err then print(err); return false; end 86 | 87 | local success = true 88 | local fh = assert(io.open(batch_file, "r")) 89 | req_headers["content-length"] = fh:seek("end") 90 | client:sendrequestline("POST", "/_bulk") 91 | client:sendheaders(req_headers) 92 | fh:seek("set") 93 | client:sendbody(req_headers, ltn12.source.file(fh, "invalid file handle")) 94 | local code = client:receivestatusline() 95 | local headers 96 | while code == 100 do -- ignore any 100-continue messages 97 | headers = client:receiveheaders() 98 | code = client:receivestatusline() 99 | end 100 | headers = client:receiveheaders() 101 | if code ~= 204 and code ~= 304 and not (code >= 100 and code < 200) then 102 | if code == 200 and string.match(headers["content-type"], "^application/json") then 103 | local body = {} 104 | local sink = ltn12.sink.table(body) 105 | client:receivebody(headers, sink) 106 | local response = table.concat(body) 107 | local ok, doc = pcall(rjson.parse, response) 108 | if ok then 109 | if doc:value(doc:find("errors")) then 110 | print(string.format("ElasticSearch server reported errors processing the submission, not all messages were indexed")) 111 | -- todo track partial batch failure counts https://github.com/mozilla-services/lua_sandbox_extensions/issues/89 112 | -- the partial failure is most likely due to bad input, so no retry is attempted as it would just fail again 113 | end 114 | else 115 | print(string.format("HTTP response didn't contain valid JSON. err: %s", doc)) 116 | end 117 | else 118 | client:receivebody(headers, ltn12.sink.null()) 119 | end 120 | 121 | if code > 304 then 122 | success = false 123 | print(string.format("HTTP response error. 
Status: %d", code)) 124 | end 125 | end 126 | 127 | if headers.connection == "close" then 128 | client:close() 129 | client = nil 130 | end 131 | 132 | return success 133 | end 134 | local psend_request = socket.protect(function() return send_request() end) 135 | 136 | 137 | local send_on_start = false 138 | local last_flush = time() 139 | local batch_count = 0 140 | local retry_count = 0 141 | local batch = assert(io.open(batch_file, "a+")) 142 | for _ in io.lines(batch_file) do -- ensure we have a correct count when resuming after an abort 143 | batch_count = batch_count + 1 144 | end 145 | batch_count = batch_count / 2 146 | if batch_count >= flush_count then 147 | send_on_start = true 148 | end 149 | 150 | local function finalize_batch() 151 | last_flush = time() 152 | batch_count = 0 153 | retry_count = 0 154 | batch:close() 155 | batch = assert(io.open(batch_file, "w")) 156 | end 157 | 158 | local function send_batch() 159 | batch:flush() 160 | local ok, err = psend_request() 161 | if not ok then 162 | if err then print(err) end 163 | client = nil 164 | retry_count = retry_count + 1 165 | if discard and retry_count > max_retry then 166 | print(string.format("discarded %d messages", batch_count)) 167 | finalize_batch() 168 | return true 169 | elseif abort and retry_count > max_retry then 170 | error(string.format("Abort sending %d messages after %d attempts", batch_count, retry_count)) 171 | end 172 | return false 173 | end 174 | finalize_batch() 175 | return true 176 | end 177 | 178 | 179 | function process_message() 180 | if batch_count >= flush_count then -- attempt to transmit a failed batch before accepting new data 181 | if not send_batch() then 182 | return -3 -- retry until successful or it errors out 183 | end 184 | if not send_on_start then 185 | return 0 -- break the retry loop and allow new data to start flowing again 186 | end 187 | send_on_start = false 188 | end 189 | 190 | local ok, data = pcall(encode) 191 | if not ok then return -1, data end 192 | if not data then return -2 end 193 | batch:write(data) 194 | batch_count = batch_count + 1 195 | 196 | if batch_count >= flush_count then 197 | send_batch() 198 | end 199 | return 0 200 | end 201 | 202 | 203 | function timer_event(ns, shutdown) 204 | local timedout = (ns / 1e9 - last_flush) >= ticker_interval 205 | if (timedout or (shutdown and flush_on_shutdown)) and batch_count > 0 then 206 | send_batch() 207 | end 208 | end 209 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | astroid==2.4.2 2 | autoflake==1.3.1 3 | autopep8==1.5.4 4 | coverage==5.2.1 5 | invoke==1.4.1 6 | isort==4.3.21 7 | lazy-object-proxy==1.4.3 8 | mccabe==0.6.1 9 | nose==1.3.7 10 | pycodestyle==2.6.0 11 | pyflakes==2.2.0 12 | pylint==2.5.3 13 | six==1.15.0 14 | toml==0.10.1 15 | wrapt==1.12.1 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | boto3==1.5.32 3 | botocore==1.12.97 4 | docutils==0.16 5 | jmespath==0.9.3 6 | python-dateutil==2.8.1 7 | PyYAML==5.4 8 | s3transfer==0.1.13 9 | six==1.15.0 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | with-coverage=1 3 | cover-erase=1 4 | 
/requirements-dev.txt:
--------------------------------------------------------------------------------
 1 | astroid==2.4.2
 2 | autoflake==1.3.1
 3 | autopep8==1.5.4
 4 | coverage==5.2.1
 5 | invoke==1.4.1
 6 | isort==4.3.21
 7 | lazy-object-proxy==1.4.3
 8 | mccabe==0.6.1
 9 | nose==1.3.7
10 | pycodestyle==2.6.0
11 | pyflakes==2.2.0
12 | pylint==2.5.3
13 | six==1.15.0
14 | toml==0.10.1
15 | wrapt==1.12.1
16 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | ansicolors==1.1.8
 2 | boto3==1.5.32
 3 | botocore==1.12.97
 4 | docutils==0.16
 5 | jmespath==0.9.3
 6 | python-dateutil==2.8.1
 7 | PyYAML==5.4
 8 | s3transfer==0.1.13
 9 | six==1.15.0
10 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [nosetests]
 2 | with-coverage=1
 3 | cover-erase=1
 4 | cover-package=cloudtracker
 5 | cover-html=1
 6 | cover-html-dir=htmlcov
 7 | tests = tests/unit
 8 | 
 9 | [aliases]
10 | test=nosetests
11 | 
12 | # Exclude: __pycache__ / .pyc
13 | [coverage:run]
14 | omit =
15 |     # omit anything in a .local directory anywhere
16 |     */.local/*
17 |     utils/*
18 |     */virtualenv/*
19 |     */venv/*
20 |     */.venv/*
--------------------------------------------------------------------------------
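The `[nosetests]` section keys map one-to-one onto nose command-line flags, and the `[aliases]` entry routes `python setup.py test` through them. A sketch of the equivalent programmatic run (nose is pinned in requirements-dev.txt; the paths and flags are the ones from the config above):

```python
# A sketch of what `python setup.py test` / `nosetests` resolves to given the
# setup.cfg options above; each flag mirrors a config key one-to-one.
import nose

nose.run(argv=[
    "nosetests", "tests/unit",
    "--with-coverage", "--cover-erase",
    "--cover-package=cloudtracker",
    "--cover-html", "--cover-html-dir=htmlcov",
])
```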
26 | c.run("python setup.py -q sdist bdist_wheel") 27 | 28 | 29 | @task(pre=[build_package]) 30 | def install_package(c): 31 | """Install the package built from the current directory contents (not PyPi)""" 32 | c.run("pip3 install -q dist/cloudtracker-*.tar.gz") 33 | 34 | 35 | @task 36 | def uninstall_package(c): 37 | """Uninstall the package""" 38 | c.run('echo "y" | pip3 uninstall cloudtracker', pty=True) 39 | c.run("rm -rf dist/*", pty=True) 40 | 41 | 42 | @task(pre=[install_package]) 43 | def help_check(c): 44 | """Print the version to make sure the package installation didn't irrationally break""" 45 | try: 46 | c.run("./bin/cloudtracker --help", pty=True) 47 | except UnexpectedExit as u_e: 48 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 49 | sys.exit(1) 50 | except Failure as f_e: 51 | logger.critical(f"FAIL: Failure: {f_e}") 52 | sys.exit(1) 53 | 54 | 55 | # TEST - format 56 | @task 57 | def fmt(c): 58 | """Auto format code with Python autopep8""" 59 | try: 60 | c.run("autopep8 cloudtracker/") 61 | except UnexpectedExit as u_e: 62 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 63 | sys.exit(1) 64 | except Failure as f_e: 65 | logger.critical(f"FAIL: Failure: {f_e}") 66 | sys.exit(1) 67 | 68 | 69 | # TEST - LINT 70 | @task 71 | def run_linter(c): 72 | """Lint the code""" 73 | try: 74 | c.run("pylint cloudtracker/", warn=False) 75 | except UnexpectedExit as u_e: 76 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 77 | sys.exit(1) 78 | except Failure as f_e: 79 | logger.critical(f"FAIL: Failure: {f_e}") 80 | sys.exit(1) 81 | 82 | 83 | # TEST - SECURITY 84 | @task 85 | def security_scan(c): 86 | """Runs `bandit` and `safety check`""" 87 | try: 88 | c.run("bandit -r cloudtracker/") 89 | # c.run("safety check") 90 | except UnexpectedExit as u_e: 91 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 92 | sys.exit(1) 93 | except Failure as f_e: 94 | logger.critical(f"FAIL: Failure: {f_e}") 95 | sys.exit(1) 96 | 97 | 98 | # UNIT TESTING 99 | @task 100 | def run_nosetests(c): 101 | """Unit testing: Runs unit tests using `nosetests`""" 102 | c.run('echo "Running Unit tests"') 103 | try: 104 | c.run("nosetests -v --logging-level=CRITICAL") 105 | except UnexpectedExit as u_e: 106 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 107 | sys.exit(1) 108 | except Failure as f_e: 109 | logger.critical(f"FAIL: Failure: {f_e}") 110 | sys.exit(1) 111 | 112 | 113 | @task 114 | def run_pytest(c): 115 | """Unit testing: Runs unit tests with pytest and coverage""" 116 | c.run('echo "Running Unit tests"') 117 | try: 118 | c.run("python -m coverage run -m pytest -v") 119 | c.run("python -m coverage report -m") 120 | except UnexpectedExit as u_e: 121 | logger.critical(f"FAIL! UnexpectedExit: {u_e}") 122 | sys.exit(1) 123 | except Failure as f_e: 124 | logger.critical(f"FAIL: Failure: {f_e}") 125 | sys.exit(1) 126 | 127 | 128 | build.add_task(build_package, "build") 129 | build.add_task(install_package, "install") 130 | build.add_task(uninstall_package, "uninstall") 131 | 132 | unit.add_task(run_nosetests, "nose") 133 | unit.add_task(run_pytest, "pytest") 134 | 135 | test.add_task(run_linter, "lint") 136 | test.add_task(fmt, "format") 137 | test.add_task(security_scan, "security") 138 | 139 | test.add_task(help_check, "help") 140 | -------------------------------------------------------------------------------- /tests/scripts/pylint.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | echo 'Starting pylint script' 3 | find . 
/tasks.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import sys
  3 | import os
  4 | import logging
  5 | from invoke import task, Collection, UnexpectedExit, Failure
  6 | 
  7 | logger = logging.getLogger(__name__)
  8 | # Create the necessary collections (namespaces)
  9 | ns = Collection()
 10 | 
 11 | test = Collection("test")
 12 | ns.add_collection(test)
 13 | 
 14 | unit = Collection("unit")
 15 | ns.add_collection(unit)
 16 | 
 17 | build = Collection("build")
 18 | ns.add_collection(build)
 19 | 
 20 | 
 21 | # Build
 22 | @task
 23 | def build_package(c):
 24 |     """Build the package from the current directory contents for use with PyPI"""
 25 |     c.run("python -m pip install --upgrade setuptools wheel")
 26 |     c.run("python setup.py -q sdist bdist_wheel")
 27 | 
 28 | 
 29 | @task(pre=[build_package])
 30 | def install_package(c):
 31 |     """Install the package built from the current directory contents (not PyPI)"""
 32 |     c.run("pip3 install -q dist/cloudtracker-*.tar.gz")
 33 | 
 34 | 
 35 | @task
 36 | def uninstall_package(c):
 37 |     """Uninstall the package"""
 38 |     c.run('echo "y" | pip3 uninstall cloudtracker', pty=True)
 39 |     c.run("rm -rf dist/*", pty=True)
 40 | 
 41 | 
 42 | @task(pre=[install_package])
 43 | def help_check(c):
 44 |     """Run the installed CLI's --help to make sure the package installation didn't break the entry point"""
 45 |     try:
 46 |         c.run("./bin/cloudtracker --help", pty=True)
 47 |     except UnexpectedExit as u_e:
 48 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
 49 |         sys.exit(1)
 50 |     except Failure as f_e:
 51 |         logger.critical(f"FAIL: Failure: {f_e}")
 52 |         sys.exit(1)
 53 | 
 54 | 
 55 | # TEST - format
 56 | @task
 57 | def fmt(c):
 58 |     """Auto-format code with autopep8"""
 59 |     try:
 60 |         c.run("autopep8 --in-place --recursive cloudtracker/")
 61 |     except UnexpectedExit as u_e:
 62 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
 63 |         sys.exit(1)
 64 |     except Failure as f_e:
 65 |         logger.critical(f"FAIL: Failure: {f_e}")
 66 |         sys.exit(1)
 67 | 
 68 | 
 69 | # TEST - LINT
 70 | @task
 71 | def run_linter(c):
 72 |     """Lint the code"""
 73 |     try:
 74 |         c.run("pylint cloudtracker/", warn=False)
 75 |     except UnexpectedExit as u_e:
 76 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
 77 |         sys.exit(1)
 78 |     except Failure as f_e:
 79 |         logger.critical(f"FAIL: Failure: {f_e}")
 80 |         sys.exit(1)
 81 | 
 82 | 
 83 | # TEST - SECURITY
 84 | @task
 85 | def security_scan(c):
 86 |     """Run `bandit` (and, when enabled, `safety check`)"""
 87 |     try:
 88 |         c.run("bandit -r cloudtracker/")
 89 |         # c.run("safety check")
 90 |     except UnexpectedExit as u_e:
 91 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
 92 |         sys.exit(1)
 93 |     except Failure as f_e:
 94 |         logger.critical(f"FAIL: Failure: {f_e}")
 95 |         sys.exit(1)
 96 | 
 97 | 
 98 | # UNIT TESTING
 99 | @task
100 | def run_nosetests(c):
101 |     """Unit testing: Runs unit tests using `nosetests`"""
102 |     c.run('echo "Running Unit tests"')
103 |     try:
104 |         c.run("nosetests -v --logging-level=CRITICAL")
105 |     except UnexpectedExit as u_e:
106 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
107 |         sys.exit(1)
108 |     except Failure as f_e:
109 |         logger.critical(f"FAIL: Failure: {f_e}")
110 |         sys.exit(1)
111 | 
112 | 
113 | @task
114 | def run_pytest(c):
115 |     """Unit testing: Runs unit tests with pytest and coverage"""
116 |     c.run('echo "Running Unit tests"')
117 |     try:
118 |         c.run("python -m coverage run -m pytest -v")
119 |         c.run("python -m coverage report -m")
120 |     except UnexpectedExit as u_e:
121 |         logger.critical(f"FAIL! UnexpectedExit: {u_e}")
122 |         sys.exit(1)
123 |     except Failure as f_e:
124 |         logger.critical(f"FAIL: Failure: {f_e}")
125 |         sys.exit(1)
126 | 
127 | 
128 | build.add_task(build_package, "build")
129 | build.add_task(install_package, "install")
130 | build.add_task(uninstall_package, "uninstall")
131 | 
132 | unit.add_task(run_nosetests, "nose")
133 | unit.add_task(run_pytest, "pytest")
134 | 
135 | test.add_task(run_linter, "lint")
136 | test.add_task(fmt, "format")
137 | test.add_task(security_scan, "security")
138 | 
139 | test.add_task(help_check, "help")
140 | 
--------------------------------------------------------------------------------
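The Collection wiring at the bottom is what turns these functions into the dotted task names the CI workflow calls (`invoke test.help`, `invoke unit.nose`): the collection name plus the `add_task` alias form the CLI name. A stripped-down sketch of the same pattern:

```python
# A stripped-down sketch of the namespace wiring in tasks.py; `nose` here is
# a stand-in for run_nosetests.
from invoke import task, Collection

@task
def nose(c):
    """Stand-in for run_nosetests."""
    c.run("nosetests -v --logging-level=CRITICAL")

ns = Collection()           # invoke discovers the module-level `ns`
unit = Collection("unit")
ns.add_collection(unit)
unit.add_task(nose, "nose")

print(sorted(ns.task_names))  # ['unit.nose'] -> invoked as `invoke unit.nose`
```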
/tests/scripts/pylint.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | echo 'Starting pylint script'
 3 | find . -name '*.py' -not -path './docs/source/*' -not -path './venv/*' -exec pylint '{}' +
 4 | 
--------------------------------------------------------------------------------
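The same sweep can be done without the shell via pylint's programmatic entry point; a sketch, assuming the pinned pylint (2.5.3), where `lint.Run` accepts an `exit=` keyword:

```python
# A sketch of tests/scripts/pylint.sh in Python: lint every .py file,
# skipping docs/source and venv paths as the find command does.
from pathlib import Path
from pylint import lint  # pinned in requirements-dev.txt (pylint==2.5.3)

files = [
    str(p) for p in Path(".").rglob("*.py")
    if "docs/source" not in p.as_posix() and "venv" not in p.parts
]
lint.Run(files, exit=False)  # exit=False keeps Run from calling sys.exit()
```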
104 | """ 105 | privileges = Privileges(self.aws_api_list) 106 | policy = [ 107 | { 108 | "Action": "s3:*", 109 | "Effect": "Allow", 110 | "Resource": "*" 111 | }, 112 | { 113 | "Action": "s3:CreateBucket", 114 | "Effect": "Deny", 115 | "Resource": "*" 116 | }, 117 | { 118 | "Action": "s3:*", 119 | "Effect": "Deny", 120 | "Resource": [ 121 | "arn:aws:s3:::super-sensitive-bucket", 122 | "arn:aws:s3:::super-sensitive-bucket/*" 123 | ] 124 | } 125 | ] 126 | for stmt in policy: 127 | privileges.add_stmt(stmt) 128 | self.assertTrue('s3:deletebucket' in privileges.determine_allowed()) 129 | self.assertTrue('s3:createbucket' not in privileges.determine_allowed()) 130 | 131 | 132 | def test_get_actions_from_statement_with_array_of_resources(self): 133 | """ 134 | Test array of resources 135 | """ 136 | privileges = Privileges(self.aws_api_list) 137 | policy = [ 138 | { 139 | "Action": "s3:*", 140 | "Effect": "Allow", 141 | "Resource": "*" 142 | }, 143 | { 144 | "Action": "s3:CreateBucket", 145 | "Effect": "Deny", 146 | "Resource": ["arn:aws:s3:::super-sensitive-bucket", "*"] 147 | } 148 | ] 149 | for stmt in policy: 150 | privileges.add_stmt(stmt) 151 | self.assertTrue('s3:deletebucket' in privileges.determine_allowed()) 152 | self.assertTrue('s3:createbucket' not in privileges.determine_allowed()) 153 | 154 | 155 | def test_get_actions_from_statement_with_conditions(self): 156 | """ 157 | Test that even when we are denied access based on a condition, 158 | the actions are still marked as allowed. 159 | """ 160 | privileges = Privileges(self.aws_api_list) 161 | policy = [ 162 | { 163 | "Sid": "AllowAllActionsForEC2", 164 | "Effect": "Allow", 165 | "Action": "ec2:*", 166 | "Resource": "*" 167 | }, 168 | { 169 | "Sid": "DenyStopAndTerminateWhenMFAIsNotPresent", 170 | "Effect": "Deny", 171 | "Action": [ 172 | "ec2:StopInstances", 173 | "ec2:TerminateInstances" 174 | ], 175 | "Resource": "*", 176 | "Condition": {"BoolIfExists": {"aws:MultiFactorAuthPresent": False}} 177 | } 178 | ] 179 | for stmt in policy: 180 | privileges.add_stmt(stmt) 181 | self.assertTrue('ec2:startinstances' in privileges.determine_allowed()) 182 | self.assertTrue('ec2:stopinstances' in privileges.determine_allowed()) 183 | 184 | 185 | def test_normalize_api_call(self): 186 | """Test normalize_api_call""" 187 | # Ensure the numbers at the end are removed 188 | self.assertEquals(normalize_api_call('lambda', 'ListTags20170331'), 'lambda:listtags') 189 | # Ensure service renaming occurs 190 | self.assertEquals(normalize_api_call('monitoring', 'DescribeAlarms'), 'cloudwatch:describealarms') 191 | 192 | 193 | def test_print_actor_diff(self): 194 | """Test print_actor_diff""" 195 | with capture(print_actor_diff, [], [], False) as output: 196 | self.assertEquals('', output) 197 | 198 | # Test output when you have 3 configured users, but only two actually did anything 199 | with capture(print_actor_diff, ['alice', 'bob'], ['alice', 'bob', 'charlie'], False) as output: 200 | self.assertEquals(' alice\n bob\n- charlie\n', output) 201 | 202 | 203 | def test_print_diff(self): 204 | """Test print_diff""" 205 | 206 | with capture(print_diff, [], [], {}, False) as output: 207 | self.assertEquals('', output) 208 | 209 | def mocked_is_recorded_by_cloudtrail(action): 210 | """Instead of reading the whole file, just cherry pick this one action used in the tests""" 211 | if action == 's3:putobject': 212 | return False 213 | return True 214 | 215 | # One action allowed, and performed, and should be shown 216 | with 
100 |     def test_get_actions_from_statement_with_resources(self):
101 |         """
102 |         Test that even when we are denied access to one resource,
103 |         the actions are still marked as allowed.
104 |         """
105 |         privileges = Privileges(self.aws_api_list)
106 |         policy = [
107 |             {
108 |                 "Action": "s3:*",
109 |                 "Effect": "Allow",
110 |                 "Resource": "*"
111 |             },
112 |             {
113 |                 "Action": "s3:CreateBucket",
114 |                 "Effect": "Deny",
115 |                 "Resource": "*"
116 |             },
117 |             {
118 |                 "Action": "s3:*",
119 |                 "Effect": "Deny",
120 |                 "Resource": [
121 |                     "arn:aws:s3:::super-sensitive-bucket",
122 |                     "arn:aws:s3:::super-sensitive-bucket/*"
123 |                 ]
124 |             }
125 |         ]
126 |         for stmt in policy:
127 |             privileges.add_stmt(stmt)
128 |         self.assertTrue('s3:deletebucket' in privileges.determine_allowed())
129 |         self.assertTrue('s3:createbucket' not in privileges.determine_allowed())
130 | 
131 | 
132 |     def test_get_actions_from_statement_with_array_of_resources(self):
133 |         """
134 |         Test array of resources
135 |         """
136 |         privileges = Privileges(self.aws_api_list)
137 |         policy = [
138 |             {
139 |                 "Action": "s3:*",
140 |                 "Effect": "Allow",
141 |                 "Resource": "*"
142 |             },
143 |             {
144 |                 "Action": "s3:CreateBucket",
145 |                 "Effect": "Deny",
146 |                 "Resource": ["arn:aws:s3:::super-sensitive-bucket", "*"]
147 |             }
148 |         ]
149 |         for stmt in policy:
150 |             privileges.add_stmt(stmt)
151 |         self.assertTrue('s3:deletebucket' in privileges.determine_allowed())
152 |         self.assertTrue('s3:createbucket' not in privileges.determine_allowed())
153 | 
154 | 
155 |     def test_get_actions_from_statement_with_conditions(self):
156 |         """
157 |         Test that even when we are denied access based on a condition,
158 |         the actions are still marked as allowed.
159 |         """
160 |         privileges = Privileges(self.aws_api_list)
161 |         policy = [
162 |             {
163 |                 "Sid": "AllowAllActionsForEC2",
164 |                 "Effect": "Allow",
165 |                 "Action": "ec2:*",
166 |                 "Resource": "*"
167 |             },
168 |             {
169 |                 "Sid": "DenyStopAndTerminateWhenMFAIsNotPresent",
170 |                 "Effect": "Deny",
171 |                 "Action": [
172 |                     "ec2:StopInstances",
173 |                     "ec2:TerminateInstances"
174 |                 ],
175 |                 "Resource": "*",
176 |                 "Condition": {"BoolIfExists": {"aws:MultiFactorAuthPresent": False}}
177 |             }
178 |         ]
179 |         for stmt in policy:
180 |             privileges.add_stmt(stmt)
181 |         self.assertTrue('ec2:startinstances' in privileges.determine_allowed())
182 |         self.assertTrue('ec2:stopinstances' in privileges.determine_allowed())
183 | 
184 | 
185 |     def test_normalize_api_call(self):
186 |         """Test normalize_api_call"""
187 |         # Ensure the numbers at the end are removed
188 |         self.assertEqual(normalize_api_call('lambda', 'ListTags20170331'), 'lambda:listtags')
189 |         # Ensure service renaming occurs
190 |         self.assertEqual(normalize_api_call('monitoring', 'DescribeAlarms'), 'cloudwatch:describealarms')
191 | 
192 | 
193 |     def test_print_actor_diff(self):
194 |         """Test print_actor_diff"""
195 |         with capture(print_actor_diff, [], [], False) as output:
196 |             self.assertEqual('', output)
197 | 
198 |         # Test output when you have 3 configured users, but only two actually did anything
199 |         with capture(print_actor_diff, ['alice', 'bob'], ['alice', 'bob', 'charlie'], False) as output:
200 |             self.assertEqual(' alice\n bob\n- charlie\n', output)
201 | 
202 | 
203 |     def test_print_diff(self):
204 |         """Test print_diff"""
205 | 
206 |         with capture(print_diff, [], [], {}, False) as output:
207 |             self.assertEqual('', output)
208 | 
209 |         def mocked_is_recorded_by_cloudtrail(action):
210 |             """Instead of reading the whole file, just cherry pick this one action used in the tests"""
211 |             if action == 's3:putobject':
212 |                 return False
213 |             return True
214 | 
215 |         # One action allowed, and performed, and should be shown
216 |         with patch('cloudtracker.is_recorded_by_cloudtrail', side_effect=mocked_is_recorded_by_cloudtrail):
217 |             with capture(print_diff,
218 |                          ['s3:createbucket'],  # performed
219 |                          ['s3:createbucket'],  # allowed
220 |                          {'show_benign': True, 'show_used': False, 'show_unknown': True}, False) as output:
221 |                 self.assertEqual(' s3:createbucket\n', output)
222 | 
223 |         # 3 actions allowed, one is used, one is unused, and one is unknown; show all
224 |         with patch('cloudtracker.is_recorded_by_cloudtrail', side_effect=mocked_is_recorded_by_cloudtrail):
225 |             with capture(print_diff,
226 |                          ['s3:createbucket', 'sts:getcalleridentity'],  # performed
227 |                          ['s3:createbucket', 's3:putobject', 's3:deletebucket'],  # allowed
228 |                          {'show_benign': True, 'show_used': False, 'show_unknown': True}, False) as output:
229 |                 self.assertEqual(' s3:createbucket\n- s3:deletebucket\n? s3:putobject\n', output)
230 | 
231 |         # Same as above, but only show the used one
232 |         with patch('cloudtracker.is_recorded_by_cloudtrail', side_effect=mocked_is_recorded_by_cloudtrail):
233 |             with capture(print_diff,
234 |                          ['s3:createbucket', 'sts:getcalleridentity'],  # performed
235 |                          ['s3:createbucket', 's3:putobject', 's3:deletebucket'],  # allowed
236 |                          {'show_benign': True, 'show_used': True, 'show_unknown': True}, False) as output:
237 |                 self.assertEqual(' s3:createbucket\n', output)
238 | 
239 |         # Hide the unknown
240 |         with patch('cloudtracker.is_recorded_by_cloudtrail', side_effect=mocked_is_recorded_by_cloudtrail):
241 |             with capture(print_diff,
242 |                          ['s3:createbucket', 'sts:getcalleridentity'],  # performed
243 |                          ['s3:createbucket', 's3:putobject', 's3:deletebucket'],  # allowed
244 |                          {'show_benign': True, 'show_used': False, 'show_unknown': False}, False) as output:
245 |                 self.assertEqual(' s3:createbucket\n- s3:deletebucket\n', output)
246 | 
247 |     # Role IAM policy to be used in different tests
248 |     role_iam = {
249 |         "AssumeRolePolicyDocument": {},
250 |         "RoleId": "AROA00000000000000000",
251 |         "CreateDate": "2017-01-01T00:00:00Z",
252 |         "InstanceProfileList": [],
253 |         "RoleName": "test_role",
254 |         "Path": "/",
255 |         "AttachedManagedPolicies": [],
256 |         "RolePolicyList": [
257 |             {
258 |                 "PolicyName": "KmsDecryptSecrets",
259 |                 "PolicyDocument": {
260 |                     "Version": "2012-10-17",
261 |                     "Statement": [
262 |                         {
263 |                             "Action": [
264 |                                 "kms:DescribeKey",
265 |                                 "kms:Decrypt"
266 |                             ],
267 |                             "Resource": "*",
268 |                             "Effect": "Allow",
269 |                             "Sid": ""
270 |                         }
271 |                     ]
272 |                 }
273 |             },
274 |             {
275 |                 "PolicyName": "S3PutObject",
276 |                 "PolicyDocument": {
277 |                     "Version": "2012-10-17",
278 |                     "Statement": [
279 |                         {
280 |                             "Action": [
281 |                                 "s3:PutObject",
282 |                                 "s3:PutObjectAcl",
283 |                                 "s3:ListBucket"
284 |                             ],
285 |                             "Resource": "*",
286 |                             "Effect": "Allow"
287 |                         }
288 |                     ]
289 |                 }
290 |             }
291 |         ],
292 |         "Arn": "arn:aws:iam::111111111111:role/test_role"
293 |     }
294 | 
295 |     def test_get_role_iam(self):
296 |         """Test get_role_iam"""
297 |         account_iam = {
298 |             "RoleDetailList": [self.role_iam],
299 |             "UserDetailList": [],
300 |             "GroupDetailList": [],
301 |             "Policies": []
302 |         }
303 | 
304 |         self.assertEqual(self.role_iam, get_role_iam("test_role", account_iam))
305 | 
306 | 
307 |     def test_get_role_allowed_actions(self):
308 |         """Test get_role_allowed_actions"""
309 |         account_iam = {
310 |             "RoleDetailList": [self.role_iam],
311 |             "UserDetailList": [],
312 |             "GroupDetailList": [],
313 |             "Policies": []
314 |         }
315 | 
316 |         aws_api_list = read_aws_api_list()
317 |         self.assertEqual(sorted(['s3:putobject', 'kms:describekey', 'kms:decrypt', 's3:putobjectacl']),
318 |                          sorted(get_role_allowed_actions(aws_api_list, self.role_iam, account_iam)))
319 | 
--------------------------------------------------------------------------------
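Outside of unittest, the helpers these tests exercise compose directly: `Privileges` expands wildcard actions against the bundled AWS API list, applies Deny statements, and reports what remains allowed. A compact sketch (the policy statements are invented, but every call mirrors the tests above):

```python
# A compact sketch reusing the helpers exercised above, outside a TestCase.
# The two policy statements are invented for illustration.
from cloudtracker import Privileges, normalize_api_call, read_aws_api_list

privileges = Privileges(read_aws_api_list())
privileges.add_stmt({"Action": ["s3:*ObjectT*"], "Resource": "*", "Effect": "Allow"})
privileges.add_stmt({"Action": ["s3:GetObjectTagging"], "Resource": "*", "Effect": "Deny"})

# The allowed set is the wildcard expansion minus the explicit Deny, so it
# should include s3:putobjecttagging but not s3:getobjecttagging.
print(sorted(privileges.determine_allowed()))

# CloudTrail event names normalize the same way the tests show:
print(normalize_api_call("monitoring", "DescribeAlarms"))  # cloudwatch:describealarms
```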