├── .coveragerc ├── .github ├── dependabot.yml └── workflows │ ├── build-test-deploy.yml │ └── build-test.yml ├── .gitignore ├── CHANGELOG ├── CODEOWNERS ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── dev-requirements.txt ├── nightfall ├── __init__.py ├── alerts.py ├── api.py ├── detection_rules.py ├── exceptions.py └── findings.py ├── pytest.ini ├── setup.py └── tests ├── __init__.py └── test_api.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | env/* 4 | venv/* 5 | tests/* 6 | */__init__.py 7 | setup.py 8 | 9 | source = 10 | . 11 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Enable version updates for Github Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | # Check for updates once daily 7 | schedule: 8 | interval: "daily" 9 | - package-ecosystem: "pip" 10 | directory: "/" 11 | # Check for updates once daily 12 | schedule: 13 | interval: "daily" 14 | -------------------------------------------------------------------------------- /.github/workflows/build-test-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Nightfall Package Deploy to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | test-and-publish: 9 | name: Publish to PyPI 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python 14 | uses: actions/setup-python@v4 15 | with: 16 | python-version: 3.9 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install -r dev-requirements.txt 21 | pip install -e . 22 | - name: Lint with flake8 23 | run: | 24 | # stop the build if there are Python syntax errors or undefined names 25 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 26 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 27 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 28 | - name: Test with pytest 29 | env: 30 | NIGHTFALL_API_KEY: ${{ secrets.NIGHTFALL_API_KEY }} 31 | run: | 32 | pytest -m "not filetest" --cov=nightfall --cov-report term-missing tests 33 | - name: Build a binary wheel and source tarball 34 | run: | 35 | python -m build --sdist --wheel --outdir dist/ 36 | - name: Publish distribution to PyPI 37 | uses: pypa/gh-action-pypi-publish@master 38 | with: 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /.github/workflows/build-test.yml: -------------------------------------------------------------------------------- 1 | name: Nightfall Package Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [3.7, 3.8, 3.9, "3.10"] 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Python ${{ matrix.python-version }} 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | - name: Install dependencies and package 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install -r dev-requirements.txt 22 | pip install -e . 23 | - name: Lint with flake8 24 | run: | 25 | # stop the build if there are Python syntax errors or undefined names 26 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 27 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 28 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 29 | - name: Test with pytest 30 | env: 31 | NIGHTFALL_API_KEY: ${{ secrets.NIGHTFALL_API_KEY }} 32 | run: | 33 | pytest -m "not filetest" --cov=nightfall --cov-report term-missing tests 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docsrc/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # Other 141 | sandbox.py 142 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Here you can see the full list of changes between each Nightfall release. 5 | 6 | Version 1.4.1 7 | ------------- 8 | 9 | Released on July 19, 2022 10 | 11 | - Added `commitAuthor` to finding response objects 12 | 13 | Version 1.4.0 14 | ------------- 15 | 16 | Released on February 23, 2022 17 | 18 | - Added `policy_uuids` option to text scanning api 19 | - Added `alert_config` option to text and file scanning 20 | - Added `commitHash` to finding response objects 21 | - Added `rowRange` and `columnRange` to finding response objects 22 | - Made some fields `Optional` that were documented to potentially be `None` but not marked as such with types. 23 | - README examples now run as doctests 24 | 25 | Version 1.3.0 26 | ------------- 27 | 28 | Released on January 5, 2022 29 | 30 | - Added `default_redaction_config` option to text scanning api 31 | 32 | Version 1.2.0 33 | ------------- 34 | 35 | Released on December 7, 2021 36 | 37 | - Added automated retry on 429 responses from the Nightfall Developer Platform 38 | - Added optional `request_metadata` field to file scanning requests 39 | - Fixed file scanning requests for binary files 40 | 41 | Version 1.1.1 42 | ------------- 43 | 44 | Released on November 29, 2021 45 | 46 | - Add CONTRIBUTING, CODEOWNERS and update README 47 | - Update User-Agent header 48 | 49 | Version 1.1.0 50 | ------------- 51 | 52 | Released on November 22, 2021 53 | 54 | - *[BACKWARDS INCOMPATIBLE]* `validate_webhook` returns False instead of 55 | throwing an exception if the request is invalid 56 | - Added optional context byte setting to scan_text 57 | - Updated Docutils to 0.18 58 | 59 | Version 1.0.3 60 | ------------- 61 | 62 | Released on November 2, 2021 63 | 64 | - Bugfix fix classifier on module. 65 | 66 | Version 1.0.2 67 | ------------- 68 | 69 | Released on November 2, 2021 70 | 71 | - Bugfix to send detection rule name in request. 72 | 73 | Version 1.0.1 74 | ------------- 75 | 76 | Released on November 1, 2021 77 | 78 | - Updated documentation link 79 | 80 | Version 1.0.0 81 | ------------- 82 | 83 | Released on November 1, 2021 84 | 85 | - Revised to use Nightfall v3 API. 86 | - Add in file scanning functionality. 87 | 88 | .. warning:: 89 | This is a breaking change compared to all versions before it, using 90 | Nightfall's new APIs and adding in file scanning. Please check the 91 | new methods provided to migrate from the older API. 92 | 93 | Version 0.6.0 94 | ------------- 95 | 96 | Released on July 25, 2021 97 | 98 | - Update scan and chunking interface to handle dict with multiple items 99 | instead of list of dicts. 100 | 101 | .. warning:: 102 | This is a breaking change compared to version 0.5.0, but all users are 103 | recommended to upgrade to this version. This version represents an 104 | improvement on the previous iteration where instead of handling a list of 105 | dicts, we now handle a single dict with multiple entries. This simplifies 106 | the library code and makes the interface much more usable. 107 | 108 | The previous version of the SDK required users to pass in a list of dicts. 109 | We now require users to pass in a single dict with multiple entries. 110 | ``nightfall.scan([{'id': 'string}])`` should now be 111 | ``nightfall.scan({'id': 'string'})``. 112 | 113 | Version 0.5.0 114 | ------------- 115 | 116 | Released on July 12, 2021 117 | 118 | - Publish Sphinx docs to GitHub pages 119 | - Split up unit and integration tests, use mocking for unit tests 120 | - Update scan and chunking method to scan strings from a dict 121 | - Stop attempting to split strings and instead raise an exception 122 | - Redefine ``MAX_PAYLOAD_SIZE`` constant to be actual max of 500_000 bytes 123 | - Updated documentation to illustrate how to use debug logging with this library 124 | 125 | .. warning:: 126 | Scanning dicts instead of strings is a breaking change. Previous versions 127 | of the SDK accepted a list of strings for scanning. We now require users 128 | to pass in a list of dicts instead. ``nightfall.scan(['string'])`` should 129 | now be ``nightfall.scan([{'id': 'string'}])``. This allows you to keep 130 | track of the reference of where the string came from for further processing. 131 | 132 | Version 0.4.0 133 | ------------- 134 | 135 | Released on June 20, 2021 136 | 137 | - Add debug logs to Nightfall module 138 | - Change primary class name from ``Api`` to ``Nightfall`` to make things a bit 139 | more clear when this library is used in other programs and allow ``from 140 | nightfall import Nightfall`` 141 | 142 | .. warning:: 143 | This is a breaking change, since the previous version of this SDK 144 | imported Nightfall using ``from nightfall.api import Api`` 145 | 146 | Version 0.3.0 147 | ------------- 148 | 149 | Released on June 13, 2021 150 | 151 | - Implement basic chunking algorithm to split payloads per the API limits. 152 | 153 | Version 0.2.0 154 | ------------- 155 | 156 | .. note:: 157 | This is an initial Beta release. 158 | 159 | Released on June 13, 2021 160 | 161 | - Basic project tooling put into place. 162 | - Continuous Integration with GitHub Actions 163 | - Packaging and uploading to PyPI 164 | - Code Coverage 165 | - Testing with unittest 166 | - Basic Documentation in place 167 | - Add support for using the API with a token 168 | - Add support for scan API endpoint 169 | 170 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in 2 | # the repo. 3 | * @evanfuller @belambert @dhertz 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Thank you for considering contributing to the Nightfall Python SDK. 4 | 5 | There are many ways to contribute, such as: 6 | * Writing code samples 7 | * Suggesting documentation improvements 8 | * Submitting bug reports and feature requests 9 | * Writing code to improve the library itself. 10 | 11 | Please, don't use the issue tracker for personal support questions. Feel free to reach out to `support@nightfall.ai` 12 | to address those issues. 13 | 14 | ## Responsibilities 15 | * Ensure cross-platform compatibility for every change that's accepted. Windows, Mac, Debian & Ubuntu Linux, etc. 16 | * Ensure backwards compatibility with Python 3.7+. 17 | * Create issues for each major change and enhancement that you wish to make. 18 | * Discuss proposed changes transparently and collect community feedback. 19 | * Avoid introducing new external dependencies whenever possible. When absolutely required, validate the software 20 | licenses used by these dependencies (e.g. avoid unintentional copyleft requirements). 21 | 22 | ## How to report a bug 23 | 24 | ### Security Disclosures 25 | If you find a security vulnerability, do NOT open an issue. Email `security@nightfall.ai` instead. 26 | 27 | In order to determine whether you are dealing with a security issue, ask yourself the following questions: 28 | * Can I access something that's not mine, or something I shouldn't have access to? 29 | * Can I disable something for other people? 30 | * Is there a potential vulnerability stemming from a library dependency? 31 | 32 | If you answered yes to any of the above questions, then you're probably dealing with a security issue. 33 | Note that even if you answer "no" to all questions, you may still be dealing with a security issue, so if you're 34 | unsure, just email us at `security@nightfall.ai`. 35 | 36 | ### Creating an Issue 37 | When filing an issue, make sure to answer these questions: 38 | 1. What version of Java are you using? 39 | 2. What operating system and processor architecture are you using? 40 | 3. How did you discover the issue? 41 | 4. Is the issue reproducible? What are the steps to reproduce? 42 | 5. What did you expect to see? 43 | 6. What did you see instead? 44 | 45 | 46 | ## Suggesting a New Feature 47 | 48 | If you find yourself wishing for a feature that doesn't exist in this SDK, you are probably not alone. 49 | There are bound to be others out there with similar needs. Open an issue on our issues list on GitHub which 50 | describes the feature you would like to see, why you need it, and how it should work. 51 | 52 | ## Code Review 53 | 54 | The core team looks at open pull requests on a regular basis. In order for your pull request to be merged, it 55 | must meet the following requirements: 56 | * It must pass the checkstyle linter; this should be run automatically when you run `mvn package`. 57 | * It must add unit tests to cover any new functionality. 58 | * It must get approval from one of the code owners. 59 | 60 | If a pull request remains idle for more than two weeks, we may close it. 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Nightfall 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Nightfall Python SDK 2 | 3 | **Embed Nightfall scanning and detection functionality into Python applications** 4 | 5 | [![PyPI version](https://badge.fury.io/py/nightfall.svg)](https://badge.fury.io/py/nightfall) 6 | 7 | ## Features 8 | 9 | This SDK provides Python functions for interacting with the Nightfall API. It allows you to add functionality to your 10 | applications to scan plain text and files in order to detect different categories of information. You can leverage any 11 | of the detectors in Nightfall's pre-built library, or you may programmatically define your own custom detectors. 12 | 13 | Additionally, this library provides convenience features such as encapsulating the steps to chunk and upload files. 14 | 15 | To obtain an API Key, login to the [Nightfall dashboard](https://app.nightfall.ai/) and click the section 16 | titled "Manage API Keys". 17 | 18 | See our [developer documentation](https://docs.nightfall.ai/docs/entities-and-terms-to-know) for more details about 19 | integrating with the Nightfall API. 20 | 21 | ## Dependencies 22 | 23 | The Nightfall Python SDK requires Python 3.7 or later. 24 | 25 | For a full list of external dependencies please consult `setup.py`. 26 | 27 | 28 | ## Installation 29 | 30 | Python 2 31 | ``` 32 | pip install nightfall 33 | ``` 34 | 35 | Python 3 36 | ``` 37 | pip3 install nightfall 38 | ``` 39 | 40 | ## Usage 41 | 42 | 43 | ### Scanning Plain Text 44 | 45 | Nightfall provides pre-built detector types, covering data types ranging from PII to PHI to credentials. The following 46 | snippet shows an example of how to scan using pre-built detectors. 47 | 48 | #### Sample Code 49 | 50 | ```python 51 | from nightfall import Confidence, DetectionRule, Detector, Nightfall 52 | 53 | # By default, the client reads the API key from the environment variable NIGHTFALL_API_KEY 54 | nightfall = Nightfall() 55 | 56 | # A rule contains a set of detectors to scan with 57 | cc = Detector(min_confidence=Confidence.LIKELY, nightfall_detector="CREDIT_CARD_NUMBER") 58 | ssn = Detector(min_confidence=Confidence.POSSIBLE, nightfall_detector="US_SOCIAL_SECURITY_NUMBER") 59 | detection_rule = DetectionRule([cc, ssn]) 60 | 61 | findings, _ = nightfall.scan_text( ["hello world", "my SSN is 678-99-8212", "4242-4242-4242-4242"], detection_rules=[detection_rule]) 62 | 63 | print(findings) 64 | # Example output: [[], [Finding(finding='678-99-8212', redacted_finding=...)]] 65 | 66 | ``` 67 | 68 | 69 | 70 | ### Scanning Files 71 | 72 | Scanning common file types like PDF's or office documents typically requires cumbersome text 73 | extraction methods like OCR. 74 | 75 | Rather than implementing this functionality yourself, the Nightfall API allows you to upload the 76 | original files, and then we'll handle the heavy lifting. 77 | 78 | The file upload process is implemented as a series of requests to upload the file in chunks. The library 79 | provides a single method that wraps the steps required to upload your file. Please refer to the 80 | [API Reference](https://docs.nightfall.ai/reference) for more details. 81 | 82 | The file is uploaded synchronously, but as files can be arbitrarily large, the scan itself is conducted asynchronously. 83 | The results from the scan are delivered by webhook; for more information about setting up a webhook server, refer to 84 | [the docs](https://docs.nightfall.ai/docs/creating-a-webhook-server). 85 | 86 | #### Sample Code 87 | 88 | ```python 89 | from nightfall import Confidence, DetectionRule, Detector, Nightfall 90 | import os 91 | 92 | # By default, the client reads the API key from the environment variable NIGHTFALL_API_KEY 93 | nightfall = Nightfall() 94 | 95 | # A rule contains a set of detectors to scan with 96 | cc = Detector(min_confidence=Confidence.LIKELY, nightfall_detector="CREDIT_CARD_NUMBER") 97 | ssn = Detector(min_confidence=Confidence.POSSIBLE, nightfall_detector="US_SOCIAL_SECURITY_NUMBER") 98 | detection_rule = DetectionRule([cc, ssn]) 99 | 100 | 101 | # Upload the file and start the scan. 102 | # These are conducted asynchronously, so provide a webhook route to an HTTPS server to send results to. 103 | id, message = nightfall.scan_file( "./README.md", os.environ["WEBHOOK_ENDPOINT"], detection_rules=[detection_rule]) 104 | print("started scan", id, message) 105 | # Example output: started scan...scan initiated 106 | 107 | ``` 108 | 109 | ## Contributing 110 | 111 | Contributions are welcome! Open a pull request to fix a bug, or open an issue to discuss a new feature 112 | or change. Please adhere to the linting criteria expected by flake8, and be sure to add unit tests for 113 | any new functionality you add. 114 | 115 | Refer to `CONTRIBUTING.md` for the full details. 116 | 117 | ## License 118 | 119 | This code is licensed under the terms of the MIT License. See [here](https://opensource.org/licenses/MIT) 120 | for more information. 121 | 122 | Please create an issue with a description of your problem, or open a pull request with the fix. 123 | 124 | ## Development 125 | 126 | ### Installing Development Dependencies 127 | 128 | If you want to hack on this project, you should set up your local development 129 | environment with the following commands: 130 | 131 | 1. Fork and clone this repo and open a terminal with the root of this repository in your working directory. 132 | 1. Create and activate a virtualenv `python3 -m venv venv && source venv/bin/activate` 133 | 1. Install development dependencies with `pip install -r dev-requirements.txt` 134 | 1. Install an editable version of this package `pip install -e .` 135 | 136 | ### Run Unit Tests 137 | 138 | Unit and Integration tests can be found in the `tests/` directory. You can run them with `pytest`. Be sure to have `NIGHTFALL_API_KEY` set as an environment variable before running the tests. 139 | 140 | ### View Code Coverage 141 | 142 | You can view the code coverage report by running `coverage html` and `python3 -m http.server --directory htmlcov` after running the unit tests. 143 | 144 | ### Creating a Release 145 | 146 | Releases are automatically published to PyPI using GitHub Actions. Creating a release in GitHub will trigger a new build that will publish the latest version of this library to [PyPI](https://pypi.org/project/nightfall/). 147 | 148 | The steps to do this are: 149 | 150 | 1. Add what changed to the CHANGELOG file 151 | 2. Update the version in `setup.py` 152 | 3. Commit changes and push to the main branch. 153 | 4. Create a new release in the GitHub UI. 154 | 5. Observe the release action succeed and see the latest version of this library on PyPI. 155 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | docutils==0.19 2 | pylint 3 | doc8 4 | coverage 5 | codecov 6 | twine 7 | sphinx 8 | sphinx-autobuild 9 | sphinx_rtd_theme 10 | build 11 | flake8 12 | autopep8 13 | pytest 14 | pytest-cov 15 | requests 16 | responses 17 | freezegun 18 | importlib-metadata==4.13.0 19 | -------------------------------------------------------------------------------- /nightfall/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | nightfall module 3 | ~~~~~~~~~~~~~~~~ 4 | This module provides an SDK for Nightfall. 5 | :copyright: (c) 2021 Nightfall 6 | :license: MIT, see LICENSE for more details. 7 | """ 8 | from .api import Nightfall 9 | from .alerts import SlackAlert, EmailAlert, WebhookAlert, AlertConfig 10 | from .detection_rules import (Regex, WordList, Confidence, ContextRule, MatchType, ExclusionRule, MaskConfig, 11 | RedactionConfig, Detector, LogicalOp, DetectionRule) 12 | from .findings import Finding, Range 13 | 14 | __all__ = ["Nightfall", "SlackAlert", "EmailAlert", "WebhookAlert", "AlertConfig", "Regex", "WordList", "Confidence", 15 | "ContextRule", "MatchType", "ExclusionRule", "MaskConfig", "RedactionConfig", "Detector", "LogicalOp", 16 | "DetectionRule", "Finding", "Range"] 17 | -------------------------------------------------------------------------------- /nightfall/alerts.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from typing import List, Tuple, Optional 4 | 5 | @dataclass 6 | class SlackAlert: 7 | """SlackAlert contains the configuration required to allow clients to send asynchronous alerts to a Slack 8 | workspace when findings are detected. Note that in order for Slack alerts to be delivered to your workspace, 9 | you must use authenticate Nightfall to your Slack workspace under the Settings menu on the Nightfall Dashboard. 10 | 11 | Currently, Nightfall supports delivering alerts to public channels, formatted like "#general". 12 | Alerts are only sent if findings are detected. 13 | Attributes: 14 | target (str): the channel name, formatted like "#general". 15 | """ 16 | target: str 17 | 18 | def as_dict(self): 19 | return {"target": self.target} 20 | 21 | @dataclass 22 | class EmailAlert: 23 | """EmailAlert contains the configuration required to allow clients to send an asynchronous email message 24 | when findings are detected. The findings themselves will be delivered as a file attachment on the email. 25 | Alerts are only sent if findings are detected. 26 | Attributes: 27 | address (str): the email address to which alerts should be sent. 28 | """ 29 | address: str 30 | 31 | def as_dict(self): 32 | return {"address": self.address} 33 | 34 | @dataclass 35 | class WebhookAlert: 36 | """WebhookAlert contains the configuration required to allow clients to send a webhook event to an 37 | external URL when findings are detected. The URL provided must (1) use the HTTPS scheme, (2) have a 38 | route defined on the HTTP POST method, and (3) return a 200 status code upon receipt of the event. 39 | 40 | In contrast to other platforms, when using the file scanning APIs, an alert is also sent to this webhook 41 | *even when there are no findings*. 42 | Attributes: 43 | address (str): the URL to which alerts should be sent. 44 | """ 45 | address: str 46 | 47 | def as_dict(self): 48 | return {"address": self.address} 49 | 50 | @dataclass 51 | class AlertConfig: 52 | """AlertConfig allows clients to specify where alerts should be delivered when findings are discovered as 53 | part of a scan. These alerts are delivered asynchronously to all destinations specified in the object instance. 54 | Attributes: 55 | slack (SlackAlert): Send alerts to a Slack workspace when findings are detected. 56 | email (EmailAlert): Send alerts to an email address when findings are detected. 57 | url (WebhookAlert): Send an HTTP webhook event to a URL when findings are detected. 58 | """ 59 | slack: Optional[SlackAlert] = None 60 | email: Optional[EmailAlert] = None 61 | url: Optional[WebhookAlert] = None 62 | 63 | def as_dict(self): 64 | result = {} 65 | if self.slack: 66 | result["slack"] = self.slack.as_dict() 67 | if self.email: 68 | result["email"] = self.email.as_dict() 69 | if self.url: 70 | result["url"] = self.url.as_dict() 71 | return result 72 | 73 | -------------------------------------------------------------------------------- /nightfall/api.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | nightfall.api 4 | ~~~~~~~~~~~~~ 5 | This module provides a class which abstracts the Nightfall REST API. 6 | """ 7 | from datetime import datetime, timedelta 8 | import hmac 9 | import hashlib 10 | import logging 11 | import os 12 | from typing import List, Tuple, Optional 13 | 14 | import requests 15 | from requests.adapters import HTTPAdapter 16 | from urllib3 import Retry 17 | 18 | from nightfall.alerts import AlertConfig 19 | from nightfall.detection_rules import DetectionRule, RedactionConfig 20 | from nightfall.exceptions import NightfallUserError, NightfallSystemError 21 | from nightfall.findings import Finding 22 | 23 | 24 | class Nightfall: 25 | PLATFORM_URL = "https://api.nightfall.ai" 26 | TEXT_SCAN_ENDPOINT_V3 = PLATFORM_URL + "/v3/scan" 27 | FILE_SCAN_INITIALIZE_ENDPOINT = PLATFORM_URL + "/v3/upload" 28 | FILE_SCAN_UPLOAD_ENDPOINT = PLATFORM_URL + "/v3/upload/{0}" 29 | FILE_SCAN_COMPLETE_ENDPOINT = PLATFORM_URL + "/v3/upload/{0}/finish" 30 | FILE_SCAN_SCAN_ENDPOINT = PLATFORM_URL + "/v3/upload/{0}/scan" 31 | 32 | def __init__(self, key: Optional[str] = None, signing_secret: Optional[str] = None): 33 | """Instantiate a new Nightfall object. 34 | :param key: Your Nightfall API key. If None it will be read from the environment variable NIGHTFALL_API_KEY. 35 | :type key: str or None 36 | :param signing_secret: Your Nightfall signing secret used for webhook validation. 37 | :type signing_secret: str or None 38 | """ 39 | if key: 40 | self.key = key 41 | else: 42 | self.key = os.getenv("NIGHTFALL_API_KEY") 43 | 44 | if not self.key: 45 | raise NightfallUserError("need an API key either in constructor or in NIGHTFALL_API_KEY environment var", 46 | 40001) 47 | 48 | self.signing_secret = signing_secret 49 | self.logger = logging.getLogger(__name__) 50 | self.session = requests.Session() 51 | retries = Retry(total=5, allowed_methods=Retry.DEFAULT_ALLOWED_METHODS | {"PATCH", "POST"}) 52 | self.session.mount('https://', HTTPAdapter(max_retries=retries)) 53 | self.session.headers = { 54 | "Content-Type": "application/json", 55 | "User-Agent": "nightfall-python-sdk/1.4.1", 56 | 'Authorization': f'Bearer {self.key}', 57 | } 58 | 59 | def scan_text(self, texts: List[str], policy_uuids: List[str] = None, detection_rules: Optional[List[DetectionRule]] = None, 60 | detection_rule_uuids: Optional[List[str]] = None, context_bytes: Optional[int] = None, 61 | default_redaction_config: Optional[RedactionConfig] = None, alert_config: Optional[AlertConfig] = None) ->\ 62 | Tuple[List[List[Finding]], List[str]]: 63 | """Scan text with Nightfall. 64 | 65 | This method takes the specified config and then makes 66 | one or more requests to the Nightfall API for scanning. 67 | 68 | A caller must provide exactly one of the following: 69 | * a non-empty policy_uuids list (current maximum supported length = 1) 70 | * at least one of detection_rule_uuids or detection_rules 71 | 72 | :param texts: List of strings to scan. 73 | :type texts: List[str] 74 | :param policy_uuids: List of policy UUIDs to scan each text with. 75 | These can be created in the Nightfall UI. 76 | :type policy_uuids: List[str] or None 77 | :param detection_rules: List of detection rules to scan each text with. 78 | :type detection_rules: List[DetectionRule] or None 79 | :param detection_rule_uuids: List of detection rule UUIDs to scan each text with. 80 | These can be created in the Nightfall UI. 81 | :type detection_rule_uuids: List[str] or None 82 | :param context_bytes: The number of bytes of context (leading and trailing) to return with any matched findings. 83 | :type context_bytes: int or None 84 | :param default_redaction_config: The default redaction configuration to apply to all detection rules, unless 85 | there is a more specific config within a detector. 86 | :type default_redaction_config: RedactionConfig or None 87 | :param alert_config: Configures external destinations to fan out alerts to in the event that findings are detected. 88 | :type alert_config: AlertConfig or None 89 | :returns: list of findings, list of redacted input texts 90 | """ 91 | 92 | if not policy_uuids and not detection_rule_uuids and not detection_rules: 93 | raise NightfallUserError("at least one of policy_uuids, detection_rule_uuids, or detection_rules is required", 40001) 94 | 95 | policy = {} 96 | if detection_rule_uuids: 97 | policy["detectionRuleUUIDs"] = detection_rule_uuids 98 | if detection_rules: 99 | policy["detectionRules"] = [d.as_dict() for d in detection_rules] 100 | if context_bytes: 101 | policy["contextBytes"] = context_bytes 102 | if default_redaction_config: 103 | policy["defaultRedactionConfig"] = default_redaction_config.as_dict() 104 | if alert_config: 105 | policy["alertConfig"] = alert_config.as_dict() 106 | 107 | request_body = { 108 | "payload": texts 109 | } 110 | if policy: 111 | request_body["policy"] = policy 112 | if policy_uuids: 113 | request_body["policyUUIDs"] = policy_uuids 114 | response = self._scan_text_v3(request_body) 115 | 116 | _validate_response(response, 200) 117 | 118 | parsed_response = response.json() 119 | 120 | findings = [[Finding.from_dict(f) for f in item_findings] for item_findings in parsed_response["findings"]] 121 | return findings, parsed_response.get("redactedPayload") 122 | 123 | def _scan_text_v3(self, data: dict): 124 | response = self.session.post(url=self.TEXT_SCAN_ENDPOINT_V3, json=data) 125 | 126 | self.logger.debug(f"HTTP Request URL: {response.request.url}") 127 | self.logger.debug(f"HTTP Request Body: {response.request.body}") 128 | self.logger.debug(f"HTTP Request Headers: {response.request.headers}") 129 | self.logger.debug(f"HTTP Status Code: {response.status_code}") 130 | self.logger.debug(f"HTTP Response Headers: {response.headers}") 131 | self.logger.debug(f"HTTP Response Text: {response.text}") 132 | 133 | return response 134 | 135 | # File Scan 136 | 137 | def scan_file(self, location: str, webhook_url: Optional[str] = None, policy_uuid: Optional[str] = None, 138 | detection_rules: Optional[List[DetectionRule]] = None, 139 | detection_rule_uuids: Optional[List[str]] = None, 140 | request_metadata: Optional[str] = None, 141 | alert_config: Optional[AlertConfig] = None) -> Tuple[str, str]: 142 | """Scan file with Nightfall. 143 | At least one of policy_uuid, detection_rule_uuids or detection_rules is required. 144 | 145 | :param location: location of file to scan. 146 | :param webhook_url: webhook endpoint which will receive the results of the scan. 147 | :param policy_uuid: policy UUID. 148 | :type policy_uuid: str or None 149 | :param detection_rules: list of detection rules. 150 | :type detection_rules: List[DetectionRule] or None 151 | :param detection_rule_uuids: list of detection rule UUIDs. 152 | :type detection_rule_uuids: List[str] or None 153 | :param request_metadata: additional metadata that will be returned with the webhook response 154 | :type request_metadata: str or None 155 | :param alert_config: Configures external destinations to fan out alerts to in the event that findings are detected. 156 | :type alert_config: AlertConfig or None 157 | :returns: (scan_id, message) 158 | """ 159 | 160 | if not policy_uuid and not detection_rule_uuids and not detection_rules: 161 | raise NightfallUserError("at least one of policy_uuid, detection_rule_uuids or detection_rules required", 162 | 40001) 163 | 164 | response = self._file_scan_initialize(location) 165 | _validate_response(response, 200) 166 | result = response.json() 167 | session_id, chunk_size = result['id'], result['chunkSize'] 168 | 169 | uploaded = self._file_scan_upload(session_id, location, chunk_size) 170 | if not uploaded: 171 | raise NightfallSystemError("File upload failed", 50000) 172 | 173 | response = self._file_scan_finalize(session_id) 174 | _validate_response(response, 200) 175 | 176 | response = self._file_scan_scan(session_id, 177 | detection_rules=detection_rules, 178 | detection_rule_uuids=detection_rule_uuids, 179 | webhook_url=webhook_url, policy_uuid=policy_uuid, 180 | request_metadata=request_metadata, 181 | alert_config=alert_config) 182 | _validate_response(response, 200) 183 | parsed_response = response.json() 184 | 185 | return parsed_response["id"], parsed_response["message"] 186 | 187 | def _file_scan_initialize(self, location: str): 188 | data = { 189 | "fileSizeBytes": os.path.getsize(location) 190 | } 191 | response = self.session.post(url=self.FILE_SCAN_INITIALIZE_ENDPOINT, json=data) 192 | 193 | return response 194 | 195 | def _file_scan_upload(self, session_id: str, location: str, chunk_size: int): 196 | 197 | def read_chunks(fp, chunk_size): 198 | ix = 0 199 | while True: 200 | data = fp.read(chunk_size) 201 | if not data: 202 | break 203 | yield ix, data 204 | ix = ix + 1 205 | 206 | def upload_chunk(id, data, headers): 207 | response = self.session.patch( 208 | url=self.FILE_SCAN_UPLOAD_ENDPOINT.format(id), 209 | data=data, 210 | headers=headers 211 | ) 212 | return response 213 | 214 | with open(location, 'rb') as fp: 215 | for ix, piece in read_chunks(fp, chunk_size): 216 | headers = {"X-UPLOAD-OFFSET": str(ix * chunk_size)} 217 | response = upload_chunk(session_id, piece, headers) 218 | _validate_response(response, 204) 219 | 220 | return True 221 | 222 | def _file_scan_finalize(self, session_id: str): 223 | response = self.session.post(url=self.FILE_SCAN_COMPLETE_ENDPOINT.format(session_id)) 224 | return response 225 | 226 | def _file_scan_scan(self, session_id: str, detection_rules: Optional[List[DetectionRule]] = None, 227 | detection_rule_uuids: Optional[List[str]] = None, webhook_url: Optional[str] = None, 228 | policy_uuid: Optional[str] = None, request_metadata: Optional[str] = None, 229 | alert_config: Optional[AlertConfig] = None) -> requests.Response: 230 | if policy_uuid: 231 | data = {"policyUUID": policy_uuid} 232 | else: 233 | data = {"policy": {}} 234 | if webhook_url: 235 | data["policy"]["webhookURL"] = webhook_url 236 | if detection_rule_uuids: 237 | data["policy"]["detectionRuleUUIDs"] = detection_rule_uuids 238 | if detection_rules: 239 | data["policy"]["detectionRules"] = [d.as_dict() for d in detection_rules] 240 | if alert_config: 241 | data["policy"]["alertConfig"] = alert_config.as_dict() 242 | 243 | if request_metadata: 244 | data["requestMetadata"] = request_metadata 245 | 246 | response = self.session.post(url=self.FILE_SCAN_SCAN_ENDPOINT.format(session_id), json=data) 247 | return response 248 | 249 | def validate_webhook(self, request_signature: str, request_timestamp: str, request_data: str) -> bool: 250 | """ 251 | Validate the integrity of webhook requests coming from Nightfall. 252 | 253 | :param request_signature: value of X-Nightfall-Signature header 254 | :type request_signature: str 255 | :param request_timestamp: value of X-Nightfall-Timestamp header 256 | :type request_timestamp: str 257 | :param request_data: request body as a unicode string 258 | Flask: request.get_data(as_text=True) 259 | Django: request.body.decode("utf-8") 260 | :type request_data: str 261 | :returns: validation status boolean 262 | """ 263 | 264 | now = datetime.now() 265 | request_datetime = datetime.fromtimestamp(int(request_timestamp)) 266 | if request_datetime < now-timedelta(minutes=5) or request_datetime > now: 267 | return False 268 | computed_signature = hmac.new( 269 | self.signing_secret.encode(), 270 | msg=F"{request_timestamp}:{request_data}".encode(), 271 | digestmod=hashlib.sha256 272 | ).hexdigest().lower() 273 | if computed_signature != request_signature: 274 | return False 275 | return True 276 | 277 | 278 | # Utility 279 | def _validate_response(response: requests.Response, expected_status_code: int): 280 | if response.status_code == expected_status_code: 281 | return 282 | response_json = response.json() 283 | error_code = response_json.get('code', None) 284 | if error_code is None: 285 | raise NightfallSystemError(response.text, 50000) 286 | if error_code < 40000 or error_code >= 50000: 287 | raise NightfallSystemError(response.text, error_code) 288 | else: 289 | raise NightfallUserError(response.text, error_code) 290 | -------------------------------------------------------------------------------- /nightfall/detection_rules.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from enum import Enum 3 | from typing import List, Optional 4 | 5 | from nightfall.exceptions import NightfallUserError 6 | 7 | 8 | @dataclass 9 | class Regex: 10 | """A RE2 regular expression and config for use with Nightfall 11 | Attributes: 12 | pattern (str): The RE2 pattern to use. 13 | is_case_sensitive (bool): Whether to make matches have the same case as the expression given 14 | """ 15 | pattern: str 16 | is_case_sensitive: bool 17 | 18 | def as_dict(self): 19 | return {"pattern": self.pattern, "isCaseSensitive": self.is_case_sensitive} 20 | 21 | 22 | @dataclass 23 | class WordList: 24 | """A list of words that can be used to customize the behavior of a detector while Nightfall performs a scan. 25 | Attributes: 26 | word_list (List[str]): The list of words to use. 27 | is_case_sensitive (bool): Whether to make matches have the same case as each word given 28 | """ 29 | word_list: List[str] 30 | is_case_sensitive: bool 31 | 32 | def as_dict(self): 33 | return {"values": self.word_list, "isCaseSensitive": self.is_case_sensitive} 34 | 35 | 36 | class Confidence(Enum): 37 | """Confidence describes the certainty that a piece of content matches a detector.""" 38 | VERY_UNLIKELY = "VERY_UNLIKELY" 39 | UNLIKELY = "UNLIKELY" 40 | POSSIBLE = "POSSIBLE" 41 | LIKELY = "LIKELY" 42 | VERY_LIKELY = "VERY_LIKELY" 43 | 44 | 45 | @dataclass 46 | class ContextRule: 47 | """An object that describes how a regular expression may be used to adjust the confidence of a candidate finding. 48 | This context rule will be applied within the provided byte proximity, and if the regular expression matches, then 49 | the confidence associated with the finding will be adjusted to the value prescribed. 50 | Attributes: 51 | regex (Regex): The regular expression configuration to run within the context of a candidate finding. 52 | window_before (int): The number of leading characters to consider as context. 53 | window_after (int): The number of trailing characters to consider as context. 54 | fixed_confidence (Confidence): How to adjust the result of the match if the context rule matches. 55 | """ 56 | regex: Regex 57 | window_before: int 58 | window_after: int 59 | fixed_confidence: Confidence 60 | 61 | def as_dict(self): 62 | return { 63 | "regex": self.regex.as_dict(), 64 | "proximity": {"windowBefore": self.window_before, "windowAfter": self.window_after}, 65 | "confidenceAdjustment": {"fixedConfidence": self.fixed_confidence.value} 66 | } 67 | 68 | 69 | class MatchType(Enum): 70 | FULL = "FULL" 71 | PARTIAL = "PARTIAL" 72 | 73 | 74 | @dataclass 75 | class ExclusionRule: 76 | """An object that describes a regular expression or list of keywords that may be used to disqualify a 77 | candidate finding from triggering a detector match. One of regex or word_list is required. 78 | Attributes: 79 | match_type (MatchType): the match type. 80 | regex (Regex or None): The regular expression configuration to run on a candidate finding. 81 | word_list (WordList or None): The list of words to compare to a candidate finding. 82 | """ 83 | match_type: MatchType 84 | regex: Optional[Regex] = None 85 | word_list: Optional[WordList] = None 86 | 87 | def __post_init__(self): 88 | if (not self.regex and not self.word_list) or (self.regex and self.word_list): 89 | raise NightfallUserError("need either regex or word_list to build an ExclusionRule", 40001) 90 | 91 | def as_dict(self): 92 | result = {"matchType": self.match_type.value} 93 | if self.regex: 94 | result["regex"] = self.regex.as_dict() 95 | result["exclusionType"] = "REGEX" 96 | if self.word_list: 97 | result["wordList"] = self.word_list.as_dict() 98 | result["exclusionType"] = "WORD_LIST" 99 | return result 100 | 101 | 102 | @dataclass 103 | class MaskConfig: 104 | """An object that specifies how findings should be masked when returned by the API. 105 | Attributes: 106 | masking_char (chr): character that will be repeated to replace the finding. 107 | This character may be a multi-byte character, but it must be exactly one codepoint. 108 | num_chars_to_leave_unmasked (int): the number of characters to leave unmasked at either the left or right of 109 | the finding when it is returned. 110 | mask_right_to_left (bool): True if num_chars_to_leave_unmasked should be on the right, False otherwise. 111 | chars_to_ignore (List[chr]): the set of characters to leave unmasked when the finding is returned. These 112 | characters may be multi-byte characters, but each entry in the array must be exactly one codepoint. 113 | """ 114 | masking_char: chr 115 | num_chars_to_leave_unmasked: int = 0 116 | mask_right_to_left: bool = False 117 | chars_to_ignore: List[chr] = field(default_factory=list) 118 | 119 | def as_dict(self): 120 | return { 121 | "maskingChar": self.masking_char, 122 | "numCharsToLeaveUnmasked": self.num_chars_to_leave_unmasked, 123 | "maskRightToLeft": self.mask_right_to_left, 124 | "charsToIgnore": self.chars_to_ignore 125 | } 126 | 127 | 128 | @dataclass 129 | class RedactionConfig: 130 | """An object that configures how any detected findings should be redacted when returned to the client. When this 131 | configuration is provided as part of a request, exactly one of the four types of redaction should be set. 132 | One of mask_config, substitution_phrase, infotype_substitution or public_key is required: 133 | - Masking: replacing the characters of a finding with another character, such as '*' or '👀' 134 | - Info Type Substitution: replacing the finding with the name of the detector it matched, such 135 | as CREDIT_CARD_NUMBER 136 | - Substitution: replacing the finding with a custom string, such as "oh no!" 137 | - Encryption: encrypting the finding with an RSA public key 138 | Attributes: 139 | remove_finding (bool): Whether the original finding should be omitted in responses from the API. 140 | mask_config (MaskConfig): Build a redaction config with masking. 141 | substitution_phrase (str or None): Build a redaction config with a substitution phrase. 142 | infotype_substitution (bool or None): Build a redaction config with info type substitution. 143 | public_key (str or None): Build a redaction config with RSA encryption. 144 | """ 145 | remove_finding: bool 146 | mask_config: Optional[MaskConfig] = None 147 | substitution_phrase: Optional[str] = None 148 | infotype_substitution: bool = False 149 | public_key: Optional[str] = None 150 | 151 | def __post_init__(self): 152 | config_counts = [self.mask_config, self.substitution_phrase, self.public_key].count(None) 153 | if (self.infotype_substitution and config_counts != 3) or \ 154 | (config_counts != 2 and not self.infotype_substitution): 155 | raise NightfallUserError("need one of mask_config, substitution_phrase, infotype_substitution," 156 | " or public_key", 40001) 157 | 158 | def as_dict(self): 159 | result = {"removeFinding": self.remove_finding} 160 | if self.mask_config: 161 | result["maskConfig"] = self.mask_config.as_dict() 162 | if self.substitution_phrase: 163 | result["substitutionConfig"] = {"substitutionPhrase": self.substitution_phrase} 164 | if self.infotype_substitution: 165 | result["infoTypeSubstitutionConfig"] = {} 166 | if self.public_key: 167 | result["cryptoConfig"] = {"publicKey": self.public_key} 168 | return result 169 | 170 | 171 | @dataclass 172 | class Detector: 173 | """An object that represents a data type or category of information. Detectors are used to scan content 174 | for findings. One of nightfall_detector, regex, word_list or uuid required. 175 | Attributes: 176 | min_confidence (Confidence): The minimum confidence threshold for the detector trigger a finding. 177 | min_num_findings (int): The minimum number of occurrences of the detector required to trigger a finding. 178 | nightfall_detector (str or None): Create an instance of a detector based on a pre-built Nightfall detector. 179 | regex (Regex or None): Create an instance of a detector based on a regular expression. 180 | word_list (WordList or None): Create an instance of a detector based on a word list. 181 | uuid (str or None): Create an instance of a detector by using an existing detector's UUID. 182 | display_name (str or None): A display name for this detector. 183 | context_rules (List[ContextRule] or None): The context rules to use to customize the behavior of this detector. 184 | exclusion_rules (List[ExclusionRule] or None): The exclusion rules to use to customize the behavior of this 185 | detector. 186 | redaction_config (RedactionConfig or None): The redaction configuration to-be-applied to this detector. 187 | This configuration is currently only supported for scanning plaintext, not for file scanning. 188 | """ 189 | min_confidence: Confidence 190 | min_num_findings: int = 1 191 | nightfall_detector: Optional[str] = None 192 | regex: Optional[Regex] = None 193 | word_list: Optional[WordList] = None 194 | uuid: Optional[str] = None 195 | display_name: Optional[str] = None 196 | context_rules: Optional[List[ContextRule]] = None 197 | exclusion_rules: Optional[List[ExclusionRule]] = None 198 | redaction_config: Optional[RedactionConfig] = None 199 | 200 | def __post_init__(self): 201 | if [self.nightfall_detector, self.regex, self.word_list, self.uuid].count(None) != 3: 202 | raise NightfallUserError("need one of nightfall_detector, regex, word_list, or uuid", 40001) 203 | 204 | def as_dict(self): 205 | result = {"minConfidence": self.min_confidence.value, "minNumFindings": self.min_num_findings} 206 | if self.nightfall_detector: 207 | result["nightfallDetector"] = self.nightfall_detector 208 | result["detectorType"] = "NIGHTFALL_DETECTOR" 209 | if self.regex: 210 | result["regex"] = self.regex.as_dict() 211 | result["detectorType"] = "REGEX" 212 | if self.word_list: 213 | result["wordList"] = self.word_list.as_dict() 214 | result["detectorType"] = "WORD_LIST" 215 | if self.uuid: 216 | result["detectorUUID"] = self.uuid 217 | if self.display_name: 218 | result["displayName"] = self.display_name 219 | if self.context_rules: 220 | result["contextRules"] = [c.as_dict() for c in self.context_rules] 221 | if self.exclusion_rules: 222 | result["exclusionRules"] = [e.as_dict() for e in self.exclusion_rules] 223 | if self.redaction_config: 224 | result["redactionConfig"] = self.redaction_config.as_dict() 225 | return result 226 | 227 | 228 | class LogicalOp(Enum): 229 | """ A modifier that is used to decide when a finding should be surfaced in the context of a detection rule. 230 | - When ALL is specified, all detectors in a detection rule must trigger a match in order for the finding to be 231 | reported. This is the equivalent of a logical "AND" operator. 232 | - When ANY is specified, only one of the detectors in a detection rule must trigger a match in order for the finding 233 | to be reported. This is the equivalent of a logical "OR" operator. 234 | """ 235 | ANY = "ANY" 236 | ALL = "ALL" 237 | 238 | 239 | @dataclass 240 | class DetectionRule: 241 | """An object that contains a set of detectors to be used when scanning content. 242 | Attributes: 243 | detectors (List[Detector]): A list of Detectors to scan content with. 244 | logical_op (LogicalOp): The method for combining the detectors. One of: 245 | - LogicalOp.ANY (logical or, i.e. a finding is emitted only if any of the provided detectors match) 246 | - LogicalOp.ALL (logical and, i.e. a finding is emitted only if all provided detectors match) 247 | name (str): The name of the detection rule. 248 | """ 249 | detectors: List[Detector] 250 | logical_op: LogicalOp = LogicalOp.ANY 251 | name: Optional[str] = None 252 | 253 | def as_dict(self): 254 | result = {"detectors": [d.as_dict() for d in self.detectors], "logicalOp": self.logical_op.value} 255 | if self.name: 256 | result["name"] = self.name 257 | return result 258 | -------------------------------------------------------------------------------- /nightfall/exceptions.py: -------------------------------------------------------------------------------- 1 | class NightfallError(Exception): 2 | def __init__(self, message, error_code): 3 | super().__init__(message) 4 | self.message = message 5 | self.error_code = error_code 6 | 7 | def __str__(self): 8 | return f"{str(self.error_code)}: {self.message}" 9 | 10 | 11 | class NightfallUserError(NightfallError): 12 | pass 13 | 14 | 15 | class NightfallSystemError(NightfallError): 16 | pass 17 | -------------------------------------------------------------------------------- /nightfall/findings.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Any 2 | from dataclasses import dataclass 3 | 4 | from nightfall.detection_rules import Confidence 5 | 6 | 7 | @dataclass 8 | class Range: 9 | """An object representing where a finding was discovered in content. 10 | :param start: The start of the range. 11 | :type start: int 12 | :param end: The end of the range. 13 | :type end: int 14 | """ 15 | start: int 16 | end: int 17 | 18 | 19 | @dataclass 20 | class Finding: 21 | """An object representing an occurrence of a configured detector (i.e. finding) in the provided data. 22 | Attributes: 23 | finding (str): The data that triggered a detector match. 24 | redacted_finding (str): The redacted finding if redaction was configured, None otherwise. 25 | before_context (str): The data that immediately preceded the finding if configured, None otherwise. 26 | after_context (str): The data that immediately succeeded the finding if configured, None otherwise. 27 | detector_name (str): The the name of the detector, if configured, None otherwise. 28 | detector_uuid (str): The ID that uniquely identifies this detector. 29 | confidence (Confidence): The confidence that the data contained in Finding is an instance of the matched 30 | detector. 31 | byte_range (Range): The byte range in which a finding was detected within the item. 32 | codepoint_range (Range): The codepoint range in which a finding was detected within the item. This differs 33 | from byte range since a codepoint may contain multiple bytes. 34 | row_range (Range): The row in which a finding was detected, if it was in a tabular document. Index starts at 1. 35 | column_range (Range): The column(s) in which a finding was detected, if it was in a tabular document. Index starts at 1. 36 | commit_hash (str): The hash of the commit in which the finding was detected, if known. 37 | commit_author(str): The author of the commit in which the finding was detected, if known. 38 | matched_detection_rule_uuids (List[str]): The list of detection rule UUIDs that contained a detector that 39 | triggered a match. 40 | matched_detection_rules (List[str]): The list of inline detection rules that contained a detector that triggered 41 | a match. 42 | """ 43 | finding: str 44 | redacted_finding: Optional[str] 45 | before_context: Optional[str] 46 | after_context: Optional[str] 47 | detector_name: Optional[str] 48 | detector_uuid: str 49 | confidence: Confidence 50 | byte_range: Range 51 | codepoint_range: Range 52 | row_range: Optional[Range] 53 | column_range: Optional[Range] 54 | commit_hash: str 55 | commit_author: str 56 | matched_detection_rule_uuids: List[str] 57 | matched_detection_rules: List[str] 58 | 59 | @classmethod 60 | def from_dict(cls, resp: dict) -> "Finding": 61 | return cls( 62 | resp["finding"], 63 | resp.get("redactedFinding"), 64 | resp.get("beforeContext"), 65 | resp.get("afterContext"), 66 | resp["detector"].get("name"), 67 | resp["detector"].get("uuid"), 68 | Confidence[resp["confidence"]], 69 | Range(resp["location"]["byteRange"]["start"], resp["location"]["byteRange"]["end"]), 70 | Range(resp["location"]["codepointRange"]["start"], resp["location"]["codepointRange"]["end"]), 71 | _range_or_none(resp["location"]["rowRange"]), 72 | _range_or_none(resp["location"]["columnRange"]), 73 | resp["location"].get("commitHash", ""), 74 | resp["location"].get("commitAuthor", ""), 75 | resp["matchedDetectionRuleUUIDs"], 76 | resp["matchedDetectionRules"] 77 | ) 78 | 79 | def _range_or_none(range_or_none: Any) -> Optional[Range]: 80 | """Some ranges are not always present, this function returns either None or a Range.""" 81 | if range_or_none is None: 82 | return None 83 | start = range_or_none["start"] 84 | end = range_or_none["end"] 85 | return Range(start, end) 86 | 87 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | filetest: marks tests as requiring a valid webhook to run 4 | integration: marks tests as calling out to the nightfall api to run 5 | 6 | addopts = --doctest-glob=README.md 7 | doctest_optionflags = ELLIPSIS 8 | 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | def readme(): 5 | """Return long description from file.""" 6 | with open('README.md') as f: 7 | return f.read() 8 | 9 | 10 | setup( 11 | name="nightfall", 12 | version="1.4.1", 13 | description="Python SDK for Nightfall", 14 | long_description=readme(), 15 | long_description_content_type="text/markdown", 16 | url="https://github.com/nightfallai/nightfall-python-sdk", 17 | author="Nightfall", 18 | author_email="support@nightfall.ai", 19 | license="MIT", 20 | classifiers=[ 21 | "Development Status :: 5 - Production/Stable", 22 | "Intended Audience :: Developers", 23 | "Intended Audience :: System Administrators", 24 | "License :: OSI Approved :: MIT License", 25 | "Operating System :: OS Independent", 26 | "Topic :: Software Development :: Build Tools", 27 | "Topic :: Software Development :: Libraries :: Python Modules", 28 | "Topic :: Internet", 29 | "Programming Language :: Python :: 3 :: Only", 30 | ], 31 | keywords='nightfall dlp api sdk', 32 | packages=find_packages(exclude=['tests*']), 33 | install_requires=[ 34 | 'requests', 35 | 'urllib3' 36 | ], 37 | python_requires='~=3.7' 38 | ) 39 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nightfallai/nightfall-python-sdk/bf1b734cde0336fd29ea56d207375966dfafd027/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from freezegun import freeze_time 5 | import pytest 6 | import responses 7 | import time 8 | 9 | from nightfall.api import Nightfall, NightfallUserError 10 | from nightfall.detection_rules import DetectionRule, Detector, LogicalOp, Confidence, ExclusionRule, ContextRule, \ 11 | WordList, MatchType, RedactionConfig, MaskConfig, Regex 12 | from nightfall.findings import Finding, Range 13 | 14 | 15 | @pytest.fixture 16 | def nightfall(): 17 | yield Nightfall(os.environ['NIGHTFALL_API_KEY']) 18 | 19 | 20 | @pytest.mark.integration 21 | def test_scan_text_detection_rules_v3(nightfall): 22 | result, redactions = nightfall.scan_text( 23 | ["4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn"], 24 | detection_rules=[ 25 | DetectionRule(logical_op=LogicalOp.ANY, detectors=[ 26 | Detector(min_confidence=Confidence.LIKELY, 27 | min_num_findings=1, 28 | display_name="Credit Card Number", 29 | nightfall_detector="CREDIT_CARD_NUMBER", 30 | context_rules=[ContextRule(regex=Regex("fake regex", is_case_sensitive=False), 31 | window_before=10, window_after=10, 32 | fixed_confidence=Confidence.VERY_UNLIKELY)], 33 | exclusion_rules=[ExclusionRule(MatchType.FULL, 34 | word_list=WordList(["never", "match"], 35 | is_case_sensitive=True))], 36 | redaction_config=RedactionConfig(remove_finding=False, 37 | mask_config=MaskConfig(masking_char='👀', 38 | num_chars_to_leave_unmasked=3, 39 | chars_to_ignore=["-"])), 40 | ), 41 | Detector(min_confidence=Confidence.LIKELY, nightfall_detector="US_SOCIAL_SECURITY_NUMBER")])], 42 | context_bytes=10, 43 | default_redaction_config=RedactionConfig(remove_finding=False, substitution_phrase="[REDACTED]") 44 | ) 45 | 46 | assert len(result) == 1 47 | assert len(result[0]) == 2 48 | 49 | def finding_orderer(f): 50 | return f.codepoint_range.start 51 | 52 | result[0].sort(key=finding_orderer) 53 | assert result[0][0] == Finding( 54 | "4916-6734-7572-5015", 55 | "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀", 56 | None, " is my cre", 57 | "Credit Card Number", 58 | result[0][0].detector_uuid, 59 | Confidence.VERY_LIKELY, 60 | Range(0, 19), Range(0, 19), None, None, "", "", 61 | [], ["Inline Detection Rule #1"]) 62 | assert result[0][1] == Finding( 63 | "489-36-8350", 64 | "[REDACTED]", 65 | "d number, ", " ssn", 66 | "US_SOCIAL_SECURITY_NUMBER", 67 | result[0][1].detector_uuid, 68 | Confidence.VERY_LIKELY, 69 | Range(46, 57), Range(46, 57), None, None, "", "", 70 | [], ["Inline Detection Rule #1"]) 71 | assert len(redactions) == 1 72 | assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn" 73 | 74 | 75 | @pytest.mark.filetest 76 | @pytest.mark.integration 77 | def test_scan_file_detection_rules(nightfall, tmpdir): 78 | file = tmpdir.mkdir("test_data").join("file.txt") 79 | 80 | file.write("4916-6734-7572-5015 is my credit card number") 81 | 82 | id, message = nightfall.scan_file( 83 | file, 84 | os.environ['WEBHOOK_ENDPOINT'], 85 | detection_rules=[DetectionRule(logical_op=LogicalOp.ANY, detectors=[ 86 | Detector(min_confidence=Confidence.LIKELY, min_num_findings=1, 87 | display_name="Credit Card Number", nightfall_detector="CREDIT_CARD_NUMBER")])] 88 | ) 89 | 90 | assert id is not None 91 | assert message == 'scan initiated' 92 | 93 | 94 | @responses.activate 95 | def test_scan_text(): 96 | nightfall = Nightfall("NF-NOT_REAL") 97 | responses.add(responses.POST, 'https://api.nightfall.ai/v3/scan', 98 | json={ 99 | "findings": 100 | [ 101 | [ 102 | { 103 | "finding": "4916-6734-7572-5015", 104 | "redactedFinding": "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀", 105 | "afterContext": " is my cre", 106 | "detector": 107 | { 108 | "name": "Credit Card Number", 109 | "uuid": "74c1815e-c0c3-4df5-8b1e-6cf98864a454" 110 | }, 111 | "confidence": "VERY_LIKELY", 112 | "location": 113 | { 114 | "byteRange": 115 | { 116 | "start": 0, 117 | "end": 19 118 | }, 119 | "codepointRange": 120 | { 121 | "start": 0, 122 | "end": 19 123 | }, 124 | "rowRange": None, 125 | "columnRange": None, 126 | }, 127 | "redactedLocation": 128 | { 129 | "byteRange": 130 | { 131 | "start": 0, 132 | "end": 19 133 | }, 134 | "codepointRange": 135 | { 136 | "start": 0, 137 | "end": 19 138 | }, 139 | "rowRange": None, 140 | "columnRange": None, 141 | }, 142 | "matchedDetectionRuleUUIDs": 143 | [], 144 | "matchedDetectionRules": 145 | [ 146 | "Inline Detection Rule #1" 147 | ] 148 | }, 149 | { 150 | "finding": "489-36-8350", 151 | "redactedFinding": "[REDACTED]", 152 | "beforeContext": "d number, ", 153 | "afterContext": " ssn", 154 | "detector": 155 | { 156 | "name": "", 157 | "uuid": "e30d9a87-f6c7-46b9-a8f4-16547901e069" 158 | }, 159 | "confidence": "VERY_LIKELY", 160 | "location": 161 | { 162 | "byteRange": 163 | { 164 | "start": 46, 165 | "end": 57 166 | }, 167 | "codepointRange": 168 | { 169 | "start": 46, 170 | "end": 57 171 | }, 172 | "rowRange": 173 | { 174 | "start": 2, 175 | "end": 4, 176 | }, 177 | "columnRange": 178 | { 179 | "start": 1, 180 | "end": 1, 181 | }, 182 | }, 183 | "redactedLocation": 184 | { 185 | "byteRange": 186 | { 187 | "start": 46, 188 | "end": 56 189 | }, 190 | "codepointRange": 191 | { 192 | "start": 46, 193 | "end": 56 194 | }, 195 | "rowRange": None, 196 | "columnRange": None, 197 | }, 198 | "matchedDetectionRuleUUIDs": 199 | [], 200 | "matchedDetectionRules": 201 | [ 202 | "Inline Detection Rule #1" 203 | ] 204 | } 205 | ] 206 | ], 207 | "redactedPayload": 208 | [ 209 | "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn" 210 | ] 211 | }) 212 | result, redactions = nightfall.scan_text( 213 | ["4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn"], 214 | detection_rules=[ 215 | DetectionRule(logical_op=LogicalOp.ANY, detectors=[ 216 | Detector(min_confidence=Confidence.LIKELY, 217 | min_num_findings=1, 218 | display_name="Credit Card Number", 219 | nightfall_detector="CREDIT_CARD_NUMBER", 220 | context_rules=[ContextRule(regex=Regex("fake regex", is_case_sensitive=False), 221 | window_before=10, window_after=10, 222 | fixed_confidence=Confidence.VERY_UNLIKELY)], 223 | exclusion_rules=[ExclusionRule(MatchType.FULL, 224 | word_list=WordList(["never", "match"], 225 | is_case_sensitive=True))], 226 | redaction_config=RedactionConfig(remove_finding=False, 227 | mask_config=MaskConfig(masking_char='👀', 228 | num_chars_to_leave_unmasked=3, 229 | chars_to_ignore=["-"])), 230 | ), 231 | Detector(min_confidence=Confidence.LIKELY, nightfall_detector="US_SOCIAL_SECURITY_NUMBER")])], 232 | context_bytes=10, 233 | default_redaction_config=RedactionConfig(remove_finding=False, substitution_phrase="[REDACTED]") 234 | ) 235 | 236 | assert len(responses.calls) == 1 237 | assert responses.calls[0].request.headers.get("Authorization") == "Bearer NF-NOT_REAL" 238 | assert json.loads(responses.calls[0].request.body) == { 239 | "payload": 240 | [ 241 | "4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn" 242 | ], 243 | "policy": 244 | { 245 | "detectionRules": 246 | [ 247 | { 248 | "detectors": 249 | [ 250 | { 251 | "minConfidence": "LIKELY", 252 | "minNumFindings": 1, 253 | "nightfallDetector": "CREDIT_CARD_NUMBER", 254 | "detectorType": "NIGHTFALL_DETECTOR", 255 | "displayName": "Credit Card Number", 256 | "contextRules": 257 | [ 258 | { 259 | "regex": 260 | { 261 | "pattern": "fake regex", 262 | "isCaseSensitive": False 263 | }, 264 | "proximity": 265 | { 266 | "windowBefore": 10, 267 | "windowAfter": 10 268 | }, 269 | "confidenceAdjustment": 270 | { 271 | "fixedConfidence": "VERY_UNLIKELY" 272 | } 273 | } 274 | ], 275 | "exclusionRules": 276 | [ 277 | { 278 | "matchType": "FULL", 279 | "wordList": 280 | { 281 | "values": 282 | [ 283 | "never", 284 | "match" 285 | ], 286 | "isCaseSensitive": True 287 | }, 288 | "exclusionType": "WORD_LIST" 289 | } 290 | ], 291 | "redactionConfig": 292 | { 293 | "removeFinding": False, 294 | "maskConfig": 295 | { 296 | "maskingChar": "👀", 297 | "numCharsToLeaveUnmasked": 3, 298 | "maskRightToLeft": False, 299 | "charsToIgnore": 300 | [ 301 | "-" 302 | ] 303 | } 304 | } 305 | }, 306 | { 307 | "minConfidence": "LIKELY", 308 | "minNumFindings": 1, 309 | "nightfallDetector": "US_SOCIAL_SECURITY_NUMBER", 310 | "detectorType": "NIGHTFALL_DETECTOR" 311 | } 312 | ], 313 | "logicalOp": "ANY" 314 | } 315 | ], 316 | "contextBytes": 10, 317 | "defaultRedactionConfig": 318 | { 319 | "removeFinding": False, 320 | "substitutionConfig": 321 | { 322 | "substitutionPhrase": "[REDACTED]" 323 | } 324 | } 325 | } 326 | } 327 | 328 | assert len(result) == 1 329 | assert len(result[0]) == 2 330 | assert result[0][0] == Finding( 331 | "4916-6734-7572-5015", 332 | '491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀', 333 | None, " is my cre", 334 | "Credit Card Number", 335 | result[0][0].detector_uuid, 336 | Confidence.VERY_LIKELY, 337 | Range(0, 19), Range(0, 19), None, None, "", "", 338 | [], ["Inline Detection Rule #1"]) 339 | assert result[0][1] == Finding( 340 | "489-36-8350", 341 | "[REDACTED]", 342 | "d number, ", " ssn", 343 | "", 344 | result[0][1].detector_uuid, 345 | Confidence.VERY_LIKELY, 346 | Range(46, 57), Range(46, 57), Range(2,4), Range(1,1), "", "", 347 | [], ["Inline Detection Rule #1"]) 348 | assert len(redactions) == 1 349 | assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn" 350 | 351 | @responses.activate 352 | def test_scan_text_with_policy_uuids(): 353 | nightfall = Nightfall("NF-NOT_REAL") 354 | responses.add(responses.POST, 'https://api.nightfall.ai/v3/scan', 355 | json={ 356 | "findings": 357 | [ 358 | [ 359 | { 360 | "finding": "4916-6734-7572-5015", 361 | "redactedFinding": "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀", 362 | "afterContext": " is my cre", 363 | "detector": 364 | { 365 | "name": "Credit Card Number", 366 | "uuid": "74c1815e-c0c3-4df5-8b1e-6cf98864a454" 367 | }, 368 | "confidence": "VERY_LIKELY", 369 | "location": 370 | { 371 | "byteRange": 372 | { 373 | "start": 0, 374 | "end": 19 375 | }, 376 | "codepointRange": 377 | { 378 | "start": 0, 379 | "end": 19 380 | }, 381 | "rowRange": None, 382 | "columnRange": None, 383 | }, 384 | "redactedLocation": 385 | { 386 | "byteRange": 387 | { 388 | "start": 0, 389 | "end": 19 390 | }, 391 | "codepointRange": 392 | { 393 | "start": 0, 394 | "end": 19 395 | }, 396 | "rowRange": None, 397 | "columnRange": None, 398 | }, 399 | "matchedDetectionRuleUUIDs": 400 | ["0d8efd7b-b87a-478b-984e-9cf5534a46bc"], 401 | "matchedDetectionRules": 402 | [] 403 | }, 404 | ] 405 | ], 406 | "redactedPayload": 407 | [ 408 | "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn" 409 | ] 410 | }) 411 | result, redactions = nightfall.scan_text( 412 | ["4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn"], 413 | policy_uuids=["2388f83f-cd31-4689-971b-4ee94f798281"] 414 | ) 415 | 416 | assert len(responses.calls) == 1 417 | assert responses.calls[0].request.headers.get("Authorization") == "Bearer NF-NOT_REAL" 418 | assert json.loads(responses.calls[0].request.body) == { 419 | "payload": 420 | [ 421 | "4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn" 422 | ], 423 | "policyUUIDs": ["2388f83f-cd31-4689-971b-4ee94f798281"] 424 | } 425 | 426 | assert len(result) == 1 427 | assert len(result[0]) == 1 428 | assert result[0][0] == Finding( 429 | "4916-6734-7572-5015", 430 | '491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀', 431 | None, " is my cre", 432 | "Credit Card Number", 433 | result[0][0].detector_uuid, 434 | Confidence.VERY_LIKELY, 435 | Range(0, 19), Range(0, 19), None, None, "", "", 436 | ["0d8efd7b-b87a-478b-984e-9cf5534a46bc"], []) 437 | assert len(redactions) == 1 438 | assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn" 439 | 440 | def test_scan_text_no_detection_rules_or_policy_uuids(): 441 | nightfall = Nightfall("NF-NOT_REAL") 442 | with pytest.raises(NightfallUserError): 443 | nightfall.scan_text(texts=["will", "fail"]) 444 | 445 | 446 | @responses.activate 447 | def test_scan_file(tmpdir): 448 | file = tmpdir.mkdir("test_data").join("file.txt") 449 | 450 | file.write("4916-6734-7572-5015 is my credit card number") 451 | 452 | nightfall = Nightfall("NF-NOT_REAL") 453 | responses.add(responses.POST, 'https://api.nightfall.ai/v3/upload', status=200, json={"id": 1, "chunkSize": 22}) 454 | responses.add(responses.PATCH, 'https://api.nightfall.ai/v3/upload/1', status=204) 455 | responses.add(responses.POST, 'https://api.nightfall.ai/v3/upload/1/finish', status=200) 456 | responses.add(responses.POST, 'https://api.nightfall.ai/v3/upload/1/scan', status=200, 457 | json={"id": 1, "message": "scan_started"}) 458 | 459 | id, message = nightfall.scan_file(file, "https://my-website.example/callback", detection_rule_uuids=["a_uuid"], 460 | request_metadata="some test data") 461 | 462 | assert len(responses.calls) == 5 463 | for call in responses.calls: 464 | assert call.request.headers.get("Authorization") == "Bearer NF-NOT_REAL" 465 | 466 | assert responses.calls[0].request.body == b'{"fileSizeBytes": 44}' 467 | assert responses.calls[1].request.body == b"4916-6734-7572-5015 is" 468 | assert responses.calls[1].request.headers.get("X-UPLOAD-OFFSET") == '0' 469 | assert responses.calls[2].request.body == b" my credit card number" 470 | assert responses.calls[2].request.headers.get("X-UPLOAD-OFFSET") == '22' 471 | assert responses.calls[4].request.body == b'{"policy": {"webhookURL": "https://my-website.example/callback", ' \ 472 | b'"detectionRuleUUIDs": ["a_uuid"]}, "requestMetadata": "some test data"}' 473 | assert id == 1 474 | assert message == "scan_started" 475 | 476 | 477 | @responses.activate 478 | def test_file_scan_upload_short(tmpdir): 479 | file = tmpdir.mkdir("test_data").join("file.txt") 480 | 481 | file.write("4916-6734-7572-5015 is my credit card number") 482 | 483 | nightfall = Nightfall("NF-NOT_REAL") 484 | 485 | responses.add(responses.PATCH, 'https://api.nightfall.ai/v3/upload/1', status=204) 486 | 487 | assert nightfall._file_scan_upload(1, file, 200) 488 | assert len(responses.calls) == 1 489 | assert responses.calls[0].request.headers.get("Authorization") == "Bearer NF-NOT_REAL" 490 | assert responses.calls[0].request.body == b"4916-6734-7572-5015 is my credit card number" 491 | assert responses.calls[0].request.headers.get("X-UPLOAD-OFFSET") == "0" 492 | 493 | 494 | @responses.activate 495 | def test_file_scan_upload_long(tmpdir): 496 | file = tmpdir.mkdir("test_data").join("file.txt") 497 | test_str = b"4916-6734-7572-5015 is my credit card number" 498 | file.write_binary(test_str) 499 | 500 | responses.add(responses.PATCH, 'https://api.nightfall.ai/v3/upload/1', status=204) 501 | 502 | nightfall = Nightfall("NF-NOT_REAL") 503 | 504 | assert nightfall._file_scan_upload(1, file, 1) 505 | assert len(responses.calls) == 44 506 | for i, call in enumerate(responses.calls): 507 | assert call.request.headers.get("Authorization") == "Bearer NF-NOT_REAL" 508 | assert call.request.body.decode('utf-8') == test_str.decode('utf-8')[i] 509 | assert call.request.headers.get("X-UPLOAD-OFFSET") == str(i) 510 | 511 | 512 | @freeze_time("2021-10-04T17:30:50Z") 513 | def test_validate_webhook(nightfall): 514 | nightfall.signing_secret = "super-secret-shhhh" 515 | timestamp = 1633368645 516 | body = "hello world foo bar goodnight moon" 517 | expected = "1bb7619a9504474ffc14086d0423ad15db42606d3ca52afccb4a5b2125d7b703" 518 | assert nightfall.validate_webhook(expected, timestamp, body) 519 | 520 | 521 | @freeze_time("2021-10-04T19:30:50Z") 522 | def test_validate_webhook_too_old(nightfall): 523 | nightfall.signing_secret = "super-secret-shhhh" 524 | timestamp = 1633368645 525 | body = "hello world foo bar goodnight moon" 526 | expected = "1bb7619a9504474ffc14086d0423ad15db42606d3ca52afccb4a5b2125d7b703" 527 | assert not nightfall.validate_webhook(expected, timestamp, body) 528 | 529 | 530 | @freeze_time("2021-10-04T17:30:50Z") 531 | def test_validate_webhook_incorrect_sig(nightfall): 532 | nightfall.signing_secret = "super-secret-shhhh" 533 | timestamp = 1633368645 534 | body = "hello world foo bar goodnight moon" 535 | expected = "not matching" 536 | assert not nightfall.validate_webhook(expected, timestamp, body) 537 | --------------------------------------------------------------------------------