├── .bandit ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── documentation-issue-report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── bandit.yml │ ├── black.yml │ ├── build.yml │ ├── docs-build.yml │ ├── docs-deploy.yml │ ├── mypy.yml │ ├── publish.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── docs ├── Makefile ├── README.md ├── docs │ ├── CNAME │ ├── about-us.md │ ├── faq.md │ ├── getting-started │ │ ├── cli.md │ │ ├── cloud-services │ │ │ ├── azure-ml-notebooks.md │ │ │ ├── images │ │ │ │ ├── azure-jle-1.png │ │ │ │ ├── azure-jle-3.png │ │ │ │ ├── azure-jle-4.png │ │ │ │ ├── azure-jle-5.png │ │ │ │ ├── sm-notebook-1.png │ │ │ │ ├── sm-notebook-2.png │ │ │ │ ├── sm-notebook-3.png │ │ │ │ ├── sm-notebook-4.png │ │ │ │ ├── sm-notebook-5.png │ │ │ │ └── studio-lab-screenshot.png │ │ │ ├── index.md │ │ │ ├── sage-maker-notebook-instances.md │ │ │ ├── sage-maker-studio-lab.md │ │ │ └── sage-maker-studio.md │ │ ├── index.md │ │ └── jupyter-lab-extension.md │ ├── imgs │ │ ├── cli-help-message.png │ │ ├── focus-cell-from-issue.mp4 │ │ ├── html-scan-report.png │ │ ├── html-scan-with-errors.png │ │ ├── jle-initial-setup.png │ │ ├── jupyter-extension-example.mp4 │ │ ├── jupyter-settings.mp4 │ │ ├── open-nb-defense-panel.mp4 │ │ ├── run-scan-from-toolbar.mp4 │ │ ├── show-contextual-help-toggle.mp4 │ │ ├── show-dropdowns-toggle.mp4 │ │ └── side-panel-with-results.png │ ├── index.md │ ├── robots.txt │ ├── scan-settings │ │ ├── cli-flags.md │ │ ├── cli-settings.md │ │ ├── index.md │ │ └── jupyterlab-settings.md │ └── supported-scans │ │ ├── detecting-CVEs.md │ │ ├── detecting-PII.md │ │ ├── detecting-licenses.md │ │ ├── detecting-secrets.md │ │ └── images │ │ ├── nbd-cli-cve-results.png │ │ ├── nbd-cli-license-results.png │ │ ├── nbd-jle-cve-results.png │ │ ├── nbd-jle-license-results.png │ │ └── nbd-jle-scan-pii-found.png ├── mkdocs.yml ├── overrides │ ├── 
.icons │ │ ├── social-discussion.svg │ │ ├── social-github.svg │ │ ├── social-linkedin.svg │ │ └── social-slack.svg │ ├── assets │ │ ├── css │ │ │ ├── extra.css │ │ │ └── landing.css │ │ ├── fonts │ │ │ ├── SF-Pro-Display-Bold.woff │ │ │ ├── lg.svg │ │ │ ├── lg.ttf │ │ │ ├── lg.woff │ │ │ ├── sf-pro-text-bold.woff │ │ │ └── sf-pro-text-regular.woff │ │ ├── imgs │ │ │ ├── background.png │ │ │ ├── branch-bottom.svg │ │ │ ├── circle-arrow.svg │ │ │ ├── favicon.svg │ │ │ ├── feature-image-customizable.svg │ │ │ ├── feature-image-cve.svg │ │ │ ├── feature-image-jle.svg │ │ │ ├── feature-image-repository-scan.svg │ │ │ ├── footer-logo.svg │ │ │ ├── git-contributors.svg │ │ │ ├── git-created.svg │ │ │ ├── git-updated.svg │ │ │ ├── header-image.svg │ │ │ ├── loading.gif │ │ │ ├── top-logo.svg │ │ │ ├── vulnerability-cve.svg │ │ │ ├── vulnerability-pii.svg │ │ │ ├── vulnerability-secrets.svg │ │ │ ├── vulnerability-third-party.svg │ │ │ └── world-back.svg │ │ └── js │ │ │ └── timeago_mkdocs.js │ ├── home.html │ ├── main.html │ └── partials │ │ ├── content.html │ │ ├── copyright.html │ │ ├── footer.html │ │ ├── header.html │ │ ├── logo.html │ │ └── source-file.html ├── poetry.lock └── pyproject.toml ├── nbdefense ├── __init__.py ├── _version.py ├── cli.py ├── codebase.py ├── constants.py ├── dependencies.py ├── errors.py ├── issues.py ├── nbdefense.py ├── notebook.py ├── plugins │ ├── __init__.py │ ├── cve │ │ ├── cve_dependency_file_plugin.py │ │ ├── cve_notebooks_plugin.py │ │ └── cve_plugin.py │ ├── licenses │ │ ├── license_plugin.py │ │ ├── license_plugin_settings.py │ │ ├── licenses_dependency_file_plugin.py │ │ └── licenses_notebooks_plugin.py │ ├── pii.py │ ├── plugin.py │ └── secrets.py ├── reports.py ├── settings.py ├── templates │ ├── errors.html │ ├── files-scanned-dialog.html │ ├── footer.html │ ├── header.html │ ├── icons │ │ ├── alert-icon-critical.svg │ │ ├── alert-icon-high.svg │ │ ├── alert-icon-low.svg │ │ ├── alert-icon-medium.svg │ │ ├── header-github.svg 
│ │ └── x.svg │ ├── issue-codes │ │ ├── dependency-file.html │ │ ├── license-not-found-dep-file.html │ │ ├── pii-found.html │ │ ├── secrets.html │ │ ├── unapproved-license-dep-file.html │ │ └── vulnerable-dependency-dep-file.html │ ├── issue.html │ ├── issues.html │ ├── navbar.html │ ├── no-issues.html │ ├── report.html │ ├── scripts.html │ └── summary-card.html ├── templating.py ├── tools.py └── utils.py ├── poetry.lock ├── pyproject.toml └── tests ├── __init__.py ├── conftest.py ├── default_settings.py ├── mock_notebooks ├── fixtures.py └── mock_notebook.py ├── plugin_tests ├── cve │ ├── fixtures.py │ ├── mock_files │ │ └── test-cve.ipynb │ ├── test_cve_dependency_file_plugin.py │ └── test_cve_notebook_plugin.py ├── licenses │ ├── fixtures.py │ ├── mock_files │ │ └── test-license.ipynb │ ├── test_license_dependency_file_plugin.py │ ├── test_license_notebook_plugin.py │ └── test_license_plugin.py ├── test_pii.py └── test_secrets.py ├── test_codebase.py ├── test_issues.py └── test_notebook.py /.bandit: -------------------------------------------------------------------------------- 1 | # FILE: .bandit 2 | [bandit] 3 | exclude = ./tests/ -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[A Few Words Describing the Bug]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 
25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-issue-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation issue report 3 | about: Create a report to help us improve our documentation 4 | title: "[A Few Words Describing the Issue]" 5 | labels: bug, documentation 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the issue** 11 | A clear and concise description of what the issue is. 12 | 13 | **Relevant page** 14 | Link the page that should be addressed 15 | 16 | **Expected behavior/text** 17 | A clear and concise description of what you expected to see in the documentation. 18 | 19 | **Screenshots** 20 | If applicable, add screenshots to help explain your problem. 21 | 22 | **Desktop (please complete the following information):** 23 | - OS: [e.g. iOS] 24 | - Browser [e.g. chrome, safari] 25 | - Version [e.g. 22] 26 | 27 | **Smartphone (please complete the following information):** 28 | - Device: [e.g. iPhone6] 29 | - OS: [e.g. iOS8.1] 30 | - Browser [e.g. stock browser, safari] 31 | - Version [e.g. 22] 32 | 33 | **Additional context** 34 | Add any other context about the problem here. 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for NB Defense 4 | title: "[A Few Words Describing the Feature]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? 
Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | -------------------------------------------------------------------------------- /.github/workflows/bandit.yml: -------------------------------------------------------------------------------- 1 | name: Bandit 2 | 3 | on: 4 | push: 5 | branches: main 6 | pull_request: 7 | branches: "*" 8 | 9 | jobs: 10 | bandit: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-python@v5 15 | with: 16 | python-version: "3.8" 17 | - uses: snok/install-poetry@v1 18 | with: 19 | version: 1.8.0 20 | virtualenvs-create: true 21 | virtualenvs-in-project: true 22 | installer-parallel: true 23 | - name: Load cached venv 24 | id: cached-poetry-dependencies 25 | uses: actions/cache@v4 26 | with: 27 | path: .venv 28 | key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} 29 | - name: Install Dependencies 30 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 31 | run: | 32 | poetry install --with test --no-root 33 | - name: Run Bandit 34 | run: poetry run bandit -c pyproject.toml -r $(git ls-files '*.py') 35 | 
-------------------------------------------------------------------------------- /.github/workflows/black.yml: -------------------------------------------------------------------------------- 1 | name: Lint with Black 2 | 3 | on: 4 | push: 5 | branches: main 6 | pull_request: 7 | branches: '*' 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: psf/black@stable 15 | with: 16 | version: "22.8.0" 17 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: main 6 | pull_request: 7 | branches: '*' 8 | 9 | permissions: 10 | id-token: write 11 | contents: read 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | with: 19 | fetch-depth: 0 # Necessary to get tags 20 | - uses: actions/setup-python@v5 21 | with: 22 | python-version: "3.8" 23 | - uses: snok/install-poetry@v1 24 | with: 25 | version: 1.8.0 26 | virtualenvs-create: true 27 | virtualenvs-in-project: true 28 | installer-parallel: true 29 | - name: Load cached venv 30 | id: cached-poetry-dependencies 31 | uses: actions/cache@v4 32 | with: 33 | path: .venv 34 | key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} 35 | - uses: mtkennerly/dunamai-action@v1 36 | with: 37 | env-var: NBD_VERSION 38 | args: --style pep440 --format "{base}.dev{distance}+{commit}" 39 | - name: Install Dependencies 40 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 41 | run: | 42 | make install-prod 43 | - name: Build Package 44 | run: | 45 | make build-prod 46 | - name: PYPI Publish Dry Run 47 | run: | 48 | poetry publish --dry-run 49 | -------------------------------------------------------------------------------- /.github/workflows/docs-build.yml: 
-------------------------------------------------------------------------------- 1 | name: Build Docs 2 | on: 3 | push: 4 | paths: 5 | - docs/** 6 | branches: 7 | - main 8 | pull_request: 9 | branches: '*' 10 | permissions: 11 | contents: write 12 | jobs: 13 | docs-build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: actions/setup-python@v5 18 | with: 19 | python-version: "3.8" 20 | - uses: snok/install-poetry@v1 21 | with: 22 | version: 1.8.0 23 | virtualenvs-create: true 24 | virtualenvs-in-project: true 25 | installer-parallel: true 26 | - name: Load cached venv 27 | id: cached-poetry-dependencies 28 | uses: actions/cache@v4 29 | with: 30 | path: .venv 31 | key: venv-doc-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/docs/poetry.lock') }} 32 | - name: Install Dependencies 33 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 34 | run: | 35 | cd docs 36 | poetry install --no-interaction 37 | - name: Build 38 | run: | 39 | cd docs 40 | poetry run mkdocs build -------------------------------------------------------------------------------- /.github/workflows/docs-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Docs to Github Pages 2 | on: 3 | push: 4 | paths: 5 | - docs/** 6 | branches: 7 | - main 8 | permissions: 9 | contents: write 10 | jobs: 11 | docs-deploy: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 # Necessary to get tags 17 | - uses: actions/setup-python@v5 18 | with: 19 | python-version: "3.8" 20 | - uses: snok/install-poetry@v1 21 | with: 22 | version: 1.8.0 23 | virtualenvs-create: true 24 | virtualenvs-in-project: true 25 | installer-parallel: true 26 | - name: Load cached venv 27 | id: cached-poetry-dependencies 28 | uses: actions/cache@v4 29 | with: 30 | path: .venv 31 | key: venv-doc-${{ runner.os }}-${{ 
steps.setup-python.outputs.python-version }}-${{ hashFiles('**/docs/poetry.lock') }} 32 | - name: Install Dependencies 33 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 34 | run: | 35 | cd docs 36 | poetry install --no-interaction 37 | - name: Deploy to Github 38 | run: | 39 | cd docs 40 | poetry run mkdocs gh-deploy -------------------------------------------------------------------------------- /.github/workflows/mypy.yml: -------------------------------------------------------------------------------- 1 | name: MYPY 2 | 3 | on: 4 | push: 5 | branches: main 6 | pull_request: 7 | branches: '*' 8 | 9 | jobs: 10 | mypy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-python@v5 15 | with: 16 | python-version: "3.8" 17 | - uses: snok/install-poetry@v1 18 | with: 19 | version: 1.8.0 20 | virtualenvs-create: true 21 | virtualenvs-in-project: true 22 | installer-parallel: true 23 | - name: Load cached venv 24 | id: cached-poetry-dependencies 25 | uses: actions/cache@v4 26 | with: 27 | path: .venv 28 | key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} 29 | - name: Install Dependencies 30 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 31 | run: | 32 | poetry install --with test --no-root 33 | - name: Run MYPY 34 | run: | 35 | poetry run mypy --ignore-missing-imports --strict --check-untyped-defs $(git ls-files '*.py') 36 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Build and Publish Release to PYPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | publish-nbdefense: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | with: 19 | fetch-depth: 0 
# Necessary to get tags 20 | - uses: actions/setup-python@v5 21 | with: 22 | python-version: "3.8" 23 | - uses: snok/install-poetry@v1 24 | with: 25 | version: 1.8.0 26 | virtualenvs-create: true 27 | virtualenvs-in-project: true 28 | installer-parallel: true 29 | - name: Get Release Version 30 | uses: mtkennerly/dunamai-action@v1 31 | with: 32 | env-var: NBD_VERSION 33 | args: --style semver --format "{base}" 34 | - name: Set Package Version 35 | run: | 36 | echo "__version__ = '$NBD_VERSION'" > nbdefense/_version.py 37 | poetry version $NBD_VERSION 38 | - name: Build Package 39 | run: | 40 | poetry build 41 | - name: Publish Package to PYPI 42 | run: | 43 | poetry config pypi-token.pypi ${{ secrets.NBDEFENSE_PYPI_API_TOKEN }} 44 | poetry publish -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: main 6 | pull_request: 7 | branches: '*' 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: ["3.8", "3.9", "3.10"] 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - uses: snok/install-poetry@v1 23 | with: 24 | version: 1.8.0 25 | virtualenvs-create: true 26 | virtualenvs-in-project: true 27 | installer-parallel: true 28 | - name: Load cached venv 29 | id: cached-poetry-dependencies 30 | uses: actions/cache@v4 31 | with: 32 | path: .venv 33 | key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} 34 | - name: Install Dependencies 35 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 36 | run: | 37 | poetry install --with test 38 | - name: Run Tests 39 | run: | 40 | make test 41 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into 
this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | .DS_Store 163 | 164 | .vscode/ 165 | 166 | # Temp directory we use for converted Python files 167 | .tmp/ 168 | 169 | # Local config directory 170 | .nbdefense/ 171 | 172 | nbdefense/trivy 173 | 174 | # settings files for scan 175 | settings.toml 176 | 177 | # Mkdocs build 178 | /docs/site 179 | 180 | # Files Generated By Tests 181 | tests/plugin_tests/cve/mock_files/requirements.txt 182 | tests/plugin_tests/licenses/mock_files/requirements.txt -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 22.8.0 4 | hooks: 5 | - id: black 6 | - repo: https://github.com/PyCQA/bandit 7 | rev: '1.7.5' 8 | hooks: 9 | - id: bandit 10 | args: ["-c", "pyproject.toml"] 11 | additional_dependencies: ["bandit[toml]"] 12 | - repo: https://github.com/python-poetry/poetry 13 | rev: '1.4.0' 14 | hooks: 15 | - id: poetry-check # Makes sure poetry config is valid 16 | - id: poetry-lock # Makes sure lock file is up to date 17 | args: ["--check"] 18 | - repo: https://github.com/pre-commit/mirrors-mypy 19 | rev: "v1.1.1" 20 | hooks: 21 | - id: mypy 22 | args: ["--ignore-missing-imports", "--strict", "--check-untyped-defs"] 23 | additional_dependencies: [ 24 | "types-requests==2.28.11.15", 25 | "types-tqdm==4.65.0.0", 26 | "pytest==7.1.3", 27 | "types-setuptools==67.6.0.5", 28 | "click==8.1.3", 29 | "nbformat==5.6.1", 30 | "tomlkit==0.11.6", 31 | "jinja2==3.1.2" 32 | ] 33 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # 👩‍💻 CONTRIBUTING 2 | 3 | Welcome! We're glad to have you. 
If you would like to report a bug, request a new feature or enhancement, follow [this link](https://nbdefense.ai/faq) for more help. 4 | 5 | If you're looking for documentation on how to use NB Defense, you can find that [here](https://nbdefense.ai). 6 | 7 | ## ❗️ Requirements 8 | 9 | 1. Python 10 | 11 | NB Defense requires Python version `3.8` or greater and less than `3.11` 12 | 13 | 2. Poetry 14 | 15 | The following install commands require [Poetry](https://python-poetry.org/). To install Poetry you can follow [this installation guide](https://python-poetry.org/docs/#installation). Poetry can also be installed with brew using the command `brew install poetry`. 16 | 17 | ## 💪 Developing with NB Defense 18 | 19 | 1. Clone the repo 20 | 21 | ```bash 22 | git clone git@github.com:protectai/nbdefense.git 23 | ``` 24 | 25 | 2. To install development dependencies to your environment and set up the cli for live updates, run the following command in the root of the `nbdefense` directory: 26 | 27 | ```bash 28 | make install-dev 29 | ``` 30 | 31 | 3. You are now ready to start developing! 32 | 33 | Run a scan with the cli with the following command: 34 | 35 | ```bash 36 | nbdefense scan -s 37 | ``` 38 | 39 | ## 📝 Submitting Changes 40 | 41 | Thanks for contributing! In order to open a PR into the NB Defense project, you'll have to follow these steps: 42 | 43 | 1. Fork the repo and clone your fork locally 44 | 2. Run `make install-dev` from the root of your forked repo to set up your environment 45 | 3. Make your changes 46 | 4. Submit a pull request 47 | 48 | After these steps have been completed, someone on our team at Protect AI will review the code and help merge in your changes! 
49 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION ?= $(shell dunamai from git --style pep440 --format "{base}.dev{distance}+{commit}") 2 | 3 | install-dev: 4 | poetry install --with dev 5 | pre-commit install 6 | 7 | install: 8 | poetry install 9 | 10 | install-prod: 11 | poetry install --with prod 12 | 13 | clean: 14 | pip uninstall nbdefense 15 | pip freeze | xargs -n1 pip uninstall -y 16 | rm -f dist/* 17 | 18 | test: 19 | poetry run pytest 20 | 21 | test-watch: 22 | ptw --ignore ./.tmp 23 | 24 | build: 25 | poetry build 26 | 27 | build-prod: version 28 | poetry build 29 | 30 | version: 31 | echo "__version__ = '$(VERSION)'" > nbdefense/_version.py 32 | poetry version $(VERSION) 33 | 34 | lint: bandit mypy 35 | 36 | bandit: 37 | poetry run bandit -c pyproject.toml -r . 38 | 39 | mypy: 40 | poetry run mypy --ignore-missing-imports --strict --check-untyped-defs . 41 | 42 | format: 43 | black . 
44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🛡️ NB Defense 2 | 3 | [![bandit](https://github.com/protectai/nbdefense/actions/workflows/bandit.yml/badge.svg)](https://github.com/protectai/nbdefense/actions/workflows/bandit.yml) 4 | [![build](https://github.com/protectai/nbdefense/actions/workflows/build.yml/badge.svg)](https://github.com/protectai/nbdefense/actions/workflows/build.yml) 5 | [![black](https://github.com/protectai/nbdefense/actions/workflows/black.yml/badge.svg)](https://github.com/protectai/nbdefense/actions/workflows/black.yml) 6 | [![mypy](https://github.com/protectai/nbdefense/actions/workflows/mypy.yml/badge.svg)](https://github.com/protectai/nbdefense/actions/workflows/mypy.yml) 7 | [![tests](https://github.com/protectai/nbdefense/actions/workflows/test.yml/badge.svg)](https://github.com/protectai/nbdefense/actions/workflows/test.yml) 8 | [![License: Apache 2.0](https://img.shields.io/crates/l/apa)](https://opensource.org/license/apache-2-0/) 9 | 10 | ## 🏃‍♀️ Quick Start 11 | 12 | ```bash 13 | pip install nbdefense 14 | ``` 15 | 16 | ## 🙋‍♂️ What is NB Defense? 17 | 18 | Brought to you by Protect AI, NB Defense is a CLI tool and SDK that encourages you to think about security throughout every step of your machine learning development process. You can use nbdefense to scan for [Secrets](https://github.com/protectai/nbdefense/blob/main/docs/docs/supported-scans/detecting-secrets.md), [Personally Identifiable Information (PII)](https://github.com/protectai/nbdefense/blob/main/docs/docs/supported-scans/detecting-PII.md), [Common Vulnerabilities and Exposures(CVE)](https://github.com/protectai/nbdefense/blob/main/docs/docs/supported-scans/detecting-CVEs.md), and [Licenses](https://github.com/protectai/nbdefense/blob/main/docs/docs/supported-scans/detecting-licenses.md) in your notebook and dependency files. 
19 | 20 | NB Defense also acts as an SDK for our [Jupyter Lab Extension](https://github.com/protectai/nbdefense-jupyter). Visit our [documentation](https://github.com/protectai/nbdefense/tree/main/docs/docs), or the repository below to learn more. 21 | 22 | - [Jupyter Lab Extension Repository](https://github.com/protectai/nbdefense-jupyter) 23 | 24 | ## 📄 Documentation 25 | 26 | For more details and [documentation](https://github.com/protectai/nbdefense/tree/main/docs/docs) visit these links: 27 | 28 | - [Documentation](https://github.com/protectai/nbdefense/tree/main/docs/docs) 29 | 30 | - [Getting Started on a Local Machine](https://github.com/protectai/nbdefense/blob/main/docs/docs/getting-started/cli.md) 31 | 32 | ## 💪 Contributing 33 | 34 | Welcome to the team! We are open to contributions and working with the community to make notebooks safer for everyone. 35 | 36 | If you would like to contribute, please visit [CONTRIBUTING.md](https://github.com/protectai/nbdefense/blob/main/CONTRIBUTING.md) to get started as a developer, or to suggest bug fixes, improvements, or new features follow [this link](https://github.com/protectai/nbdefense/blob/main/docs/docs/faq.md) to our FAQ page. 37 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | | Version | Supported | 6 | | ------- | ------------------ | 7 | | 1.x | :white_check_mark: | 8 | 9 | ## Reporting a Vulnerability 10 | 11 | If you find a vulnerability in NB Defense, perform the following steps: 12 | 13 | 1. [Open an issue](https://github.com/protectai/nbdefense/issues/new?assignees=&labels=bug&template=bug_report.md&title=[BUG]%20Security%20Vulnerability) in the nbdefense repo. Use `[BUG] Security Vulnerability` as the title and do not include any vulnerability details in the issue description. 14 | 2. 
Send us an email at `security@protectai.com` with the following: 15 | - The link to the issue you created above. 16 | - Your GitHub handle. 17 | - Details about the vulnerability including: 18 | - A description of what the vulnerability is. 19 | - Evidence of the issue happening or references to the relevant lines of code. 20 | - Instructions to reproduce the issue. 21 | 22 | After we have reproduced the issue we will reply to the issue and [open a draft security advisory](https://docs.github.com/en/code-security/security-advisories/creating-a-security-advisory) and will discuss the details there. 23 | 24 | Once we've released a fix we will use the Security Advisory to announce the findings. 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | poetry install 3 | 4 | start-dev: 5 | mkdocs serve --dirtyreload 6 | 7 | build: 8 | mkdocs build 9 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # 📖 NBDefense Documentation 2 | 3 | ## ❗️ Requirements 4 | 5 | 1. Python 6 | 7 | NB Defense requires Python version `3.8` or greater and less than `3.11` 8 | 9 | 2. Poetry 10 | 11 | The following install commands require [Poetry](https://python-poetry.org/). To install Poetry you can follow [this installation guide](https://python-poetry.org/docs/#installation). Poetry can also be installed with brew using the command `brew install poetry`. 12 | 13 | ## 💻 Local Install 14 | 15 | 1. Run `make install` to install the pip dependencies to the current python environment 16 | 17 | 2. Run `make start-dev` to run a development server locally 18 | 19 | 3. 
Go to `http://127.0.0.1:8000/` to view the docs 20 | 21 | ## 📚 References 22 | 23 | We are using the following libraries for our documentation: 24 | 25 | - [Live Documentation](https://nbdefense.ai) 26 | - This is where the production build is hosted 27 | - [MkDocs](https://www.mkdocs.org/) 28 | - This is the static site generator we're using 29 | - [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) 30 | - This is the theme we're using for MkDocs 31 | -------------------------------------------------------------------------------- /docs/docs/CNAME: -------------------------------------------------------------------------------- 1 | nbdefense.ai -------------------------------------------------------------------------------- /docs/docs/about-us.md: -------------------------------------------------------------------------------- 1 | --- 2 | hide: 3 | - navigation 4 | --- 5 | 6 | At Protect AI we're building a safer AI Powered World by empowering a community of ML security researchers, finding unique exploits, and providing tools that reduce the risk inherent in MLOps pipelines. 7 | 8 | Learn more about us at [protectai.com](https://protectai.com/) and check out our [GitHub](https://github.com/protectai)! 9 | -------------------------------------------------------------------------------- /docs/docs/faq.md: -------------------------------------------------------------------------------- 1 | --- 2 | hide: 3 | - navigation 4 | --- 5 | 6 | Thank you for your interest in NB Defense. We want to make tools with the community that data scientists and engineers can use and enjoy. Welcome to the community! 7 | 8 | ## Common Questions 9 | 10 | ### Why should I use NB Defense as a data scientist? 11 | 12 | We know that JupyterLab is where many data scientists experiment and prove out their ideas. 
The NB Defense [JupyterLab Extension](./getting-started/jupyter-lab-extension.md) and [CLI](./getting-started/cli.md) allow you to check for vulnerabilities and security issues before the code leaves your environment. You can use NB Defense to check for personally identifiable information, secrets, common vulnerabilities and exposures, and non-permissive licenses with the click of a button. We wanted to create a way for data scientists to reduce exposure to security issues and save time later on, by integrating good security practices into the development process. 13 | 14 | ### Can I run NB Defense in my CI pipeline? 15 | 16 | Yes, you can run NB Defense in your CI pipeline using the [NB Defense CLI](./getting-started/cli.md)! Use the CLI in your CI pipelines to scan repositories, or multiple notebooks at a time. 17 | 18 | ### What is special about NB Defense when many security tools offer similar functionality? 19 | 20 | NB Defense is special because it allows you to scan Jupyter Notebooks. We provide both a [JupyterLab Extension](./getting-started/jupyter-lab-extension.md) that you can use to scan notebooks while you're working, and a [CLI](./getting-started/cli.md) that you can use to scan groups of notebooks or repositories. Using both of these tools, you can scan your notebooks for personally identifiable information (PII), secrets, common vulnerabilities and exposures (CVEs), and non-permissive licenses. 21 | 22 | ### Does NB Defense JupyterLab Extension run in my kernel? 23 | 24 | We recommend that you isolate NB Defense from the kernel that you are using for your notebook. If you have installed NB Defense into a separate python environment, it will not run in your kernel. We do use your active kernel's Python path to determine which third party dependencies are installed, so we can scan them for CVEs and licenses. 25 | 26 | ## Found A Bug? 🐞 27 | 28 | Found an issue with the NB Defense CLI or JupyterLab Extension? Let's get that fixed. 
Let us know what you've found by opening an issue using the following links: 29 | 30 | - [NB Defense Bug (SDK and CLI)](https://github.com/protectai/nbdefense/issues/new?labels=bug&template=bug_report.md&title=%5BA+Few+Words+Describing+the+Bug%5D) 31 | 32 | - [NB Defense Jupyter Bug (JupyterLab Extension)](https://github.com/protectai/nbdefense-jupyter/issues/new?labels=bug&template=bug_report.md&title=%5BA+Few+Words+Describing+the+Bug%5D) 33 | 34 | ## Found An Issue With Our Documentation? 📄 35 | 36 | We want to be clear, correct, and concise. Let us know where we can improve: 37 | 38 | - [NB Defense Documentation Issue (SDK, CLI and JupyterLab Extension)](https://github.com/protectai/nbdefense/issues/new?labels=bug%2C+documentation&template=documentation-issue-report.md&title=%5BA+Few+Words+Describing+the+Issue%5D) 39 | 40 | ## Have An Idea For A New Feature? 💡 41 | 42 | We'd love to hear about it. Tell us more by creating an issue below: 43 | 44 | - [NB Defense Feature Request (SDK and CLI)](https://github.com/protectai/nbdefense/issues/new?labels=enhancement&template=feature_request.md&title=%5BA+Few+Words+Describing+the+Feature%5D) 45 | 46 | - [NB Defense Jupyter Feature Request (JupyterLab Extension)](https://github.com/protectai/nbdefense-jupyter/issues/new?labels=enhancement&template=feature_request.md&title=%5BA+Few+Words+Describing+the+Feature%5D) 47 | 48 | ## Have A Question? 
❓ 49 | 50 | Feel free to ask questions using github discussions by following the links below: 51 | 52 | - [NB Defense Discussions (SDK and CLI)](https://github.com/protectai/nbdefense/discussions) 53 | 54 | - [NB Defense Jupyter Discussions (JupyterLab Extension)](https://github.com/protectai/nbdefense-jupyter/discussions) 55 | -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/azure-ml-notebooks.md: -------------------------------------------------------------------------------- 1 | # Configuring NB Defense for Azure ML Notebooks 2 | 3 | This section outlines the steps for installation of NB Defense in [Azure Machine Learning Studio](https://azure.microsoft.com/en-us/products/machine-learning/). 4 | 5 | 1. Open the notebook you want to scan in JupyterLab by accessing the Editor menu as shown below: 6 | 7 | | ![](images/azure-jle-1.png) | 8 | | :------------------------------------------------------------------------: | 9 | | _Open jupyter notebook in JupyterLab using Azure Machine Learning Studio._ | 10 | 11 | 2. From JupyterLab, open a new terminal. 12 | 13 | 3. Activate `jupyter_env` environment using the following command: 14 | 15 | ``` 16 | conda activate jupyter_env 17 | ``` 18 | 19 | 4. The JLE can now be installed using the following command: 20 | 21 | ``` 22 | pip install nbdefense_jupyter 23 | ``` 24 | 25 | 5. Once NB Defense is installed, enable the extension using the following command: 26 | 27 | ``` 28 | jupyter server extension enable nbdefense_jupyter 29 | ``` 30 | 31 | 6. Optionally you can also run: 32 | 33 | ``` 34 | jupyter lab extension list 35 | ``` 36 | 37 | to ensure the extension is enlisted. The above two commands should give an output similar to the one below: both commands list **nbdefense_jupyter**. 
38 | 39 | | ![](images/azure-jle-3.png) | 40 | | :-----------------------------------------------------------------------------------: | 41 | | _Enabling the NBDefense JupyterLab Extension (JLE) in Azure Machine Learning Studio._ | 42 | 43 | 7. Restart the jupyter server using: 44 | 45 | ``` 46 | sudo systemctl restart jupyter 47 | ``` 48 | 49 | and refresh the browser. This should show the NB Defense widget installed in the left menu: 50 | 51 | 8. Refresh the JupyterLab webpage (CMD+R on a Mac or CTRL+R on a Windows machine). You should now be able to see the NBDefense JLE widget on the left hand side of the JupyterLab UI. 52 | 53 | | ![](images/azure-jle-4.png) | 54 | | :--------------------------------------------------------------------------------------------------: | 55 | | _Successful installation of NB Defense JupyterLab Extension (JLE) in Azure Machine Learning Studio._ | 56 | 57 | That is it! You can now log in, and press the scan button and see if there are any issues in your notebook as outlined by the NB Defense scan. For example, in the notebook below NB Defense scan has found some personally identifiable information (PII) (note the PII is fake). 58 | 59 | | ![](images/azure-jle-5.png) | 60 | | :----------------------------------------------------------------------------------------: | 61 | | _NB Defense Jupyter Lab Extension (JLE) scan results using Azure Machine Learning Studio._ | 62 | 63 | Please note that [plugin specific settings](../../scan-settings/jupyterlab-settings.md) may not work on notebooks running in Azure Machine Learning Studio. 
64 | -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/azure-jle-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/azure-jle-1.png -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/azure-jle-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/azure-jle-3.png -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/azure-jle-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/azure-jle-4.png -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/azure-jle-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/azure-jle-5.png -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/sm-notebook-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/sm-notebook-1.png 
-------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/sm-notebook-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/sm-notebook-2.png -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/sm-notebook-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/sm-notebook-3.png -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/sm-notebook-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/sm-notebook-4.png -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/sm-notebook-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/sm-notebook-5.png -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/images/studio-lab-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/getting-started/cloud-services/images/studio-lab-screenshot.png 
-------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/index.md: -------------------------------------------------------------------------------- 1 | # JupyterLab Extension on Cloud Services 2 | 3 | This page gives an overall account of installing NB Defense JupyterLab Extension (JLE) on different cloud platforms. Overall, the following four steps outline the installation of the JLE: 4 | 5 | 1. Choose the conda environment in which JLE is to be installed. 6 | 7 | ```bash 8 | conda activate ENVIRONMENT_NAME 9 | ``` 10 | 11 | 2. Install the JLE using pip install command: 12 | 13 | ```bash 14 | pip install nbdefense_jupyter 15 | ``` 16 | 17 | 3. Enable the JLE using: 18 | 19 | ```bash 20 | jupyter server extension enable nbdefense_jupyter 21 | ``` 22 | 23 | 4. Restart the JupyterLab server with the platform specific restart command/sequence. 24 | 25 | Please note that step 2 and step 3 will be the same on all cloud platforms, whereas step 1 and step 4 will change depending on the cloud platform. 26 | 27 | Please follow the links for the following cloud platforms to learn more about their specific JLE installation steps: 28 | 29 | [SageMaker Notebooks](./sage-maker-notebook-instances.md){ .md-button .md-button--primary } 30 | 31 | [SageMaker Studio Labs](./sage-maker-studio-lab.md){ .md-button .md-button--primary } 32 | 33 | [Azure Machine Learning Studio](./azure-ml-notebooks.md){ .md-button .md-button--primary } 34 | 35 | ## Unsupported Cloud Platforms: 36 | 37 | The NB Defense JLE will not work on the following cloud platforms: 38 | 39 | 1. Oracle Cloud Infrastructure (OCI): The Jupyter version running on OCI is `2.2.10` whereas the NB Defense JLE works on Jupyter version 3. 40 | 41 | 2. 
Google Cloud Platform (GCP): The GCP does not support third party JupyterLab extensions for both [managed notebooks](https://cloud.google.com/vertex-ai/docs/workbench/managed/introduction), and [user-managed notebooks](https://cloud.google.com/vertex-ai/docs/workbench/user-managed/introduction). 42 | 43 | 44 | ## Other Cloud Platforms? 45 | 46 | If you are trying to install NB Defense JLE on a cloud platform for which the installation instructions are not listed, you are welcome to open a ticket, or a PR [here](https://github.com/protectai/nbdefense/issues). 47 | -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/sage-maker-studio-lab.md: -------------------------------------------------------------------------------- 1 | # Configuring NB Defense for SageMaker Studio Lab 2 | 3 | This section covers the installation of NB Defense on [SageMaker Studio Lab](https://studiolab.sagemaker.aws/) 4 | 5 | 1. Open a terminal session within SageMaker Studio Lab 6 | 7 | 2. Activate the studiolab environment by running: 8 | 9 | ```shell 10 | conda activate studiolab 11 | ``` 12 | 13 | 3. Install the extension with pip (please ensure to replace your license key with {LICENSE_KEY} or get your pre-filled link from the [NB Defense web portal](https://nbdefense.protectai.com/)): 14 | 15 | ```shell 16 | pip install nbdefense_jupyter 17 | ``` 18 | 19 | 4. Enable the server extension with 20 | 21 | ```shell 22 | jupyter server extension enable nbdefense_jupyter 23 | ``` 24 | 25 | 5. Go to the Studio Lab project overview page. 26 | 27 | ![](images/studio-lab-screenshot.png) 28 | 29 | 6. Select Stop runtime. 30 | 7. Select Start runtime. 31 | 8. Reopen the JupyterLab session with the "Open project" button and the NB Defense extension should be there. 32 | 33 | ## Need more help? 
34 | 35 | More details about how to manage your SageMaker Studio Lab environment can be found in [the official documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/studio-lab-use-manage.html#studio-lab-use-manage-conda-jupyter) 36 | -------------------------------------------------------------------------------- /docs/docs/getting-started/cloud-services/sage-maker-studio.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: NB Defense for SageMaker Studio will be coming soon. 3 | --- 4 | 5 | # Configuring NB Defense on SageMaker Studio 6 | 7 | ## Coming Soon 🚧 8 | -------------------------------------------------------------------------------- /docs/docs/getting-started/index.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | NB Defense is an open source tool for detecting security issues in Jupyter Notebooks. These issues can range from detecting leaked PII in your notebook output to detecting vulnerable versions of installed dependencies. There are currently two tools that you can use to scan your notebooks with NB Defense. 4 | 5 | ## Choose the tool for you 6 | 7 | ### [The JupyterLab Extension](jupyter-lab-extension.md) 8 | 9 | Using the JupyterLab Extension is ideal for correcting issues directly within the JupyterLab environment. You can quickly scan and re-scan a single notebook in order to correct any potential security issues within your environment. 10 | 11 | 14 | 15 | [Try it out!](jupyter-lab-extension.md){ .md-button .md-button--primary } 16 | 17 | ### [The CLI](cli.md) 18 | 19 | The CLI is a better option if you have a lot of notebooks that you would like to scan simultaneously or if you would like to automatically scan notebooks that are going into a central repository. It can also help you set a baseline for correcting individual notebooks with the JupyterLab Extension at a later time. 
20 | 21 | ![CLI help message](../imgs/cli-help-message.png) 22 | 23 | [Try it out!](cli.md){ .md-button .md-button--primary } 24 | -------------------------------------------------------------------------------- /docs/docs/getting-started/jupyter-lab-extension.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: Using the NB Defense JupyterLab extension. 3 | --- 4 | 5 | ## Installation 6 | 7 | !!! info "Running NB Defense on JupyterLab in the cloud" 8 | 9 | The following instructions are for a local installation of JupyterLab. If you are interested in running the JupyterLab Extension on a cloud platform, please visit the [JupyterLab Extension on Cloud Services](./cloud-services/index.md) page for instructions for each platform. 10 | 11 | 1. Activate the Python environment that you use to run JupyterLab. 12 | 13 | !!! warning "Supported JupyterLab Versions" 14 | 15 | The NB Defense JupyterLab extension works with versions of JupyterLab >=3 and <4. 16 | 17 | 2. Install the extension with pip: 18 | 19 | ```bash 20 | pip install nbdefense_jupyter 21 | ``` 22 | 23 | 3. (Optional) Install the Spacy model for PII detection with: 24 | 25 | ```bash 26 | python -m spacy download en_core_web_trf 27 | ``` 28 | 29 | !!! warning "Spacy model download" 30 | 31 | This is required for the PII plugin to operate. If you do not install the `en_core_web_trf` model, then you will get the following error message in the scan output with the PII plugin activated: 32 | ``` 33 | Error occurred in the scan portion of the PII Plugin plugin. 34 | Message: [E050] Can't find model 'en_core_web_trf'. 35 | It doesn't seem to be a Python package or a valid path to a data directory. 36 | ``` 37 | 38 | 4. Activate the server extension with: 39 | 40 | ```bash 41 | jupyter server extension enable nbdefense_jupyter 42 | ``` 43 | 44 | 5. Start (or restart) the JupyterLab session and the NB Defense extension should be ready to go! 
You will know the extension has been installed successfully if you are able to see the new NB Defense tab and the `Scan with NB Defense` button in the toolbar. 45 | ```bash 46 | jupyter lab 47 | ``` 48 | ![NB Defense extension successfully loaded into JupyterLab](../imgs/jle-initial-setup.png) 49 | 50 | ## Usage 51 | 52 | Once the extension has been installed, you can now start scanning your notebooks within the JupyterLab environment. 53 | 54 | Installing the NB Defense JupyterLab Extension (JLE) will add a tab with an NB icon to your side bar. Clicking this will take you to the NB Defense JLE panel. From there you can run a scan or view scan results for the currently focused notebook. 55 | 56 | 59 | 60 | You can also run a scan by clicking the `Scan with NB Defense` button in the notebook toolbar. 61 | 62 | 65 | 66 | ## Interpreting Scan Results 67 | 68 | Within the side panel we display a list of issues detected during the scan. The information on the side bar includes the "Issue type", a short description of the issue, and which cell index the issue was found in. 69 | 70 | ![Side panel with results](../imgs/side-panel-with-results.png){: style="height:600px"} 71 | 72 | Clicking on one of the issues here will focus the relevant cell. From here you can see that the characters that triggered the scan will be underlined in the editor. Additionally, all of the cells that contain issues will have a red background. 73 | 74 | 77 | 78 | ## Adjusting Scan Settings 79 | 80 | You can change and update settings for your scan from the JupyterLab settings menu. More info is available on the [Scan Settings](../scan-settings/index.md) page. 
81 | -------------------------------------------------------------------------------- /docs/docs/imgs/cli-help-message.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/cli-help-message.png -------------------------------------------------------------------------------- /docs/docs/imgs/focus-cell-from-issue.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/focus-cell-from-issue.mp4 -------------------------------------------------------------------------------- /docs/docs/imgs/html-scan-report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/html-scan-report.png -------------------------------------------------------------------------------- /docs/docs/imgs/html-scan-with-errors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/html-scan-with-errors.png -------------------------------------------------------------------------------- /docs/docs/imgs/jle-initial-setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/jle-initial-setup.png -------------------------------------------------------------------------------- /docs/docs/imgs/jupyter-extension-example.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/jupyter-extension-example.mp4 -------------------------------------------------------------------------------- /docs/docs/imgs/jupyter-settings.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/jupyter-settings.mp4 -------------------------------------------------------------------------------- /docs/docs/imgs/open-nb-defense-panel.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/open-nb-defense-panel.mp4 -------------------------------------------------------------------------------- /docs/docs/imgs/run-scan-from-toolbar.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/run-scan-from-toolbar.mp4 -------------------------------------------------------------------------------- /docs/docs/imgs/show-contextual-help-toggle.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/show-contextual-help-toggle.mp4 -------------------------------------------------------------------------------- /docs/docs/imgs/show-dropdowns-toggle.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/show-dropdowns-toggle.mp4 -------------------------------------------------------------------------------- /docs/docs/imgs/side-panel-with-results.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/imgs/side-panel-with-results.png -------------------------------------------------------------------------------- /docs/docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | template: home.html 3 | hide: 4 | - navigation 5 | - toc 6 | - content 7 | - md-main 8 | --- 9 | -------------------------------------------------------------------------------- /docs/docs/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | Sitemap: https://nbdefense.ai/sitemap.xml -------------------------------------------------------------------------------- /docs/docs/scan-settings/cli-flags.md: -------------------------------------------------------------------------------- 1 | There are various flags that can change the behavior of the CLI. Information about these flags can be found using the `nbdefense scan --help` command. 2 | 3 | ``` 4 | nbdefense scan --help 5 | Usage: nbdefense scan [OPTIONS] [PATH] 6 | 7 | Scan [PATH] for .ipynb files for potential issues. 8 | 9 | Options: 10 | -r, --recursive Scan all nested directories for .ipynb 11 | files. 12 | -s, --serve Run an HTTP Server to view the report 13 | instead of persisting the report as an html 14 | file. 15 | -q, --quiet Suppress all output. 16 | -d, --dependency-file PATH Specify a requirements.txt file to scan for 17 | CVEs and license compatibility. 18 | -f, --output-file FILE Specify an output filename for the report. 19 | -o, --output-format [json|html] 20 | The output format for the report. 21 | -y, --yes Bypass all prompts with an affirmative 22 | response. 23 | --settings-file FILE Specify a settings file to use for the scan. 24 | Defaults to [PATH]/settings.toml. 
25 | --no-progress-bars Hide progress bars, but keep other logging 26 | active. 27 | --help Show this message and exit. 28 | ``` 29 | -------------------------------------------------------------------------------- /docs/docs/scan-settings/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: This section covers how to configure the settings for NBDefense. 3 | --- 4 | 5 | # Scan Settings 6 | 7 | Settings for scans can be adjusted for both the [JupyterLab Extension](./jupyterlab-settings.md) and the [CLI](./cli-settings.md). 8 | 9 | ## Adjustable Settings 10 | 11 | Each of the four NB Defense plugins (secrets, PII, licences, CVE) can be switched on or off. That is, if only the PII plugin is disabled and the remaining three plugins are enabled, the NB Defense scan will scan for the three enabled plugins but will not scan for PII. 12 | 13 | In addition to enabling or disabling the available plugins for an NB Defense scan, there is a global adjustable setting of `redact_secret` for presenting sensitive information in scan reports. Moreover, NB Defense also allows for individual settings for each of the four plugins. The global and individual plugin settings are described below: 14 | 15 | ### Global Settings 16 | 17 | #### `redact_secret` 18 | 19 | The `redact_secret` setting determines how sensitive information is presented in NB Defense scan reports. The possible values of `redact_secret` are `PARTIAL`, `ALL`, and `HASH`. `PARTIAL` will show only leading and trailing characters. `ALL` will shadow the full secret. `HASH` will replace the full secret with its hashed value (as shown in the table below as well). 
20 | 21 | | Redacted Secret Option | Redacted Secret | 22 | | :--------------------- | :-------------- | 23 | | PARTIAL | 4aed\*\*\*\*78$ | 24 | | ALL | ****\*\*\***** | 25 | | HASH | h3hb54i109k | 26 | 27 | ### Secrets Plugin Settings 28 | 29 | #### `secrets_plugins` 30 | 31 | The `secrets_plugins` has the same modules as in the [detect-secrets](https://github.com/Yelp/detect-secrets) package. Each of the secret modules can be enabled or disabled. 32 | 33 | More details on which of the secret modules are enabled by default, and what they detect can be found on the [secrets scan details page](../supported-scans/detecting-secrets.md#supported-detect-secrets-plugins). 34 | 35 | ### PII Plugin Settings 36 | 37 | #### `confidence_threshold` 38 | 39 | The `confidence_threshold` indicates the level of uncertainty allowed when flagging text as PII i.e., a confidence threshold set at `0.8` would only allow text to be marked as PII when NB Defense's PII scan is at least `80%` confident that the text is PII. For any other text where the confidence of the NB Defense's PII scan is less than `80%` the text will not be marked as PII. The default value of `confidence_threshold` is set at `0.8` with a minimum allowed value of `0.0` and a maximum allowed value of `1.0`. 40 | 41 | Also, NB Defense PII scan allows for separate setting of `confidence_threshold` for each of the PII entities. In this way, by adjusting the values of `confidence_threshold` for each PII entity separately, the users of NB Defense can set the uncertainty in PII scan results for each of the PII entities independently. 42 | 43 | #### `entities` 44 | 45 | The PII entities to be scanned for can be toggled on and off in the scan settings. A list of PII entities with their brief description can be found on the [PII scan details page](../supported-scans/detecting-PII.md). 
46 | 47 | ### License Plugin Settings 48 | 49 | #### `accepted_licenses` 50 | 51 | The licenses added to the `accepted_licenses` setting will not be flagged in the NB Defense scan report. So, for example, if the following licenses are added to the `accepted_licenses`, and they appear in the text, NB Defense License plugin will not flag them. 52 | 53 | ``` 54 | accepted_licenses = ["Apache License 2.0", "BSD-3-Clause", "MIT License"] 55 | ``` 56 | 57 | ### CVE Plugin Settings 58 | 59 | There are no settings specific to the CVE plugin. 60 | -------------------------------------------------------------------------------- /docs/docs/scan-settings/jupyterlab-settings.md: -------------------------------------------------------------------------------- 1 | The settings for the JupyterLab Extension can be found in the JupyterLab Advanced Settings Editor alongside the settings for other JupyterLab extensions. 2 | 3 | You can open the JupyterLab settings pane by clicking on the Settings tab in the menu bar, then clicking on the Advanced Settings Editor button. 4 | 5 | 8 | 9 | ### JupyterLab Specific Settings 10 | 11 | There are two settings specific to the JupyterLab Environment. These are also editable through the toggle buttons on the Notebook. 12 | 13 | #### Show Contextual Help 14 | 15 | This toggles the underlining of found issues in the editor as well as the highlighting of certain cell elements. 16 | 17 | 20 | 21 | #### Show Dropdowns 22 | 23 | This toggles the dropdowns that appear on hover of cells that have issues. 24 | 25 | 28 | -------------------------------------------------------------------------------- /docs/docs/supported-scans/detecting-CVEs.md: -------------------------------------------------------------------------------- 1 | # CVE Detection 2 | 3 | NB Defense scans for common vulnerabilities and exposures (CVEs) associated with the dependencies in your project's `requirements.txt` file and your python environment. 
We evaluate your dependencies with Aqua's [Trivy](https://github.com/aquasecurity/trivy) tool to detect any known vulnerabilities. 4 | 5 | ## CVE Detection with the JupyterLab Extension (JLE) 6 | 7 | The NB Defense JupyterLab Extension tracks your notebook's currently configured kernel to scan imported third party dependencies for CVEs. 8 | 9 | !!! warning 10 | 11 | Before you begin, please configure a kernel for the notebook you plan to scan, and install all the third party dependencies necessary to execute your notebook into the kernel. If the notebook's dependencies are not installed in the kernel and are not imported by the notebook, they will not be scanned. You can check that all dependencies are correctly installed by running the python code in your notebook; if your python code is valid and executes without any errors, all related dependencies should be installed correctly. 12 | 13 | Before scanning, verify that the kernel in the NB Defense panel matches the kernel that you have configured. When you press scan, NB Defense will gather imported modules from your notebook and link them to third party packages installed in your kernel. The installed version of the package associated with the imported modules will then be scanned for known CVEs. 14 | 15 | ### JLE Settings 16 | 17 | You can enable or disable CVE scanning in the [JupyterLab extensions settings](../scan-settings/jupyterlab-settings.md). 18 | 19 | ### JLE Scan Results 20 | 21 | Once a scan completes, any imported modules with associated CVEs will be added to the report and you will be provided with information about each CVE. The report below includes the CVE ID, the package and version with the issue, along with the fixed version, description, and a url to learn more. 
22 | 23 | | ![](images/nbd-jle-cve-results.png) | 24 | | :------------------------------------------------------------------: | 25 | | _NBDefense JupyterLab Extension (JLE) scan report with CVEs found._ | 26 | 27 | ## CVE Detection with the CLI 28 | 29 | The NBDefense CLI currently supports scanning for CVEs with a Python `requirements.txt` file. A dependency will only be scanned for CVEs if the version is pinned to a specific version number (e.g., `numpy==1.0.0` and not `numpy>=1.0.0`). 30 | 31 | ### CLI Settings 32 | 33 | Using your [settings.toml file](../scan-settings/cli-settings.md) you can enable or disable the CVE plugin. 34 | 35 | !!! note "Example Settings" 36 | 37 | ```toml 38 | [plugins."nbdefense.plugins.CVEDependencyFilePlugin"] 39 | enabled = true 40 | ``` 41 | 42 | ### CLI Scan Results 43 | 44 | The results will provide you with information on what packages have issues, along with information on how to fix the issue. 45 | 46 | | ![](images/nbd-cli-cve-results.png) | 47 | | :------------------------------------------: | 48 | | _NBDefense CLI scan report with CVEs found._ | 49 | -------------------------------------------------------------------------------- /docs/docs/supported-scans/detecting-PII.md: -------------------------------------------------------------------------------- 1 | # Personally Identifiable Information (PII) 2 | NB Defense scans for any information that can be used to identify a person, i.e., personally identifiable information (PII). It can be their name, location, bank information, etc. 3 | 4 | NB Defense supports both global entities (such as names, email addresses) as well as country-specific entities (such as UK National Health Service (NHS) number). At the moment, the NB Defense PII plugin has support for the English language only. 5 | 6 | The PII analyser is adapted from [Microsoft Presidio](https://microsoft.github.io/presidio/) whereas the PII anonymization is handled by NB Defense. 7 | 8 | !!! 
warning 9 | 10 | Before you begin scanning with the PII plugin, please execute all code inside the notebook you would like scanned. This plugin does not execute code, and will only scan output for PII if it exists in the notebook. 11 | 12 | 13 | ## PII Entities 14 | NB Defense supports the following PII entities: 15 | 16 | ### Global PII entities: 17 | 18 | | Global PII Entity | Description | 19 | | :-- | :--- | 20 | | PERSON | If there is a name in a notebook, NB Defense will flag it as "PERSON". | 21 | | CREDIT_CARD | If there is a number between 12 and 19 digits in a notebook, NB Defense will flag it as "CREDIT_CARD".| 22 | | CRYPTO | If there is a Bitcoin wallet number in a notebook, NB Defense will flag it as "CRYPTO". At the moment, only Bitcoin addresses are supported. | 23 | | EMAIL_ADDRESS | If there is an email address in a notebook, NB Defense will flag it as "EMAIL_ADDRESS". | 24 | | IBAN_CODE | If there is an International Bank Account Number (IBAN) in a notebook, NB Defense will flag it as "IBAN_CODE". | 25 | | IP_ADDRESS | If there is an Internet Protocol (IP) version 4 or version 6 address in a notebook, NB Defense will flag it as "IP_ADDRESS". | 26 | |NRP | If there is a mention of a person's nationality, religious or political affiliation (NRP) in a notebook, NB Defense will flag it as "NRP". | 27 | |PHONE_NUMBER| If there is a telephone number in a notebook, NB Defense will flag it as "PHONE_NUMBER".| 28 | |LOCATION| If there is a geographically defined location name in a notebook such as a city, province or state name, NB Defense will flag it as "LOCATION". | 29 | | MEDICAL_LICENSE | If there is a medical license number in a notebook, NB Defense will flag it as "MEDICAL_LICENSE". 
| 30 | 31 | 32 | 33 | 34 | ### United States of America (US)-specific PII entities: 35 | 36 | 37 | | US-Specific PII Entities | Description | 38 | | :-- | :-- | 39 | | US_BANK_NUMBER | If there is a US bank account number between 8 and 17 digits in a notebook, NB Defense will flag it as "US_BANK_NUMBER". | 40 | | US_DRIVER_LICENSE | If there is a US driver license number in a notebook, NB Defense will flag it as "US_DRIVER_LICENSE". | 41 | | US_ITIN | If there is a US Individual Taxpayer Identification Number (ITIN) in a notebook, NB Defense will flag it as "US_ITIN". The US ITIN starts with a "9" and has a "7" or "8" as the fourth digit. | 42 | |US_PASSPORT | If there is a US passport number in a notebook, NB Defense will flag it as "US_PASSPORT". | 43 | |US_SSN| If there is a US Social Security Number (SSN) in a notebook, NB Defense will flag it as "US_SSN". The US SSN has 9 digits. | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | ### United Kingdom (UK)-specific PII entities: 52 | 53 | |UK-specific PII Entities | Description | 54 | |:--- | :--- | 55 | | UK_NHS | If there is a UK National Health Service (NHS) number in a notebook, NB Defense will flag it as "UK_NHS". The UK NHS number is a 10-digit number. | 56 | 57 | 58 | 59 | ### Australia (AU)-specific PII entities: 60 | |AU-specific PII Entities | Description | 61 | |:--- | :--- | 62 | | AU_ABN | If there is an Australian Business Number (ABN) in a notebook, NB Defense will flag it as "AU_ABN". The ABN is an 11-digit number. | 63 | |AU_ACN | If there is an Australian Company Number (ACN) in a notebook, NB Defense will flag it as "AU_ACN". The ACN is a 9-digit number. | 64 | | AU_TFN | If there is an Australian Tax File Number (TFN) in a notebook, NB Defense will flag it as "AU_TFN". | 65 | | AU_MEDICARE| If there is an Australian Medicare number in a notebook, NB Defense will flag it as "AU_MEDICARE". 
| 66 | 67 | 68 | ### PII Scan Results 69 | Below is a sample result for JLE with PII found in ```notebook_2.ipynb``` 70 | 71 | | ![](images/nbd-jle-scan-pii-found.png) | 72 | | :--: | 73 | | *NB Defense JupyterLab Extension (JLE) scan report with Personally Identifiable Information (PII) found.* | 74 | 75 | For any PII found in the notebook, the scan flags it as a HIGH severity issue, and also includes the cell number where it is found. 76 | 77 | Clicking on ```Issue type: PII``` will focus the cell where the PII is found. Upon hovering over the cell, NB Defense will show the total number of PII found in that cell: (in the sample scan result above) a total of 40 potential PII are found. -------------------------------------------------------------------------------- /docs/docs/supported-scans/detecting-licenses.md: -------------------------------------------------------------------------------- 1 | # License Detection 2 | 3 | NB Defense scans for the licenses associated with the dependencies in your project's `requirements.txt` file and your python environment. We evaluate your dependencies for licenses that are not included in your list of accepted licenses. 4 | 5 | ## License Detection with the JupyterLab Extension (JLE) 6 | 7 | The NB Defense JupyterLab Extension tracks your notebook's currently configured kernel to scan imported third party dependencies for licenses. 8 | 9 | !!! warning 10 | 11 | Before you begin, please configure a kernel for the notebook you plan to scan, and install all the third party dependencies necessary to execute your notebook into the kernel. If the notebook's dependencies are not installed in the kernel and are not imported by the notebook, they will not be scanned. You can check that all dependencies are correctly installed by running the python code in your notebook; if your python code is valid and executes without any errors, all related dependencies should be installed correctly. 
12 | 13 | Before scanning, verify that the kernel in the NB Defense panel matches the kernel that you have configured. When you press scan, NB Defense will gather imported modules from your notebook and link them to third party packages installed in your kernel. The installed version of the package associated with the imported modules will then be scanned for unapproved licenses. 14 | 15 | ### JLE Settings 16 | 17 | You can configure your list of accepted licenses, and license source in the [JupyterLab extensions settings](../scan-settings/jupyterlab-settings.md). Configure accepted licenses to include all licenses that are acceptable to your project's standards. Configure the license source to determine where NB Defense will look for license data associated with your package (local kernel, pypi, or a combination of the two). 18 | 19 | ### JLE Scan Results 20 | 21 | The example below shows that an unapproved license has been found in the `matplotlibXtns` version `20.5` package. 22 | 23 | | ![](images/nbd-jle-license-results.png) | 24 | | :----------------------------------------------------------------------: | 25 | | _NBDefense JupyterLab Extension (JLE) scan report with licenses found._ | 26 | 27 | ## License Detection with the CLI 28 | 29 | The NBDefense CLI currently supports scanning for licenses using a Python `requirements.txt` file. A dependency will only be scanned for licenses if the version is pinned to a specific version number (e.g., `numpy==1.0.0` and not `numpy>=1.0.0`). 30 | 31 | ### CLI Settings 32 | 33 | Using your [settings.toml file](../scan-settings/cli-settings.md) you can configure the list of licenses to include licenses that are acceptable to your project's standards. 34 | 35 | !!! 
note "Example Settings" 36 | 37 | ```toml 38 | [plugins."nbdefense.plugins.LicenseDependencyFilePlugin"] 39 | enabled = true 40 | accepted_licenses = ["Apache License 2.0", "BSD", "MIT", "Python Software Foundation License", "GNU Library or Lesser General Public License (LGPL)", "Apache Software License", "Apache 2.0", "Apache-2.0" , "BSD License", "BSD 3-Clause", "BSD-3-Clause", "Microsoft Public License" , "MIT License", "ISC License (ISCL)", "MIT-0"] 41 | ``` 42 | 43 | ### CLI Scan Results 44 | 45 | The example below surfaces three separate issues: an unapproved license, a package where a license couldn't be found, and a package that does not have a pinned dependency. 46 | 47 | | ![](images/nbd-cli-license-results.png) | 48 | | :----------------------------------------------: | 49 | | _NBDefense CLI scan report with licenses found._ | 50 | -------------------------------------------------------------------------------- /docs/docs/supported-scans/detecting-secrets.md: -------------------------------------------------------------------------------- 1 | # Secrets Detection 2 | 3 | NB Defense scans for any secrets or authentication information (such as passwords or tokens) that may be present in a notebook. 4 | 5 | To enable this we have built a wrapper around Yelp's [detect-secrets](https://github.com/Yelp/detect-secrets) library, porting its capabilities to check the input and output cells of your notebooks. 6 | 7 | !!! warning 8 | 9 | Before you begin scanning for secrets, please execute all code inside the notebook that you would like scanned. This plugin does not execute code, and will only scan output for secrets if it exists in the notebook. 10 | 11 | ## Supported detect-secrets Plugins 12 | 13 | Following is a list of the different types of authentication information that are scanned for by NB Defense: 14 | 15 | 1. API keys and tokens - Application Programming Interface (API) keys and tokens will be detected by NB Defense scan. 
They are typically used to authenticate a calling program communicating with an API. 16 | 2. Amazon Web Services (AWS) key - AWS key is used to gain access to all the AWS resources. 17 | 3. Azure storage key - Azure storage key is used to authorize access to data in an Azure storage account. 18 | 4. Basic authentication - HTTP web browser authentication is usually undertaken using a username and password when making a request such as accessing email. 19 | 5. Cloudant detector - The Cloudant detector is a unique identifier for a document in a database. 20 | 6. Discord bot token - A Discord bot token authenticates an Artificial Intelligence (AI) driven bot that can automate tasks such as moderating a server. 21 | 7. GitHub token - GitHub token is an alternative to a password when using the GitHub API or the command line for accessing GitHub resources. 22 | 8. Base64 High Entropy String - Strings with high entropy that appear to be Base64 encoded. 23 | 9. Hex High Entropy String - Strings with high entropy that appear to be in a hexadecimal format. 24 | 10. IBM Cloud Identity and Access Management 25 | 11. IBM Hash-based Message Authentication Code (HMAC) - IBM HMAC credentials consist of an Access Key and Secret Key paired for use with S3-compatible tools and libraries that require authentication. 26 | 12. JSON Web Token - JSON Web Tokens (JWT) are an open, industry standard RFC 7519 method for representing claims securely between two parties. 27 | 13. Keywords - Keywords are used for finding tokens and passwords such as 'The password is: **\***'. The word password is a keyword since it gives context. 28 | 14. Mailchimp - Mailchimp is used by marketers for promotional emails. NB Defense detection scans for login information for Mailchimp. 29 | 15. Node Package Manager (NPM) - NPM detection scans for login information for NPM. 30 | 16. Private Key - Private key detection is used for detecting any private or secret keys that could be used for data encryption. 31 | 17. 
SendGrid - SendGrid is an email marketing provider used for promotional emails. 32 | 18. Slack - Slack is a messaging application used by businesses for connecting their team. 33 | 19. Softlayer - IBM SoftLayer is a public cloud computing platform that offers a range of services, such as compute, storage, and application development. 34 | 20. SquareOAuth - The OAuth allows for sharing of information between services without exposing a password. 35 | 21. Stripe - Stripe enables online payment processing for online businesses. 36 | 22. Twilio - Twilio is a customer engagement platform which is programmable. 37 | 38 | ### Plugin Options 39 | 40 | There are a few plugins that have their own options: 41 | 42 | | Plugin | Option | Description | 43 | | ------------------------- | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 44 | | `Base64HighEntropyString` | `limit` | Sets the [Shannon entropy](https://rzepsky.medium.com/hunting-for-secrets-with-the-dumpsterdiver-93d38a9cd4c1) limit for high entropy strings. Value must be between 0.0 and 8.0, defaults to 4.5. | 45 | | `HexHighEntropyString` | `limit` | Sets the [Shannon entropy](https://rzepsky.medium.com/hunting-for-secrets-with-the-dumpsterdiver-93d38a9cd4c1) limit for high entropy strings. Value must be between 0.0 and 8.0, defaults to 3.0. | 46 | | `KeywordDetector` | `keyword_exclude` | Specify a regex string to exclude certain strings from the secrets scan. 
| 47 | -------------------------------------------------------------------------------- /docs/docs/supported-scans/images/nbd-cli-cve-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/supported-scans/images/nbd-cli-cve-results.png -------------------------------------------------------------------------------- /docs/docs/supported-scans/images/nbd-cli-license-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/supported-scans/images/nbd-cli-license-results.png -------------------------------------------------------------------------------- /docs/docs/supported-scans/images/nbd-jle-cve-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/supported-scans/images/nbd-jle-cve-results.png -------------------------------------------------------------------------------- /docs/docs/supported-scans/images/nbd-jle-license-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/supported-scans/images/nbd-jle-license-results.png -------------------------------------------------------------------------------- /docs/docs/supported-scans/images/nbd-jle-scan-pii-found.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/docs/supported-scans/images/nbd-jle-scan-pii-found.png -------------------------------------------------------------------------------- /docs/mkdocs.yml: 
-------------------------------------------------------------------------------- 1 | site_name: "NB Defense: Secure Jupyter Notebooks" 2 | site_url: "https://nbdefense.ai" 3 | 4 | repo_name: protectai/nbdefense 5 | repo_url: https://github.com/protectai/nbdefense 6 | edit_uri: "" 7 | 8 | copyright: Copyright © 2023 9 | 10 | theme: 11 | name: material 12 | home: home.html 13 | custom_dir: overrides 14 | features: 15 | - navigation.footer 16 | - navigation.tabs 17 | - navigation.tabs.sticky 18 | - navigation.indexes 19 | palette: 20 | - scheme: default 21 | toggle: 22 | icon: material/brightness-7 23 | name: Switch to dark mode 24 | - scheme: slate 25 | toggle: 26 | icon: material/brightness-4 27 | name: Switch to light mode 28 | logo: assets/imgs/top-logo.svg 29 | favicon: assets/imgs/favicon.svg 30 | 31 | nav: 32 | - Home: "index.md" 33 | - "Getting Started": 34 | - "getting-started/index.md" 35 | - "CLI": "getting-started/cli.md" 36 | - "JupyterLab Extension": "getting-started/jupyter-lab-extension.md" 37 | - "JupyterLab Extension on Cloud Services": 38 | - "getting-started/cloud-services/index.md" 39 | - "SageMaker Notebook Instances": "getting-started/cloud-services/sage-maker-notebook-instances.md" 40 | - "Sage Maker Studio Lab": "getting-started/cloud-services/sage-maker-studio-lab.md" 41 | - "Azure ML Notebooks": "getting-started/cloud-services/azure-ml-notebooks.md" 42 | - "SageMaker Studio - COMING SOON": "getting-started/cloud-services/sage-maker-studio.md" 43 | - "Supported Scans": 44 | - "Secrets Detection": "supported-scans/detecting-secrets.md" 45 | - "PII Detection": "supported-scans/detecting-PII.md" 46 | - "CVE Detection": "supported-scans/detecting-CVEs.md" 47 | - "License Detection": "supported-scans/detecting-licenses.md" 48 | - "Scan Settings": 49 | - "scan-settings/index.md" 50 | - "JupyterLab Extension Settings": "scan-settings/jupyterlab-settings.md" 51 | - "CLI Settings": "scan-settings/cli-settings.md" 52 | - "CLI Flags": 
"scan-settings/cli-flags.md" 53 | - "FAQ": 54 | - "FAQ": "faq.md" 55 | - "About Us": "about-us.md" 56 | 57 | markdown_extensions: 58 | - pymdownx.highlight: 59 | anchor_linenums: true 60 | - pymdownx.details 61 | - pymdownx.inlinehilite 62 | - pymdownx.superfences 63 | - attr_list 64 | - lightgallery 65 | - admonition 66 | 67 | plugins: 68 | - search 69 | - meta-descriptions: 70 | export_csv: false 71 | quiet: false 72 | enable_checks: false 73 | min_length: 10 74 | max_length: 160 75 | - git-revision-date-localized: 76 | enable_creation_date: true 77 | - git-authors: 78 | sort_authors_by: contribution 79 | - minify: 80 | minify_js: true 81 | minify_css: true 82 | cache_safe: true 83 | js_files: 84 | - assets/js/timeago_mkdocs.js 85 | css_files: 86 | - assets/css/extra.css 87 | 88 | extra: 89 | social: 90 | - icon: fontawesome/brands/github 91 | link: https://github.com/protectai/nbdefense 92 | - icon: fontawesome/brands/linkedin 93 | link: https://www.linkedin.com/company/protect-ai/ 94 | - icon: fontawesome/brands/slack 95 | link: https://mlsecops.slack.com 96 | - icon: social-discussion 97 | link: https://github.com/protectai/nbdefense/discussions 98 | generator: false 99 | 100 | extra_javascript: 101 | - https://cdnjs.cloudflare.com/ajax/libs/timeago.js/4.0.2/timeago.min.js 102 | - assets/js/timeago_mkdocs.js 103 | 104 | extra_css: 105 | - assets/css/extra.css 106 | -------------------------------------------------------------------------------- /docs/overrides/.icons/social-discussion.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/overrides/.icons/social-github.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /docs/overrides/.icons/social-linkedin.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /docs/overrides/.icons/social-slack.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /docs/overrides/assets/css/extra.css: -------------------------------------------------------------------------------- 1 | /* Define Font-Family */ 2 | @font-face { 3 | font-family: "SF Pro Display"; 4 | src: url("../fonts/SF-Pro-Display-Bold.woff"); 5 | } 6 | @font-face { 7 | font-family: "SF Pro Text"; 8 | src: url("../fonts/sf-pro-text-regular.woff"); 9 | } 10 | @font-face { 11 | font-family: "SF Pro Text Bold"; 12 | src: url("../fonts/sf-pro-text-bold.woff"); 13 | } 14 | 15 | /* Define Variable */ 16 | :root > * { 17 | --copyright-text-color: #dfdfdf; 18 | --copyright-logo-color: white; 19 | 20 | /* Links */ 21 | --md-typeset-a-color: #138bff; 22 | --md-accent-fg-color: #fe9a2d; 23 | } 24 | 25 | /* Define stylesheet */ 26 | [data-md-color-scheme="default"] { 27 | /* Header */ 28 | --md-primary-fg-color: #384860; 29 | 30 | /* Footer */ 31 | --md-footer-bg-color: white; 32 | --md-footer-bg-color--dark: #384860; 33 | --md-footer-fg-color: black; 34 | } 35 | [data-md-color-scheme="slate"] { 36 | /* Header */ 37 | --md-primary-fg-color: #010101; 38 | 39 | /* Main */ 40 | --md-default-bg-color: #061a38; 41 | --md-default-fg-color--light: white; 42 | 43 | /* Footer */ 44 | --md-footer-bg-color: #061a38; 45 | --md-footer-bg-color--dark: #061a38; 46 | 47 | /* Footer - Copyright */ 48 | --copyright-text-color: #969696; 49 | --copyright-logo-color: #dfdfdf; 50 | } 51 | 52 | /* Header Customization */ 53 | .custom-copyright { 54 | font-family: "Roboto"; 55 | font-style: normal; 56 | font-weight: 400; 57 | font-size: 14px; 58 | line-height: 
16px; 59 | color: var(--copyright-text-color); 60 | 61 | display: flex; 62 | flex-direction: row; 63 | gap: 8px; 64 | } 65 | .custom-copyright svg { 66 | color: var(--copyright-logo-color); 67 | } 68 | .custom-copyright svg:hover { 69 | color: white; 70 | } 71 | 72 | #top-logo { 73 | height: 24px; 74 | width: 98px; 75 | } 76 | 77 | /* Footer Customization */ 78 | .md-footer-meta { 79 | padding: 24px 0px; 80 | } 81 | .md-footer-meta__inner { 82 | position: relative; 83 | 84 | align-items: center; 85 | justify-content: center; 86 | 87 | margin-left: auto; 88 | margin-right: auto; 89 | max-width: 61rem; 90 | } 91 | .md-social { 92 | flex: 0; 93 | 94 | position: absolute; 95 | right: 4px; 96 | 97 | display: flex; 98 | } 99 | html .md-footer-meta.md-typeset a:is(:focus, :hover) { 100 | color: white; 101 | } 102 | 103 | /* Copyright customization */ 104 | .protectai-logo { 105 | padding-left: 4px; 106 | } 107 | 108 | @media (max-width: 500px) { 109 | .md-social { 110 | flex: 1; 111 | position: inherit; 112 | } 113 | .md-footer-meta__inner { 114 | flex-direction: column; 115 | } 116 | } 117 | 118 | /* Search Bar in header customization */ 119 | .md-search__form { 120 | background-color: hsla(0, 0%, 100%, 0.06); 121 | } 122 | 123 | /* table width customization */ 124 | .md-typeset table:not([class]) th { 125 | min-width: 10rem; 126 | } 127 | 128 | /* md-button customization */ 129 | .md-typeset .md-button--primary { 130 | padding: 12px 20px; 131 | background-color: #138bff; 132 | border: 1px solid #138bff; 133 | border-radius: 4px; 134 | 135 | font-family: "SF Pro Text"; 136 | font-style: normal; 137 | font-weight: 600; 138 | font-size: 14px; 139 | line-height: 24px; 140 | 141 | color: white; 142 | } 143 | .md-typeset .md-button--primary:hover { 144 | background-color: white; 145 | border: 1px solid #138bff; 146 | color: #138bff; 147 | } 148 | 149 | /* Git commit information */ 150 | .md-source-file { 151 | display: flex; 152 | flex-direction: column; 153 | gap: 8px; 154 | 
} 155 | 156 | .md-source-file__fact { 157 | display: flex; 158 | flex-direction: row; 159 | gap: 8px; 160 | } 161 | 162 | .md-source-file__fact .md-icon { 163 | min-width: 24px; 164 | min-height: 24px; 165 | } -------------------------------------------------------------------------------- /docs/overrides/assets/fonts/SF-Pro-Display-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/overrides/assets/fonts/SF-Pro-Display-Bold.woff -------------------------------------------------------------------------------- /docs/overrides/assets/fonts/lg.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/overrides/assets/fonts/lg.ttf -------------------------------------------------------------------------------- /docs/overrides/assets/fonts/lg.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/overrides/assets/fonts/lg.woff -------------------------------------------------------------------------------- /docs/overrides/assets/fonts/sf-pro-text-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/overrides/assets/fonts/sf-pro-text-bold.woff -------------------------------------------------------------------------------- /docs/overrides/assets/fonts/sf-pro-text-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/overrides/assets/fonts/sf-pro-text-regular.woff 
-------------------------------------------------------------------------------- /docs/overrides/assets/imgs/background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/overrides/assets/imgs/background.png -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/branch-bottom.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/circle-arrow.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/footer-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/git-contributors.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/git-created.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/git-updated.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 
-------------------------------------------------------------------------------- /docs/overrides/assets/imgs/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/docs/overrides/assets/imgs/loading.gif -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/vulnerability-cve.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/vulnerability-pii.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/vulnerability-secrets.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /docs/overrides/assets/imgs/vulnerability-third-party.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /docs/overrides/assets/js/timeago_mkdocs.js: -------------------------------------------------------------------------------- 1 | const nodes = document.querySelectorAll(".timeago"); 2 | if (nodes.length > 0) { 3 | const locale = nodes[0].getAttribute("locale"); 4 | timeago.render(nodes, locale); 5 | } 
-------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block extrahead %} 3 | {% endblock %} 4 | 5 | 6 | {% block article %} 7 | {{ super() }} 8 | {% include "partials/content.html" %} 9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /docs/overrides/partials/content.html: -------------------------------------------------------------------------------- 1 | {% if "\x3ch1" not in page.content %} 2 |

{{ page.title | d(config.site_name, true)}}

3 | {% endif %} 4 | 5 | {{ page.content }} 6 | 7 | 8 | {% include "partials/source-file.html" %} 9 | -------------------------------------------------------------------------------- /docs/overrides/partials/copyright.html: -------------------------------------------------------------------------------- 1 | {% if config.copyright %} 2 | 3 | {{ config.copyright }} 4 | 5 | {% include "assets/imgs/footer-logo.svg" %} 6 | 7 | 8 | {% endif %} 9 | -------------------------------------------------------------------------------- /docs/overrides/partials/footer.html: -------------------------------------------------------------------------------- 1 | 65 | -------------------------------------------------------------------------------- /docs/overrides/partials/header.html: -------------------------------------------------------------------------------- 1 | {% set class = "md-header" %} 2 | {% if "navigation.tabs.sticky" in features %} 3 | {% set class = class ~ " md-header--shadow md-header--lifted" %} 4 | {% elif "navigation.tabs" not in features %} 5 | {% set class = class ~ " md-header--shadow" %} 6 | {% endif %} 7 |
8 | 109 | {% if "navigation.tabs.sticky" in features %} 110 | {% if "navigation.tabs" in features %} 111 | {% include "partials/tabs.html" %} 112 | {% endif %} 113 | {% endif %} 114 |
115 | -------------------------------------------------------------------------------- /docs/overrides/partials/logo.html: -------------------------------------------------------------------------------- 1 | {% if config.theme.logo %} 2 | {% include config.theme.logo or url %} 3 | {% else %} 4 | {% set icon = config.theme.icon.logo or "material/library" %} 5 | {% include ".icons/" ~ icon ~ ".svg" %} 6 | {% endif %} 7 | -------------------------------------------------------------------------------- /docs/overrides/partials/source-file.html: -------------------------------------------------------------------------------- 1 | 36 | -------------------------------------------------------------------------------- /docs/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "nbdefense-docs" 3 | version = "0.0.0" 4 | description = "NB Defense Documentation" 5 | authors = ["ProtectAI "] 6 | license = "Apache 2.0" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = ">=3.8,<3.11" 11 | mkdocs = "1.4.2" 12 | mkdocs-material = "8.5.10" 13 | mkdocs-meta-descriptions-plugin = "2.2.0" 14 | mkdocs-git-authors-plugin = "0.7.0" 15 | mkdocs-git-revision-date-localized-plugin = "1.2.0" 16 | lightgallery = "0.5" 17 | mkdocs-minify-plugin = "^0.6.4" 18 | -------------------------------------------------------------------------------- /nbdefense/__init__.py: -------------------------------------------------------------------------------- 1 | """CLI for scanning notebooks""" 2 | 3 | from ._version import __version__ 4 | -------------------------------------------------------------------------------- /nbdefense/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.0" 2 | -------------------------------------------------------------------------------- /nbdefense/constants.py: 
# --------------------------------------------------------------------------------
# Relative path of the temporary directory (its consumers live outside this file).
TEMPDIR = "./.tmp"

# Plugins are loaded by the order of their priority.
# Highest priority plugins are loaded first.
SCANNING_PLUGINS = [
    "nbdefense.plugins.PIIPlugin",
    "nbdefense.plugins.SecretsPlugin",
    "nbdefense.plugins.LicenseDependencyFilePlugin",
    "nbdefense.plugins.CVEDependencyFilePlugin",
]

# Report format name -> dotted path of the reporting class for that format.
AVAILABLE_REPORTING_MODULES = {
    "json": "nbdefense.reports.JsonReport",
    "html": "nbdefense.reports.HTMLReport",
}

DEFAULT_REPORTING_MODULE = AVAILABLE_REPORTING_MODULES["html"]

# License names accepted by default. Both license plugins start from this list;
# each one receives its own copy below so editing one cannot alter the other.
_DEFAULT_ACCEPTED_LICENSES = [
    "Apache License 2.0",
    "Apache Software License",
    "Apache 2.0",
    "Apache-2.0",
    "BSD",
    "BSD License",
    "BSD 3-Clause",
    "BSD-3-Clause",
    "GNU Library or Lesser General Public License (LGPL)",
    "Microsoft Public License",
    "MIT",
    "MIT License",
    "Python Software Foundation License",
    "ISC License (ISCL)",
    "MIT-0",
]

# NOTE: key order matters. The notebook plugins must stay after the
# dependency-file plugins, because the settings-file generator cuts the TOML
# dump at the LicenseNotebookPlugin table.
DEFAULT_SETTINGS = {
    "redact_secret": "PARTIAL",
    "trivy_binary_path": "",
    "plugins": {
        "nbdefense.plugins.SecretsPlugin": {
            "enabled": True,
            "secrets_plugins": [
                {"name": "SoftlayerDetector"},
                {"name": "StripeDetector"},
                {"name": "SendGridDetector"},
                {"name": "NpmDetector"},
                {"name": "KeywordDetector", "keyword_exclude": ""},
                {"name": "IbmCosHmacDetector"},
                {"name": "DiscordBotTokenDetector"},
                {"name": "BasicAuthDetector"},
                {"name": "AzureStorageKeyDetector"},
                {"name": "ArtifactoryDetector"},
                {"name": "AWSKeyDetector"},
                {"name": "CloudantDetector"},
                {"name": "GitHubTokenDetector"},
                {"name": "IbmCloudIamDetector"},
                {"name": "JwtTokenDetector"},
                {"name": "MailchimpDetector"},
                {"name": "PrivateKeyDetector"},
                {"name": "SlackDetector"},
                {"name": "SquareOAuthDetector"},
                {"name": "TwilioKeyDetector"},
                {"name": "Base64HighEntropyString", "limit": 4.5},
                {"name": "HexHighEntropyString", "limit": 3.0},
            ],
        },
        "nbdefense.plugins.PIIPlugin": {
            "enabled": True,
            "confidence_threshold": 0.8,
            "entities": {
                "US_PASSPORT": True,
                "AU_MEDICARE": True,
                "AU_TFN": True,
                "AU_ACN": True,
                "AU_ABN": True,
                "UK_NHS": True,
                "US_SSN": True,
                "US_ITIN": True,
                "US_DRIVER_LICENSE": True,
                "US_BANK_NUMBER": True,
                "MEDICAL_LICENSE": True,
                "LOCATION": True,
                "PHONE_NUMBER": True,
                "NRP": True,
                "IP_ADDRESS": True,
                "EMAIL_ADDRESS": True,
                "IBAN_CODE": True,
                "CRYPTO": True,
                "CREDIT_CARD": True,
                "PERSON": True,
            },
        },
        "nbdefense.plugins.LicenseDependencyFilePlugin": {
            "enabled": True,
            "accepted_licenses": list(_DEFAULT_ACCEPTED_LICENSES),
        },
        "nbdefense.plugins.CVEDependencyFilePlugin": {"enabled": True},
        "nbdefense.plugins.LicenseNotebookPlugin": {
            "enabled": True,
            "accepted_licenses": list(_DEFAULT_ACCEPTED_LICENSES),
            "licenses_for_notebooks_source": "HYBRID",
        },
        "nbdefense.plugins.CVENotebookPlugin": {"enabled": True},
    },
}

# --------------------------------------------------------------------------------
# /nbdefense/dependencies.py:
# --------------------------------------------------------------------------------
import csv
import logging
from pathlib import Path
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)

_DIST_INFO_SUFFIX = ".dist-info"
_PY_SUFFIX = ".py"

# Top-level RECORD entries that shouldn't be considered importable modules.
_INVALID_ROOT_DIRECTORIES = frozenset(
    {
        "_pytest",
        "/",
        "..",
        "__pycache__",
        "__pytest",
    }
)


class DependencyInfo:
    """Name, version and ``.dist-info`` location of one installed distribution."""

    def __init__(
        self, name: str, version: str, dist_info_path: Path, dist_info_name: str
    ) -> None:
        self.name: str = name
        self.version: str = version
        self.dist_info_path: Path = dist_info_path
        self.dist_info_name: str = dist_info_name

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(name={self.name!r}, version={self.version!r}, "
            f"dist_info_name={self.dist_info_name!r})"
        )


class ThirdPartyDependencies:
    """Index of the distributions found in a ``site-packages`` directory.

    On construction the directory is scanned for ``*.dist-info`` folders, and
    each distribution's ``RECORD`` file is read to map top-level importable
    module names back to the distribution that installed them.
    """

    def __init__(self, site_packages_path: Path) -> None:
        """
        :param site_packages_path: Directory to scan for ``*.dist-info`` folders.

        :raises NotImplementedError: If a ``.dist-info`` name has no ``-``
            separating the distribution name from its version.
        """
        self._site_packages_path: Path = site_packages_path
        self._dependencies_in_env: List[DependencyInfo] = []
        self._dependencies_by_module: Dict[str, DependencyInfo] = {}
        self._load()

    def _load(self) -> None:
        # Order matters: the module mapping is built from the parsed dependencies.
        self._parse_dependencies_available_in_env()
        self._parse_modules_from_dependencies_available()

    @property
    def dependencies_in_env(self) -> List[DependencyInfo]:
        """Every distribution discovered in the scanned directory."""
        return self._dependencies_in_env

    @property
    def dependencies_by_module(self) -> Dict[str, DependencyInfo]:
        """Top-level module name -> distribution whose RECORD lists it."""
        return self._dependencies_by_module

    def _parse_dependencies_available_in_env(self) -> None:
        """Collect a ``DependencyInfo`` for every ``*.dist-info`` entry."""
        for directory in self._site_packages_path.iterdir():
            directory_name = directory.name
            if not directory_name.endswith(_DIST_INFO_SUFFIX):
                continue
            # Parse package information from dist-info folder name. The version
            # is whatever follows the LAST hyphen, so distribution names that
            # themselves contain hyphens no longer abort the whole scan.
            stem = directory_name[: -len(_DIST_INFO_SUFFIX)]
            name, separator, version = stem.rpartition("-")
            if not separator:
                raise NotImplementedError("Could not Parse Package Details")
            self._dependencies_in_env.append(
                DependencyInfo(
                    name=name,
                    version=version,
                    dist_info_path=directory,
                    dist_info_name=directory_name,
                )
            )

    def _parse_modules_from_dependencies_available(self) -> None:
        """Map top-level modules listed in each RECORD file to their dependency."""
        for dependency in self._dependencies_in_env:
            record_file: Path = dependency.dist_info_path / "RECORD"
            if not record_file.exists():
                logger.warning(
                    "No RECORD file exists for %s==%s. "
                    "Could not link dependency to modules.",
                    dependency.name,
                    dependency.version,
                )
                continue
            # RECORD is a CSV file whose first column is the installed path; the
            # csv module handles quoted paths, and ``with`` closes the handle.
            with record_file.open(newline="", encoding="utf-8") as record:
                for row in csv.reader(record):
                    if not row or not row[0]:
                        continue
                    module_name = self._module_name_for(row[0], dependency)
                    if module_name is not None:
                        # First distribution to claim a module name keeps it.
                        self._dependencies_by_module.setdefault(
                            module_name, dependency
                        )

    def _module_name_for(
        self, record_path: str, dependency: DependencyInfo
    ) -> Optional[str]:
        """Return the importable top-level module for a RECORD path, or None."""
        parts = Path(record_path).parts
        if not parts:
            return None
        top_level = parts[0]
        if len(parts) == 1 and top_level.endswith(_PY_SUFFIX):
            # Single-file module installed directly into site-packages.
            module_name = top_level[: -len(_PY_SUFFIX)]
        elif (self._site_packages_path / top_level).is_dir():
            module_name = top_level
        else:
            return None
        if (
            not module_name
            or module_name.startswith("_")
            or module_name in _INVALID_ROOT_DIRECTORIES
            or module_name == dependency.dist_info_name
        ):
            return None
        return module_name

# --------------------------------------------------------------------------------
# /nbdefense/errors.py:
# --------------------------------------------------------------------------------
from enum import Enum
from typing import Any, Dict, Optional


class ErrorType(Enum):
    # Error occurred in the handle_binary_dependencies method
    DEPENDENCY_CHECK = "dependency check"
    # Error occurred in the scan method
    SCAN = "scan"
    # Error occurred in the report generation
    REPORT = "report generation"


class NBDefenseError:
    """Record of a failure in one phase of one plugin.

    This is a plain data object, not an ``Exception`` subclass; it cannot be raised.
    """

    error_type: ErrorType
    plugin_name: str
    message: Optional[str]

    def __init__(
        self, error_type: ErrorType, plugin_name: str, message: Optional[str] = None
    ) -> None:
        self.error_type = error_type
        self.plugin_name = plugin_name
        # A missing or empty message is stored as the literal string "None".
        self.message = message or "None"

    def __str__(self) -> str:
        return (
            f"Error occurred in the {self.error_type.value} portion for the "
            f"{self.plugin_name} plugin with the message: {self.message}"
        )

    def to_json(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict; the error type is given by enum name."""
        return {
            "error_type": self.error_type.name,
            "plugin_name": self.plugin_name,
            "message": self.message,
        }
# --------------------------------------------------------------------------------
# /nbdefense/plugins/__init__.py:
# --------------------------------------------------------------------------------
from nbdefense.plugins.cve.cve_dependency_file_plugin import (
    CVEDependencyFilePlugin,
)
from nbdefense.plugins.cve.cve_notebooks_plugin import CVENotebookPlugin
from nbdefense.plugins.licenses.licenses_dependency_file_plugin import (
    LicenseDependencyFilePlugin,
)
from nbdefense.plugins.licenses.licenses_notebooks_plugin import (
    LicenseNotebookPlugin,
)
from nbdefense.plugins.pii import PIIPlugin
from nbdefense.plugins.secrets import SecretsPlugin

# --------------------------------------------------------------------------------
# /nbdefense/plugins/cve/cve_dependency_file_plugin.py:
# --------------------------------------------------------------------------------
import json
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional

from rich import print

from nbdefense.codebase import Codebase
from nbdefense.errors import ErrorType, NBDefenseError
from nbdefense.issues import Issue, IssueCode, IssueDetails
from nbdefense.plugins.cve.cve_plugin import CVEPlugin
from nbdefense.plugins.plugin import ScanTarget
from nbdefense.settings import Settings
from nbdefense.tools import Trivy

logger = logging.getLogger(__name__)


class DependencyIssueDetails(IssueDetails):
    """Issue details pairing one vulnerability with the file it was found in."""

    def __init__(self, vulnerability: Dict[str, Any], file_path: Path) -> None:
        self.vulnerability = vulnerability
        self.file_path = file_path

    def to_json(self) -> Dict[str, Any]:
        """Return the file path (as a string) and the raw vulnerability dict."""
        return {
            "file_path": str(self.file_path),
            "results": self.vulnerability,
        }


class CVEDependencyFilePlugin(CVEPlugin):
    """CVE scan of the codebase's dependency (requirements) file using trivy."""

    def __init__(self) -> None:
        super().__init__()

    @staticmethod
    def scan_target() -> ScanTarget:
        return ScanTarget.DEPENDENCIES

    @staticmethod
    def name() -> str:
        return "CVE Plugin for Dependency Files"

    @staticmethod
    def scan(
        codebase: Codebase, settings: Optional[Settings] = None
    ) -> List[NBDefenseError]:
        """Run trivy on the requirements file and record one issue per finding.

        :param codebase: Supplies ``quiet``, ``requirements_file_path`` and the
            ``issues`` collection that findings are added to.
        :param settings: Optional settings; only ``trivy_binary_path`` is read.

        :returns: An empty list on success, or a single ``NBDefenseError`` when
            no dependency file is available or trivy's output is not valid JSON.
        """
        if codebase.quiet:
            logger.setLevel(logging.CRITICAL)
        if not codebase.requirements_file_path:
            print(
                f"[yellow]Skipping {CVEDependencyFilePlugin.name()} scan as it requires a dependency file. (Plugin should have been skipped)[/yellow]"
            )
            return [
                NBDefenseError(
                    ErrorType.SCAN,
                    CVEDependencyFilePlugin.name(),
                    "CVE scan was skipped because a dependency file was not provided.",
                )
            ]

        trivy_binary_path = settings.get("trivy_binary_path") if settings else ""
        trivy = Trivy(trivy_binary_path)
        _, stdout, _ = trivy.execute(str(codebase.requirements_file_path))
        if not stdout:
            return []

        try:
            # stdout is joined as bytes chunks, as the original code did.
            trivy_report = json.loads(b"".join(stdout))
        except ValueError as error:
            # Covers JSONDecodeError and UnicodeDecodeError: report the failure
            # through the plugin's error channel instead of aborting the scan.
            return [
                NBDefenseError(
                    ErrorType.SCAN,
                    CVEDependencyFilePlugin.name(),
                    f"CVE scan failed because trivy output could not be parsed: {error}",
                )
            ]

        vulnerabilities = CVEPlugin.extract_results(trivy_report)
        if not vulnerabilities:
            print("Scan found no vulnerabilities")
            return []

        logger.warning(f"Scan found {len(vulnerabilities)} vulnerabilities")
        for vulnerability in vulnerabilities:
            codebase.issues.add_issue(
                Issue(
                    code=IssueCode.VULNERABLE_DEPENDENCY_DEP_FILE,
                    severity=CVEPlugin.vulnerability_severity(vulnerability),
                    details=DependencyIssueDetails(
                        vulnerability=vulnerability,
                        file_path=codebase.requirements_file_path,
                    ),
                )
            )
        return []

# --------------------------------------------------------------------------------
# /nbdefense/plugins/cve/cve_plugin.py:
# --------------------------------------------------------------------------------
from typing import Any, Dict, List, Optional

from rich import print
from rich.prompt import Confirm

from nbdefense.issues import Severity
from nbdefense.plugins.plugin import Plugin
from nbdefense.settings import Settings
from nbdefense.tools import Trivy


class CVEPluginSettings(Settings):
    """Settings object for the CVE plugins; adds nothing to ``Settings``."""

    def __init__(
        self, plugin_class_name: str, is_enabled: bool, overrides: Dict[str, Any]
    ) -> None:
        super().__init__(plugin_class_name, is_enabled, overrides)


class CVEPlugin(Plugin):
    """Shared trivy handling and result parsing for the CVE plugins."""

    def __init__(self) -> None:
        super().__init__()

    @staticmethod
    def handle_binary_dependencies(
        quiet: bool, yes: bool, settings: Optional[Settings] = None
    ) -> bool:
        """Make sure trivy is available, installing it when permitted.

        :param quiet: When True nothing is printed and a missing trivy is
            installed without prompting.
        :param yes: When True (and not quiet) a missing trivy is installed
            without asking for confirmation.
        :param settings: Optional settings; only ``trivy_binary_path`` is read.

        :returns: False if the user declines the install, otherwise whatever
            ``trivy.installed()`` reports afterwards.
        """
        trivy_binary_path = settings.get("trivy_binary_path") if settings else ""
        trivy = Trivy(trivy_binary_path)

        if quiet:
            if not trivy.installed():
                trivy.install()
            return trivy.installed()

        print("Checking for trivy...")
        if trivy.installed():
            print("trivy found.")
            return trivy.installed()

        print("trivy not found.")
        if not yes:
            confirmed = Confirm.ask(
                "[bold]Do you want to install trivy from https://github.com/aquasecurity/trivy?[/bold]",
                default=True,
                show_default=True,
            )
            if not confirmed:
                return False
        trivy.install()
        return trivy.installed()

    @staticmethod
    def get_settings(
        plugin_class_name: str,
        is_enabled: bool = True,
        overrides: Optional[Dict[str, Any]] = None,
    ) -> CVEPluginSettings:
        """Build the settings object for a CVE plugin.

        ``overrides`` defaults to a fresh dict on every call: the settings
        object keeps a reference to it, so a shared ``{}`` default would leak
        state between unrelated settings instances.
        """
        return CVEPluginSettings(
            plugin_class_name, is_enabled, {} if overrides is None else overrides
        )

    @staticmethod
    def vulnerability_severity(vulnerability: Dict[str, Any]) -> Severity:
        """Map a vulnerability's ``Severity`` string onto ``Severity``.

        A missing or unrecognized severity is treated as ``Severity.HIGH``.
        """
        known_severities = {
            "CRITICAL": Severity.CRITICAL,
            "HIGH": Severity.HIGH,
            "MEDIUM": Severity.MEDIUM,
            "LOW": Severity.LOW,
        }
        return known_severities.get(vulnerability.get("Severity"), Severity.HIGH)

    @staticmethod
    def extract_results(trivy_json: Dict[Any, Any]) -> List[Dict[str, Any]]:
        """Collect the vulnerabilities from every entry of a trivy JSON report.

        All ``Results`` entries are read, not only the first one, and a missing
        or null ``Results`` / ``Vulnerabilities`` value counts as empty.

        :returns: A possibly empty list of vulnerability dicts.
        """
        vulnerabilities: List[Dict[str, Any]] = []
        for result in trivy_json.get("Results") or []:
            vulnerabilities.extend(result.get("Vulnerabilities") or [])
        return vulnerabilities

# --------------------------------------------------------------------------------
# /nbdefense/plugins/licenses/license_plugin_settings.py:
# --------------------------------------------------------------------------------
from enum import Enum
from typing import Any, Dict, List, Optional

from nbdefense.settings import Settings, UnknownSettingsValueError


class LicensePluginSource(str, Enum):
    HYBRID = "hybrid"  # Looks for licenses in local pip environment and uses the pypi api when a package is not installed locally
    PYPI = "pypi"  # Scans for licenses using PYPI api only
    LOCAL = "local"  # Scans for licenses in local pip environment only

    def to_json(self) -> str:
        """Return the member name (upper case), not its lower-case value."""
        return self.name


class LicensePluginSettings(Settings):
    """Typed accessors for the license plugins' settings."""

    def __init__(
        self,
        plugin_class_name: str,
        is_enabled: bool,
        settings: Optional[Dict[str, Any]] = None,
    ) -> None:
        # A fresh dict per instance: the base class stores the reference.
        super().__init__(
            plugin_class_name, is_enabled, {} if settings is None else settings
        )

    def get_accepted_licenses(self) -> List[str]:
        """Return the ``accepted_licenses`` setting.

        :raises UnknownSettingsValueError: If the stored value is not a list.
        """
        accepted_licenses: List[str] = super().get("accepted_licenses")
        if not isinstance(accepted_licenses, list):
            raise UnknownSettingsValueError("accepted_licenses", accepted_licenses)
        return accepted_licenses

    def get_licenses_for_notebooks_source(self) -> LicensePluginSource:
        """Return ``licenses_for_notebooks_source`` as an enum member.

        The stored value is matched case-insensitively.

        :raises UnknownSettingsValueError: If the value names no known source.
        """
        source: str = str(super().get("licenses_for_notebooks_source")).lower()
        try:
            return LicensePluginSource(source)
        except ValueError:
            raise UnknownSettingsValueError(
                "licenses_for_notebooks_source", source
            ) from None

# --------------------------------------------------------------------------------
# /nbdefense/plugins/licenses/licenses_dependency_file_plugin.py:
# --------------------------------------------------------------------------------
import logging
from typing import List

from rich import print

from nbdefense.codebase import Codebase, DependencyIssue
from nbdefense.errors import ErrorType, NBDefenseError
from nbdefense.issues import Issue, IssueCode, Severity
from nbdefense.plugins.licenses.license_plugin import (
    LicensePlugin,
    UnapprovedLicenseIssueDetails,
)
from nbdefense.plugins.licenses.license_plugin_settings import (
    LicensePluginSettings,
    LicensePluginSource,
)
from nbdefense.plugins.plugin import ScanTarget

logger = logging.getLogger(__name__)


class LicenseDependencyFilePlugin(LicensePlugin):
    """License scan of the packages listed in the codebase's dependency file."""

    def __init__(self) -> None:
        super().__init__()

    @staticmethod
    def scan_target() -> ScanTarget:
        return ScanTarget.DEPENDENCIES

    @staticmethod
    def name() -> str:
        return "License Plugin for Dependency Files"

    @staticmethod
    def scan(cb: Codebase, settings: LicensePluginSettings) -> List[NBDefenseError]:  # type: ignore[override]
        """Add an issue for every unapproved or missing package license.

        :param cb: Supplies ``quiet``, the requirements file and its parsed
            dependencies, the temp directory, and the ``issues`` collection.
        :param settings: Provides the list of accepted licenses.

        :returns: One ``NBDefenseError`` per package that could not be scanned,
            or a single error when no dependency file is available.
        """
        if cb.quiet:
            logger.setLevel(logging.CRITICAL)

        if not cb.requirements_file_path:
            print(
                f"[yellow]Skipping {LicenseDependencyFilePlugin.name()} scan as it requires a dependency file. (Plugin should have been skipped)[/yellow]"
            )
            return [
                NBDefenseError(
                    ErrorType.SCAN,
                    LicenseDependencyFilePlugin.name(),
                    "License scan was skipped because a dependency file was not provided.",
                )
            ]

        # Look up the licenses of every dependency-file package, PyPI only.
        packages = LicensePlugin.get_licenses(
            cb.requirements_file_dependencies,
            cb.temp_directory,
            LicensePluginSource.PYPI,
        )

        accepted_licenses = settings.get_accepted_licenses()

        # Errors that occur while scanning individual packages
        scan_errors: List[NBDefenseError] = []
        for package in packages:
            try:
                unapproved_licenses = LicensePlugin.filter_for_unapproved_licenses(
                    package.licenses, accepted_licenses
                )
                if unapproved_licenses:
                    print(
                        f"Scan found {len(unapproved_licenses)} unapproved license(s): {unapproved_licenses} in package {package.name}."
                    )
                    # Report only the offending licenses; iterating over
                    # package.licenses also flagged the accepted ones.
                    # NOTE(review): assumes filter_for_unapproved_licenses
                    # returns the unapproved entries of package.licenses;
                    # confirm against LicensePlugin.
                    for unapproved_license in unapproved_licenses:
                        cb.issues.add_issue(
                            Issue(
                                code=IssueCode.UNAPPROVED_LICENSE_DEP_FILE,
                                severity=Severity.MEDIUM,
                                details=UnapprovedLicenseIssueDetails(
                                    package_name=package.name,
                                    package_version=package.version,
                                    unapproved_license=unapproved_license,
                                    file_path=cb.requirements_file_path,
                                ),
                            )
                        )
                elif not package.licenses:
                    print(f"Could not find license associated with {package.name}.")
                    cb.issues.add_issue(
                        Issue(
                            code=IssueCode.LICENSE_NOT_FOUND_DEP_FILE,
                            severity=Severity.MEDIUM,
                            details=DependencyIssue(
                                cb.requirements_file_path, package.name, package.version
                            ),
                        )
                    )
            except Exception as error:
                # Per-package boundary: one bad package must not stop the
                # others, so the failure is logged and returned to the caller.
                logger.warning(
                    f"Unable to scan package {package.name} because of the following error: {str(error)}"
                )
                scan_errors.append(
                    NBDefenseError(
                        ErrorType.SCAN,
                        LicenseDependencyFilePlugin.name(),
                        f"License scan for package {package.name} was skipped because of the following error: {str(error)}",
                    )
                )
        return scan_errors

# --------------------------------------------------------------------------------
# /nbdefense/plugins/plugin.py:
# --------------------------------------------------------------------------------
import abc
from enum import Enum
from typing import Any, Dict, List, Optional

from nbdefense.codebase import Codebase
from nbdefense.errors import NBDefenseError
from nbdefense.settings import Settings


class ScanTarget(Enum):
    NOTEBOOKS = 1
    DEPENDENCIES = 2


class Plugin(metaclass=abc.ABCMeta):
    """Abstract base class that every scanning plugin implements."""

    def __init__(self) -> None:
        pass

    @staticmethod
    def handle_binary_dependencies(
        quiet: bool, yes: bool, settings: Optional[Settings] = None
    ) -> bool:
        """
        Implement this method if the plugin requires a binary dependency.
        It should perform the following actions:

        1. Check if the dependency is installed
        2. Prompt the user to install the dependency if it is not installed

        :param quiet: Whether or not to suppress output (Prompts should resolve with the default option)
        :param yes: When True, all user prompts should be assumed yes
        :param settings: Optional settings object for the plugin

        :returns:
            True if there are no dependencies or if they are all installed;
            False if the install failed or the user declines to install the dependency
        """
        return True

    @staticmethod
    @abc.abstractmethod
    def scan_target() -> ScanTarget:
        """
        Implement this method for all plugins.

        This method should return a scan target for each plugin.
        """
        raise NotImplementedError

    @staticmethod
    @abc.abstractmethod
    def name() -> str:
        """
        Implement this method for all plugins.

        This method should return a readable name for each plugin.
        """
        raise NotImplementedError

    @staticmethod
    @abc.abstractmethod
    def get_settings(
        plugin_class_name: str, is_enabled: bool = True, settings: Dict[str, Any] = {}
    ) -> Settings:
        """
        Implement this method for all plugins.

        This method should return a settings object for the plugin
        """
        # The ``{}`` default is never touched here; implementations should not
        # store or mutate it without copying.
        raise NotImplementedError

    @staticmethod
    @abc.abstractmethod
    def scan(codebase: Codebase, settings: Settings) -> List[NBDefenseError]:
        """
        Implement this method for all plugins.

        This method should handle the main scan logic of that plugin.

        :param codebase: The codebase instance that holds issues at repo and notebook level.
        :param settings: The settings object for the plugin

        :returns: A list of the errors that occurred during the scan.
        """
        raise NotImplementedError

    @staticmethod
    @abc.abstractmethod
    def redact(codebase: Codebase, settings: Optional[Settings] = None) -> None:
        """
        Implement this method for all plugins.

        This method should perform any necessary redaction on detected sensitive data.

        :param codebase: The codebase instance that holds issues at repo and notebook level.
        :param settings: Optional settings object for the plugin
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception; raising
        # it fails with a TypeError. ``NotImplementedError`` is what was meant.
        raise NotImplementedError

# --------------------------------------------------------------------------------
# /nbdefense/settings.py:
# --------------------------------------------------------------------------------
import abc
from typing import Any, Dict

import tomlkit

from nbdefense.constants import DEFAULT_SETTINGS


class Settings(abc.ABC):
    """Lookup over a settings dict, scoped to one plugin class name."""

    is_enabled: bool
    settings: Dict[str, Any]

    def __init__(
        self, plugin_class_name: str, is_enabled: bool, settings: Dict[str, Any]
    ) -> None:
        self.plugin_class_name = plugin_class_name
        self.is_enabled = is_enabled
        self.settings = settings

    def get(self, key: str) -> Any:
        """Return the value for ``key``; global settings win over plugin ones.

        :raises UnknownSettingsError: If ``key`` is neither a top-level setting
            nor a setting of this plugin.
        """
        # Global settings
        if key in self.settings:
            return self.settings[key]
        # Settings specific to this plugin
        plugin_settings = self.settings.get("plugins", {}).get(
            self.plugin_class_name, {}
        )
        if key in plugin_settings:
            return plugin_settings[key]

        raise UnknownSettingsError(key)

    def to_json(self, limit_to_plugin: bool = False) -> Any:
        """Return all settings, or only this plugin's when ``limit_to_plugin``.

        With ``limit_to_plugin`` the result is None when the plugin (or the
        whole ``plugins`` table) is absent.
        """
        if limit_to_plugin:
            return self.settings.get("plugins", {}).get(self.plugin_class_name)
        return self.settings


class UnknownSettingsError(Exception):
    """Raised when a setting that does not exist is requested."""

    setting_accessed: str

    def __init__(self, setting_accessed: str, *args: object) -> None:
        self.setting_accessed = setting_accessed
        super().__init__(*args)

    def __str__(self) -> str:
        return f"Unknown setting: {self.setting_accessed}"


class UnknownSettingsValueError(UnknownSettingsError):
    """Raised when a setting exists but holds an unsupported value."""

    # Callers in this project also pass non-string values (e.g. a bad list).
    setting_value: Any

    def __init__(
        self, setting_accessed: str, setting_value: Any, *args: object
    ) -> None:
        self.setting_value = setting_value
        super().__init__(setting_accessed, *args)

    def __str__(self) -> str:
        return f"Setting '{self.setting_accessed}' has unknown value: '{self.setting_value}'"


class SettingsUtils:
    @staticmethod
    def get_default_settings_as_toml() -> str:
        """Render ``DEFAULT_SETTINGS`` as the text of a commented settings file.

        Everything from the ``LicenseNotebookPlugin`` table onwards is cut from
        the dump, a header line is prepended, and an explanatory comment block
        is inserted directly above the ``redact_secret`` entry.
        """
        # TODO: Find a better way to add comments to the toml file
        toml_settings = tomlkit.dumps(DEFAULT_SETTINGS)

        # Remove notebook plugin settings (this table and all that follow it)
        toml_settings = toml_settings.split(
            '\n[plugins."nbdefense.plugins.LicenseNotebookPlugin"]'
        )[0]

        # Add settings file header
        toml_settings = f"# NB Defense settings file\n\n{toml_settings}"

        # Add redact_secret enum comment
        redact_secret_string = 'redact_secret = "PARTIAL"'  # nosec
        redact_secret_index = toml_settings.find(redact_secret_string)
        if redact_secret_index == -1:
            # Entry not present (defaults changed): return the dump without the
            # comment rather than splicing it in before the last character.
            return toml_settings

        redact_secret_comment = (
            "# Redact secrets\n"
            "# Possible values are `PARTIAL`, `ALL`, `HASH`\n"
            "\n"
            "# `PARTIAL` will show only leading and trailing characters.\n"
            "\n"
            "# `ALL` will shadow the full secret.\n"
            "\n"
            "# `HASH` will replace the full secret with its hashed value.\n"
        )
        return "".join(
            [
                toml_settings[:redact_secret_index],
                redact_secret_comment,
                toml_settings[redact_secret_index:],
            ]
        )

# --------------------------------------------------------------------------------
# /nbdefense/templates/errors.html:
# --------------------------------------------------------------------------------
# 1 | {% if errors %} 2 |
4 |
{% include "icons/alert-icon-critical.svg" %}
5 |
6 |

Errors:

7 |
    8 | {% for error in errors %} 9 |
  • 10 | Error occurred in the 11 | {{error.error_type.value}} 12 | portion of the 13 | {{error.plugin_name}} 14 | plugin. Message: {{error.message}} 15 |
  • 16 | {% endfor %} 17 |
18 |
19 |
20 | {% endif %} -------------------------------------------------------------------------------- /nbdefense/templates/files-scanned-dialog.html: -------------------------------------------------------------------------------- 1 | 3 |
4 |
5 |

Files scanned

6 | 7 | {% include "icons/x.svg" %} 8 | 9 |
10 |
    11 | {% for artifact_name, artifacts in artifacts_scanned.items() %} 12 |
  • {{ artifact_name | title }}
  • 13 |
      14 | {% for artifact in artifacts %} 15 |
    • {{ artifact }}
    • 16 | {% endfor %} 17 |
    18 | {% endfor %} 19 |
20 |
21 |
-------------------------------------------------------------------------------- /nbdefense/templates/footer.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nbdefense/templates/header.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 34 | 35 | 50 | 51 | NB Defense Report 52 | -------------------------------------------------------------------------------- /nbdefense/templates/icons/alert-icon-critical.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /nbdefense/templates/icons/alert-icon-high.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /nbdefense/templates/icons/alert-icon-low.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /nbdefense/templates/icons/alert-icon-medium.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /nbdefense/templates/icons/header-github.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /nbdefense/templates/icons/x.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 
-------------------------------------------------------------------------------- /nbdefense/templates/issue-codes/dependency-file.html: -------------------------------------------------------------------------------- 1 |
2 | {% set issue_details = issue.details %} 3 |

{{ issue_code | replace("_", " ") | title }}

4 |

Path: {{ issue_details.file_path }}

5 |

Description:

6 |

Could not find a pinned version for package `{{ issue_details.name }}`

7 |
8 | -------------------------------------------------------------------------------- /nbdefense/templates/issue-codes/license-not-found-dep-file.html: -------------------------------------------------------------------------------- 1 |
2 | {% set issue_details = issue.details %} 3 |

License Not Found

4 |

Path: {{ issue_details.file_path }}

5 |
6 |
7 |

Package

8 |

{{issue_details.name}}

9 |
10 |
11 |

Version

12 |

{{issue_details.version}}

13 |
14 |
15 |
-------------------------------------------------------------------------------- /nbdefense/templates/issue-codes/pii-found.html: -------------------------------------------------------------------------------- 1 |
2 |

{{ issue_code | replace("_", " ") | title | replace("Pii", "PII") }}

3 |

Path: {{ issue.details.file_path }}

4 |

Description:

5 |

{{ issue.details.description }} with the following tag(s):

6 |

7 |

{{ issue.details.summary_field | to_pretty_json | scrub_html | safe }}
8 |

9 |

10 | Location: cell {{ issue.location.value }} 11 |

12 |
13 |
14 |

Cell #{{ issue.cell.cell_index }}

15 |
16 |
17 | {{ issue.cell | string() | scrub_html | safe }} 18 |
19 |
20 |
-------------------------------------------------------------------------------- /nbdefense/templates/issue-codes/secrets.html: -------------------------------------------------------------------------------- 1 |
2 |

{{ issue_code | replace("_", " ") | title }}

3 |

Path: {{ issue.details.file_path }}

4 |

Vulnerability: Exposed Secret

5 |

Description:

6 |

{{ issue.details.description | replace(",", ", ") }}

7 |

8 | Location: cell {{ issue.details.location }} 9 |

10 |
11 |
12 |

Cell #{{ issue.cell.cell_index }}

13 |
14 |
17 | {{ issue.cell | string() }} 18 |
19 |
20 |
21 | -------------------------------------------------------------------------------- /nbdefense/templates/issue-codes/unapproved-license-dep-file.html: -------------------------------------------------------------------------------- 1 |
2 | {% set issue_details = issue.details %} 3 |

Unapproved License

4 |

Path: {{ issue_details.file_path }}

5 |

License: {{issue_details.unapproved_license}}

6 |
7 |
8 |

Package

9 |

{{issue_details.package_name}}

10 |
11 |
12 |

Version

13 |

{{issue_details.package_version}}

14 |
15 |
16 |
17 | -------------------------------------------------------------------------------- /nbdefense/templates/issue-codes/vulnerable-dependency-dep-file.html: -------------------------------------------------------------------------------- 1 |
2 | {% set issue_details = issue.details %} 3 |

CVE

4 |

Path: {{ issue_details.file_path }}

5 |

6 | Vulnerability: 7 | {{ issue_details.vulnerability['VulnerabilityID'] }} 14 |

15 |
16 |
17 |

Package

18 |

{{ issue_details.vulnerability['PkgName'] }}

19 |
20 |
21 |

Affected versions

22 |

{{ issue_details.vulnerability['InstalledVersion'] }}

23 |
24 |
25 |

Fixed versions

26 |

{{ issue_details.vulnerability['FixedVersion'] }}

27 |
28 |
29 |
30 | -------------------------------------------------------------------------------- /nbdefense/templates/issue.html: -------------------------------------------------------------------------------- 1 |
4 |
5 | {% include "icons/alert-icon-"+severity|lower+".svg" %} 6 |
7 | {% include "issue-codes/"+issue_code | replace("_", "-") | lower+".html" %} 8 |
9 | -------------------------------------------------------------------------------- /nbdefense/templates/issues.html: -------------------------------------------------------------------------------- 1 |
2 |

All issues

3 | {% for severity in ["CRITICAL", "HIGH", "MEDIUM", "LOW"] %} 4 |
5 | {{ issues_by_severity[severity]|length }} 9 |

{{ severity|title }}

10 |
11 | {% if issues_by_severity[severity] | length == 0 %} 12 | {% include "no-issues.html"%} 13 | {% else %} 14 | {% for issue in issues_by_severity[severity] %} 15 | {% set issue_code = issue.code.name %} 16 | {% include "issue.html" %} 17 | {% endfor %} 18 | {% endif %} 19 | {% endfor %} 20 |
21 | -------------------------------------------------------------------------------- /nbdefense/templates/navbar.html: -------------------------------------------------------------------------------- 1 | 14 | -------------------------------------------------------------------------------- /nbdefense/templates/no-issues.html: -------------------------------------------------------------------------------- 1 |
4 |

No issues were found at this severity level

5 |
6 | -------------------------------------------------------------------------------- /nbdefense/templates/report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% include 'header.html' %} 4 | 5 | 6 |
7 | {% include 'navbar.html' %} 8 |
9 |
10 |
11 |
12 |
13 | {{codebase_root}} 14 |
15 |
16 | {{codebase_basename}} 17 |
18 |
19 |
20 | 22 |
23 |
24 | 25 | {% include 'summary-card.html' %} 26 | 27 | {% include "errors.html" %} 28 | 29 | {% include 'issues.html' %} 30 | 31 | {% include "files-scanned-dialog.html" %} 32 | 33 | {% include 'footer.html' %} 34 | 35 | {% include "scripts.html" %} 36 | -------------------------------------------------------------------------------- /nbdefense/templates/scripts.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nbdefense/templates/summary-card.html: -------------------------------------------------------------------------------- 1 |
3 |
4 |

Files

5 | {{(artifacts_scanned["requirements"]|length) + 6 | (artifacts_scanned["notebooks"]|length)}} 7 |
8 |
9 |

Checks ran

10 | {{total_checks_run}} 11 |
12 |
13 |

Scanned at

14 | {{date}} · {{time}} 15 |
16 |
class Tool(metaclass=abc.ABCMeta):
    """
    A base class for different type of tools used in the scanning.

    Subclasses must implement :meth:`execute` and :meth:`__call__`.
    """

    def __init__(self) -> None:
        pass

    @abc.abstractmethod
    def execute(
        self, args: Optional[Union[str, List[Any]]] = None
    ) -> Tuple[Union[int, Any], Optional[List[bytes]], Optional[List[bytes]]]:
        """
        Run the tool.

        :param args: A single argument or a list of arguments for the tool.

        :return: A ``(return code, stdout lines, stderr lines)`` tuple.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def __call__(
        self,
        args: Optional[Union[str, List[Any]]] = None,
        ignore_output: Optional[bool] = True,
    ) -> Any:
        raise NotImplementedError


class CLITool(Tool):
    """
    Executes shell commands for running scans using 3rd party tools.
    """

    def __init__(self) -> None:
        # Default command; subclasses overwrite ``basecmd`` with the real tool.
        self.basecmd = ["echo"]

    def execute(
        self, args: Optional[Union[str, List[Any]]] = None
    ) -> Tuple[Union[int, Any], Optional[List[bytes]], Optional[List[bytes]]]:
        """
        Run ``basecmd`` followed by ``args`` and wait for it to finish.

        :param args: A single argument or a list of arguments appended to
            ``basecmd``. ``None`` or an empty value runs ``basecmd`` alone.

        :return: A ``(return code, stdout lines, stderr lines)`` tuple. The
            lines are raw bytes and keep their trailing newline.
        """
        # Function-scope import: the module header does not import ``io``.
        from io import BytesIO

        cmd = self.basecmd
        if args:
            # Build a new list so that ``self.basecmd`` is never mutated.
            if isinstance(args, list):
                cmd = self.basecmd + args
            else:
                cmd = self.basecmd + [args]

        # ``communicate`` drains both pipes concurrently (reading stdout to
        # EOF before touching stderr can deadlock once the stderr pipe fills)
        # and waits for the child, so ``returncode`` is populated instead of
        # being left as ``None``. The context manager closes the pipes.
        with Popen(cmd, stdout=PIPE, stderr=PIPE) as p:  # nosec
            stdout_data, stderr_data = p.communicate()

        # ``BytesIO.readlines`` splits exactly like the pipe's ``readlines``.
        stdout_lines = BytesIO(stdout_data).readlines()
        stderr_lines = BytesIO(stderr_data).readlines()

        return p.returncode, stdout_lines, stderr_lines

    def __call__(
        self,
        args: Optional[Union[str, List[Any]]] = None,
        ignore_output: Optional[bool] = True,
    ) -> Any:
        """
        Shorthand for :meth:`execute`.

        ``ignore_output`` is accepted for interface compatibility with
        :class:`Tool` but is not used by this implementation.
        """
        return self.execute(args)
"curl", 87 | "-sfL", 88 | "https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh", 89 | ], 90 | stdout=PIPE, 91 | ) # nosec 92 | p2 = run( 93 | ["sh", "-s", "--", "-b", self.installPath, "v0.32.1"], 94 | stdin=p1.stdout, 95 | stdout=PIPE, 96 | ) # nosec 97 | 98 | 99 | class DetectSecrets(CLITool): 100 | def __init__(self) -> None: 101 | super().__init__() 102 | self.basecmd = ["detect-secrets", "scan"] 103 | -------------------------------------------------------------------------------- /nbdefense/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | import socketserver 5 | import threading 6 | import time 7 | import webbrowser 8 | from datetime import datetime 9 | from http.server import SimpleHTTPRequestHandler 10 | from pathlib import Path 11 | from typing import Any, Optional 12 | 13 | import click 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def serve_report_and_launch_url(report: str) -> None: 19 | """ 20 | Create a HTTP server at a random port that servers the html passed as string 21 | 22 | :param report: The string that will be send back as response 23 | 24 | :return: TCPServer that can be launched with serve_forever() 25 | """ 26 | encoded_report = report.encode("utf-8") 27 | 28 | class SimpleHTTPReportServer(SimpleHTTPRequestHandler): 29 | def do_GET(self) -> None: 30 | self.send_response(200) 31 | self.send_header("Content-type", "text/html") 32 | self.end_headers() 33 | 34 | self.wfile.write(encoded_report) 35 | 36 | handler = SimpleHTTPReportServer 37 | server = socketserver.TCPServer(("localhost", 0), handler) 38 | port = server.server_address[1] 39 | url = f"http://127.0.0.1:{port}/" 40 | click.echo(f"Report can be viewed at {url}") 41 | 42 | def start_server(server: Any) -> None: 43 | with server: 44 | server.serve_forever() 45 | 46 | server_thread = threading.Thread(target=start_server, args=(server,)) 47 | 
def scrub_html(code_snippet: Optional[str]) -> str:
    """
    Escape the angle brackets in a snippet, keeping a small set of table tags.

    Every ``<`` and ``>`` is turned into ``&lt;`` / ``&gt;`` unless it belongs
    to an opening or closing tag whose name is exactly one of the allowed
    table tags. Allowed tags are kept verbatim, including any attributes
    (e.g. style information). Existing character references such as
    ``&lt;`` are left untouched.

    :param code_snippet: The text to scrub. ``None`` or an empty string
        yields an empty string.

    :return: The scrubbed text.
    """
    if not code_snippet:
        return ""

    allowed_html_tags = ("table", "thead", "tbody", "tr", "th", "td")

    def _keep_or_escape(match: "re.Match[str]") -> str:
        # ``name`` is None for a stray bracket, which is always escaped.
        if match.group("name") in allowed_html_tags:
            return match.group(0)
        return match.group(0).replace("<", "&lt;").replace(">", "&gt;")

    # A single pass over the input. The first alternative matches a complete
    # tag; the name must be followed by whitespace or ">" so that "th" cannot
    # match "<thread>". The second alternative catches every other bracket.
    # NOTE(review): attributes of allowed tags are passed through unchanged,
    # which includes event-handler attributes - confirm that is acceptable
    # for the rendered report.
    return re.sub(
        r"</?(?P<name>[A-Za-z][A-Za-z0-9]*)(?:\s[^<>]*)?>|[<>]",
        _keep_or_escape,
        code_snippet,
    )
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/protectai/nbdefense/f836d3702636b715058e6e80f53165414ee159ff/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from tests.mock_notebooks.fixtures import ( 2 | dataframe, 3 | dataframe_with_secrets, 4 | dataframe_with_custom_index, 5 | raw_notebook_cells, 6 | mock_notebook_as_json, 7 | mock_notebook, 8 | ) 9 | 10 | from tests.plugin_tests.licenses.fixtures import ( 11 | mock_license_cache, 12 | mock_fetch_licenses_from_dist_info_path, 13 | mock_fetch_license_data_from_pypi, 14 | create_license_requirements_file_path, 15 | ) 16 | 17 | from tests.plugin_tests.cve.fixtures import ( 18 | mock_third_party_dependencies, 19 | install_trivy, 20 | create_cve_requirements_file_path, 21 | ) 22 | -------------------------------------------------------------------------------- /tests/default_settings.py: -------------------------------------------------------------------------------- 1 | DEFAULT_SETTINGS = { 2 | "redact_secret": "PARTIAL", 3 | "trivy_binary_path": "", 4 | "plugins": { 5 | "nbdefense.plugins.SecretsPlugin": { 6 | "enabled": True, 7 | "secrets_plugins": [ 8 | {"name": "SoftlayerDetector"}, 9 | {"name": "StripeDetector"}, 10 | {"name": "SendGridDetector"}, 11 | {"name": "NpmDetector"}, 12 | {"name": "KeywordDetector", "keyword_exclude": ""}, 13 | {"name": "IbmCosHmacDetector"}, 14 | {"name": "DiscordBotTokenDetector"}, 15 | {"name": "BasicAuthDetector"}, 16 | {"name": "AzureStorageKeyDetector"}, 17 | {"name": "ArtifactoryDetector"}, 18 | {"name": "AWSKeyDetector"}, 19 | {"name": "CloudantDetector"}, 20 | {"name": "GitHubTokenDetector"}, 21 | {"name": "IbmCloudIamDetector"}, 22 | {"name": "JwtTokenDetector"}, 23 | {"name": "MailchimpDetector"}, 24 | {"name": "PrivateKeyDetector"}, 25 | {"name": 
"SlackDetector"}, 26 | {"name": "SquareOAuthDetector"}, 27 | {"name": "TwilioKeyDetector"}, 28 | {"name": "Base64HighEntropyString", "limit": 4.5}, 29 | {"name": "HexHighEntropyString", "limit": 3.0}, 30 | ], 31 | }, 32 | "nbdefense.plugins.PIIPlugin": { 33 | "enabled": True, 34 | "confidence_threshold": 0.8, 35 | "entities": { 36 | "US_PASSPORT": True, 37 | "AU_MEDICARE": True, 38 | "AU_TFN": True, 39 | "AU_ACN": True, 40 | "AU_ABN": True, 41 | "UK_NHS": True, 42 | "US_SSN": True, 43 | "US_ITIN": True, 44 | "US_DRIVER_LICENSE": True, 45 | "US_BANK_NUMBER": True, 46 | "MEDICAL_LICENSE": True, 47 | "LOCATION": True, 48 | "PHONE_NUMBER": True, 49 | "NRP": True, 50 | "IP_ADDRESS": True, 51 | "EMAIL_ADDRESS": True, 52 | "IBAN_CODE": True, 53 | "CRYPTO": True, 54 | "CREDIT_CARD": True, 55 | "PERSON": True, 56 | }, 57 | }, 58 | "nbdefense.plugins.LicenseDependencyFilePlugin": { 59 | "enabled": True, 60 | "accepted_licenses": [ 61 | "Apache License 2.0", 62 | "Apache Software License", 63 | "Apache 2.0", 64 | "Apache-2.0", 65 | "BSD", 66 | "BSD License", 67 | "BSD 3-Clause", 68 | "BSD-3-Clause", 69 | "GNU Library or Lesser General Public License (LGPL)", 70 | "Microsoft Public License", 71 | "MIT", 72 | "MIT License", 73 | "Python Software Foundation License", 74 | "ISC License (ISCL)", 75 | "MIT-0", 76 | ], 77 | }, 78 | "nbdefense.plugins.CVEDependencyFilePlugin": {"enabled": True}, 79 | "nbdefense.plugins.LicenseNotebookPlugin": { 80 | "enabled": True, 81 | "accepted_licenses": [ 82 | "Apache License 2.0", 83 | "Apache Software License", 84 | "Apache 2.0", 85 | "Apache-2.0", 86 | "BSD", 87 | "BSD License", 88 | "BSD 3-Clause", 89 | "BSD-3-Clause", 90 | "GNU Library or Lesser General Public License (LGPL)", 91 | "Microsoft Public License", 92 | "MIT", 93 | "MIT License", 94 | "Python Software Foundation License", 95 | "ISC License (ISCL)", 96 | "MIT-0", 97 | ], 98 | "licenses_for_notebooks_source": "HYBRID", 99 | }, 100 | "nbdefense.plugins.CVENotebookPlugin": 
{"enabled": True}, 101 | }, 102 | } 103 | -------------------------------------------------------------------------------- /tests/mock_notebooks/mock_notebook.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creates a mock v4 notebook using nbformat 3 | """ 4 | from enum import Enum 5 | from typing import List, Optional 6 | 7 | import pandas as pd 8 | from nbformat import NotebookNode 9 | from nbformat.v4 import new_code_cell, new_markdown_cell, new_notebook, new_output 10 | 11 | 12 | class MockCellType(Enum): 13 | SOURCE = 1 14 | MARKDOWN = 2 15 | 16 | 17 | class MockNotebook: 18 | def __init__(self) -> None: 19 | self.nb = new_notebook(cells=[]) # type: ignore[no-untyped-call] 20 | self.current_cell_index = -1 21 | 22 | def add_cell( 23 | self, 24 | cell_type: MockCellType, 25 | source: List[str], 26 | stream_output: Optional[List[str]] = None, 27 | data_frame: Optional[pd.DataFrame] = None, 28 | html_output: Optional[List[str]] = None, 29 | plaintext_output: Optional[List[str]] = None, 30 | ) -> None: 31 | """ 32 | Add a cell to the mock notebook. 33 | The outputs are add to source cell only. if cell type is markdown all other outputs are ignored. 34 | The function will add all the outputs to the cell in the order of function arguments. 35 | DataFrames are added as both html and plain-text format. 36 | 37 | :param cell_type: Type of the cell to add. 
38 | :param source: Source or markdown string(s) 39 | :param stream_output: 40 | :param data_frame: Dataframe is converted to html using df.to_html() 41 | :param html_output: 42 | :param plaintext_output: 43 | """ 44 | joined_source = "\n".join(source) 45 | 46 | joined_stream_output: Optional[str] = None 47 | if stream_output: 48 | joined_stream_output = "\n".join(stream_output) 49 | 50 | joined_html_output: Optional[str] = None 51 | if html_output: 52 | joined_html_output = "\n".join(html_output) 53 | 54 | joined_plaintext_output: Optional[str] = None 55 | if plaintext_output: 56 | joined_plaintext_output = "\n".join(plaintext_output) 57 | 58 | if cell_type == MockCellType.MARKDOWN: 59 | self.nb.cells.append(new_markdown_cell(joined_source)) # type: ignore[no-untyped-call] 60 | return 61 | 62 | self.nb.cells.append(new_code_cell(source=joined_source)) # type: ignore[no-untyped-call] 63 | if joined_stream_output: 64 | self.nb.cells[-1].outputs.append( 65 | new_output(output_type="stream", text=joined_stream_output) # type: ignore[no-untyped-call] 66 | ) 67 | 68 | if data_frame is not None: 69 | self.nb.cells[-1].outputs.append( 70 | new_output( 71 | output_type="execute_result", 72 | data={ 73 | "text/html": data_frame.to_html(), 74 | "text/plain": str(data_frame), 75 | }, 76 | ) # type: ignore[no-untyped-call] 77 | ) 78 | 79 | if joined_html_output: 80 | self.nb.cells[-1].outputs.append( 81 | new_output( 82 | output_type="execute_result", data={"text/html": joined_html_output} 83 | ) # type: ignore[no-untyped-call] 84 | ) 85 | 86 | if joined_plaintext_output: 87 | self.nb.cells[-1].outputs.append( 88 | new_output( 89 | output_type="execute_result", 90 | data={"text/plain": joined_plaintext_output}, 91 | ) # type: ignore[no-untyped-call] 92 | ) 93 | 94 | @property 95 | def notebook_node(self) -> NotebookNode: 96 | return self.nb # type: ignore[no-any-return] 97 | -------------------------------------------------------------------------------- 
/tests/plugin_tests/cve/fixtures.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Any 4 | 5 | import pytest 6 | 7 | from nbdefense.dependencies import DependencyInfo, ThirdPartyDependencies 8 | from nbdefense.plugins.cve.cve_plugin import CVEPlugin 9 | 10 | 11 | @pytest.fixture(scope="session") 12 | def create_cve_requirements_file_path() -> Path: 13 | current_directory = Path(os.path.dirname(os.path.realpath(__file__))) 14 | requirements_directory = current_directory / "mock_files" 15 | requirements_file_path = requirements_directory / "requirements.txt" 16 | with open(requirements_file_path, "w") as f: 17 | f.writelines( 18 | ["pytorch-lightning==1.5.10\n", "pandas==1.5.0\n", "matplotlibXtns==20.5\n"] 19 | ) 20 | return requirements_file_path 21 | 22 | 23 | @pytest.fixture 24 | def mock_third_party_dependencies(monkeypatch: Any) -> None: 25 | dependencies = [ 26 | DependencyInfo( 27 | name="pytorch-lightning", 28 | version="1.5.10", 29 | dist_info_path=Path("none"), 30 | dist_info_name="none", 31 | ), 32 | DependencyInfo( 33 | name="pandas", 34 | version="1.5.0", 35 | dist_info_path=Path("none"), 36 | dist_info_name="none", 37 | ), 38 | DependencyInfo( 39 | name="matplotlibXtns", 40 | version="20.5", 41 | dist_info_path=Path("none"), 42 | dist_info_name="none", 43 | ), 44 | ] 45 | 46 | def mock_parse_dependencies_available_in_env(self: Any) -> None: 47 | self._dependencies_in_env = dependencies 48 | 49 | def mock_parse_modules_from_dependencies_available(self: Any) -> None: 50 | self._dependencies_by_module["pytorch_lightning"] = dependencies[0] 51 | self._dependencies_by_module["pandas"] = dependencies[1] 52 | self._dependencies_by_module["matplotlibXtns"] = dependencies[2] 53 | 54 | monkeypatch.setattr( 55 | ThirdPartyDependencies, 56 | "_parse_dependencies_available_in_env", 57 | mock_parse_dependencies_available_in_env, 58 | ) 59 | monkeypatch.setattr( 60 | 
ThirdPartyDependencies, 61 | "_parse_modules_from_dependencies_available", 62 | mock_parse_modules_from_dependencies_available, 63 | ) 64 | 65 | 66 | @pytest.fixture 67 | def install_trivy() -> None: 68 | CVEPlugin.handle_binary_dependencies(True, True) 69 | -------------------------------------------------------------------------------- /tests/plugin_tests/cve/mock_files/test-cve.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "id": "0d7a3ee3-2905-4755-b369-44144e9fa233", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "output\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import pandas\n", 19 | "import matplotlibXtns\n", 20 | "client_secret = \"e2a6dda0953954cc1bf299b71e2d1befdd22709a\"\n", 21 | "print(\"output\")" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "id": "26e1650c", 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "'e2a6dda0953954cc1bf299b71e2d1befdd22709a'" 34 | ] 35 | }, 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "import pytorch_lightning as lightning\n", 43 | "client_secret" 44 | ] 45 | } 46 | ], 47 | "metadata": { 48 | "kernelspec": { 49 | "display_name": "Python 3.8.13 ('nbdefense')", 50 | "language": "python", 51 | "name": "python3" 52 | }, 53 | "language_info": { 54 | "codemirror_mode": { 55 | "name": "ipython", 56 | "version": 3 57 | }, 58 | "file_extension": ".py", 59 | "mimetype": "text/x-python", 60 | "name": "python", 61 | "nbconvert_exporter": "python", 62 | "pygments_lexer": "ipython3", 63 | "version": "3.8.13" 64 | }, 65 | "vscode": { 66 | "interpreter": { 67 | "hash": "7e2becf3a3002680eb4d2fb30f5037c726550c6b7f1911bce4ec05c40b2fbaa9" 68 | } 69 | } 70 | }, 71 | "nbformat": 4, 72 | "nbformat_minor": 5 
73 | } 74 | -------------------------------------------------------------------------------- /tests/plugin_tests/cve/test_cve_dependency_file_plugin.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Any, List 3 | 4 | import pytest 5 | 6 | from nbdefense.codebase import Codebase 7 | from nbdefense.plugins.cve.cve_dependency_file_plugin import CVEDependencyFilePlugin 8 | from nbdefense.constants import DEFAULT_SETTINGS 9 | 10 | 11 | class TestCVEDependencyFilePlugin: 12 | @pytest.fixture 13 | def codebase( 14 | self, tmp_path: Path, create_cve_requirements_file_path: Path 15 | ) -> Codebase: 16 | return Codebase( 17 | Path("./tests/plugin_tests/cve/mock_files"), 18 | False, 19 | True, 20 | False, 21 | tmp_path, 22 | requirements_file=create_cve_requirements_file_path, 23 | ) 24 | 25 | def test_scan( 26 | self, 27 | install_trivy: None, 28 | codebase: Codebase, 29 | create_cve_requirements_file_path: Path, 30 | ) -> None: 31 | CVEDependencyFilePlugin.scan( 32 | codebase, 33 | CVEDependencyFilePlugin.get_settings( 34 | "nbdefense.plugins.CVEDependencyFilePlugin", True, DEFAULT_SETTINGS 35 | ), 36 | ) 37 | self.verify_scan_results(codebase, create_cve_requirements_file_path) 38 | 39 | @staticmethod 40 | def verify_scan_results(codebase: Codebase, requiremetns_file_path: Path) -> None: 41 | issues = codebase.issues.to_json() 42 | assert len(issues) == 2 43 | 44 | cve = "CVE-2022-0845" 45 | cve_filter = lambda issue: issue["details"]["results"]["VulnerabilityID"] == cve 46 | cve_list: List[Any] = list(filter(cve_filter, issues)) 47 | assert len(cve_list) == 1 48 | cve1: Any = cve_list[0] 49 | assert cve1["code"] == "VULNERABLE_DEPENDENCY_DEP_FILE" 50 | assert cve1["severity"] == "CRITICAL" 51 | assert cve1["details"]["file_path"] == str(requiremetns_file_path) 52 | assert cve1["details"]["results"]["PkgName"] == "pytorch-lightning" 53 | assert 
class TestCVENotebookPlugin:
    """
    Scans the mock CVE notebook with ``CVENotebookPlugin`` and checks the
    issues that are reported for it.
    """

    @pytest.fixture
    def codebase(self, mock_third_party_dependencies: None, tmp_path: Path) -> Codebase:
        return Codebase(
            Path("./tests/plugin_tests/cve/mock_files/test-cve.ipynb"),
            False,
            True,
            False,
            tmp_path,
            site_packages_path=Path("none"),
        )

    def test_scan(
        self,
        install_trivy: None,
        codebase: Codebase,
    ) -> None:
        CVENotebookPlugin.scan(
            codebase,
            CVENotebookPlugin.get_settings(
                "nbdefense.plugins.CVENotebookPlugin", True, DEFAULT_SETTINGS
            ),
        )
        self.verify_scan_results(codebase)

    @staticmethod
    def verify_scan_results(codebase: Codebase) -> None:
        """
        Assert that exactly two CVE issues were raised, each with the expected
        severity, location and package details.

        :param codebase: The codebase that was scanned.
        """
        notebooks = list(codebase.notebooks())
        assert len(notebooks) == 1
        issues = notebooks[0].issues.to_json()
        assert len(issues) == 2

        def issues_for(cve_id: str) -> List[Any]:
            # Explicit parameter instead of a lambda that closes over a
            # variable which is reassigned between look-ups.
            return [
                issue
                for issue in issues
                if issue["details"]["summary_field"]["CVE_ID"] == cve_id
            ]

        cve_list: List[Any] = issues_for("CVE-2022-0845")
        assert len(cve_list) == 1
        cve1: Any = cve_list[0]
        assert cve1["code"] == "VULNERABLE_DEPENDENCY_IMPORT"
        assert cve1["severity"] == "CRITICAL"
        assert cve1["cell"]["cell_index"] == 1
        assert cve1["line_index"] == 0
        assert cve1["location"] == "INPUT"
        assert cve1["character_start_index"] == 7
        assert cve1["character_end_index"] == 24
        assert (
            cve1["details"]["summary_field"]["INSTALLED_PACKAGE"] == "pytorch-lightning"
        )
        assert cve1["details"]["summary_field"]["INSTALLED_VERSION"] == "1.5.10"
        assert cve1["details"]["summary_field"]["FIXED_VERSION"] == "1.6.0"

        cve2_list: List[Any] = issues_for("CVE-2021-4118")
        assert len(cve2_list) == 1
        cve2: Any = cve2_list[0]
        assert cve2["code"] == "VULNERABLE_DEPENDENCY_IMPORT"
        assert cve2["severity"] == "HIGH"
        # These location checks previously re-tested cve1, leaving the
        # second issue's location unverified.
        assert cve2["cell"]["cell_index"] == 1
        assert cve2["line_index"] == 0
        assert cve2["location"] == "INPUT"
        assert cve2["character_start_index"] == 7
        assert cve2["character_end_index"] == 24
        assert (
            cve2["details"]["summary_field"]["INSTALLED_PACKAGE"] == "pytorch-lightning"
        )
        assert cve2["details"]["summary_field"]["INSTALLED_VERSION"] == "1.5.10"
        assert cve2["details"]["summary_field"]["FIXED_VERSION"] == "1.6.0"
@pytest.fixture(scope="session")
def create_license_requirements_file_path() -> Path:
    """Write the mock ``requirements.txt`` next to this module and return its path."""
    pinned_requirements = (
        "test_package_1==23.2.1\n",
        "test_package_2==2.28.1\n",
        "test_package_3==1.8.0\n",
        "test_package_4==1.16.0",
    )
    here = Path(os.path.dirname(os.path.realpath(__file__)))
    target = here / "mock_files" / "requirements.txt"
    with open(target, "w") as handle:
        handle.write("".join(pinned_requirements))
    return target


@pytest.fixture
def mock_license_cache(monkeypatch: Any) -> None:
    """Replace LicenseCache construction and teardown with in-memory stand-ins."""

    def fake_init(self: Any, temp_directory: Path, filename: str) -> None:
        # Only set the two attributes; the constructor arguments are ignored.
        self._cache = {}
        self._cache_updated = False

    def fake_del(self: Any) -> None:
        return None

    for dunder, replacement in (("__init__", fake_init), ("__del__", fake_del)):
        monkeypatch.setattr(LicenseCache, dunder, replacement)


@pytest.fixture
def mock_fetch_licenses_from_dist_info_path(monkeypatch: Any) -> Dict[str, List[str]]:
    """Patch the dist-info license parser and return the lists that drive it.

    Tests mutate the returned lists in place; the patched parser reads the same
    list objects on every call.
    """
    classifiers: List[str] = []
    license_metadata: List[str] = []
    package_names: List[str] = []

    def fake_parse(package: PackageInfo) -> Any:
        # A non-empty package_names list restricts which packages get data.
        if package_names and package.name not in package_names:
            return []
        return classifiers if classifiers else license_metadata

    monkeypatch.setattr(
        LicensePlugin,
        "_parse_license_data_from_dist_info_path",
        fake_parse,
    )
    return {
        "classifiers": classifiers,
        "license_metadata": license_metadata,
        "package_names": package_names,
    }


@pytest.fixture
def mock_fetch_license_data_from_pypi(monkeypatch: Any) -> Dict[str, List[str]]:
    """Patch the PyPI license fetch and return the lists that drive it.

    Tests mutate the returned lists in place to choose which licenses the
    patched fetch assigns, and to which package names.
    """
    licenses: List[str] = []
    package_names: List[str] = []

    def fake_fetch(_: requests.Session, package: PackageInfo) -> PackageInfo:
        applies = not package_names or package.name in package_names
        package.licenses = licenses if applies else []
        return package

    monkeypatch.setattr(
        LicensePlugin,
        "_fetch_license_data_from_pypi",
        fake_fetch,
    )
    return {
        "licenses": licenses,
        "package_names": package_names,
    }
class TestLicenseDependencyFilePlugin:
    """Scan the mock requirements file and check the unapproved-license issues."""

    @pytest.fixture
    def codebase(
        self, tmp_path: Path, create_license_requirements_file_path: Path
    ) -> Codebase:
        """Return a Codebase over the mock directory using the generated requirements file."""
        print(create_license_requirements_file_path)
        mock_files_directory = Path("./tests/plugin_tests/licenses/mock_files")
        return Codebase(
            mock_files_directory,
            False,
            True,
            False,
            tmp_path,
            requirements_file=create_license_requirements_file_path,
        )

    def test_scan(
        self,
        mock_license_cache: None,
        mock_fetch_licenses_from_dist_info_path: Dict[str, List[str]],
        mock_fetch_license_data_from_pypi: Dict[str, List[str]],
        codebase: Codebase,
        create_license_requirements_file_path: Path,
    ) -> None:
        """Make the mocked PyPI fetch report two bad licenses, scan, and verify."""
        unapproved_licenses = ["BAD_LICENSE_1", "BAD_LICENSE_2"]
        for bad_license in unapproved_licenses:
            mock_fetch_license_data_from_pypi["licenses"].append(bad_license)

        plugin_settings = LicenseDependencyFilePlugin.get_settings(
            plugin_class_name="nbdefense.plugins.LicenseDependencyFilePlugin",
            settings=DEFAULT_SETTINGS,
        )
        LicenseDependencyFilePlugin.scan(codebase, plugin_settings)
        self.verify_scan_results(codebase, create_license_requirements_file_path)

    @staticmethod
    def verify_scan_results(codebase: Codebase, requirements_file_path: Path) -> None:
        """Assert each of the four mock packages produced both unapproved-license issues."""
        issues = codebase.issues.to_json()
        assert len(issues) == 8

        # Package name -> pinned version, in the order the packages are checked.
        pinned_versions = {
            "test_package_1": "23.2.1",
            "test_package_2": "2.28.1",
            "test_package_3": "1.8.0",
            "test_package_4": "1.16.0",
        }
        for package_name, package_version in pinned_versions.items():
            package_issues = [
                issue
                for issue in issues
                if issue["details"]["package_name"] == package_name
            ]
            assert len(package_issues) == 2
            for license_number in range(1, len(package_issues) + 1):
                expected_issue = {
                    "code": "UNAPPROVED_LICENSE_DEP_FILE",
                    "severity": "MEDIUM",
                    "details": {
                        "package_name": package_name,
                        "package_version": package_version,
                        "unapproved_license": f"BAD_LICENSE_{license_number}",
                        "file_path": str(requirements_file_path),
                    },
                }
                assert expected_issue in package_issues
class TestLicensePlugin:
    """Tests for LicensePlugin license lookup, filtering and classifier parsing."""

    def test_get_licenses_none_found(
        self,
        tmp_path: Path,
        mock_license_cache: None,
        mock_fetch_licenses_from_dist_info_path: Dict[str, List[str]],
        mock_fetch_license_data_from_pypi: Dict[str, List[str]],
    ) -> None:
        """With both mocked sources empty, the package comes back without licenses."""
        unknown_package = PackageInfo(name="none", version="none")

        results = LicensePlugin.get_licenses(
            [unknown_package], tmp_path, LicensePluginSource.HYBRID
        )
        assert len(results) == 1
        found = results[0]
        assert found.name == unknown_package.name
        assert found.version == unknown_package.version
        assert not found.licenses

    def test_get_licenses_fall_back_to_api(
        self,
        tmp_path: Path,
        mock_license_cache: None,
        mock_fetch_licenses_from_dist_info_path: Dict[str, List[str]],
        mock_fetch_license_data_from_pypi: Dict[str, List[str]],
    ) -> None:
        """When only the mocked PyPI fetch has data, its license is returned."""
        fallback_license = "Fallback To API License"
        mock_fetch_license_data_from_pypi["licenses"].append(fallback_license)
        package_info = PackageInfo(name="package_test_1", version="version")

        results = LicensePlugin.get_licenses(
            [package_info], tmp_path, LicensePluginSource.HYBRID
        )
        assert len(results) == 1
        found = results[0]
        assert found.name == package_info.name
        assert found.version == package_info.version
        assert found.licenses == [fallback_license]

    def test_filter_for_unapproved_licenses(self) -> None:
        """Only the license missing from the accepted list is reported."""
        candidates = ["MIT", "UNAPPROVED"]
        accepted = ["MIT"]
        licenses = LicensePlugin.filter_for_unapproved_licenses(candidates, accepted)
        assert len(licenses) == 1
        assert licenses[0] == "UNAPPROVED"

    def test_get_licenses_in_metadata(
        self,
        tmp_path: Path,
        mock_license_cache: None,
        mock_fetch_licenses_from_dist_info_path: Dict[str, List[str]],
    ) -> None:
        """A license supplied through the mocked dist-info metadata is returned."""
        metadata_license = "GNU"
        mock_fetch_licenses_from_dist_info_path["license_metadata"].append(
            metadata_license
        )
        package_info = PackageInfo(name="package_test_1", version="version")

        results = LicensePlugin.get_licenses(
            [package_info], tmp_path, LicensePluginSource.HYBRID
        )
        assert len(results) == 1
        found = results[0]
        assert found.name == package_info.name
        assert found.version == package_info.version
        assert found.licenses == [metadata_license]

    def test_process_for_unnapproved_licenses(self) -> None:
        """Filtering against the default settings' accepted licenses flags the unknown one."""
        unapproved_license = "APACHE_UNAPPROVED"
        settings = LicensePlugin.get_settings(
            plugin_class_name="nbdefense.plugins.LicenseNotebookPlugin",
            settings=DEFAULT_SETTINGS,
        )
        flagged = LicensePlugin.filter_for_unapproved_licenses(
            ["MIT", unapproved_license], settings.get_accepted_licenses()
        )
        assert flagged == [unapproved_license]

    def test_parse_licenses_from_classifiers_single_license(self) -> None:
        """Only the classifier that names a concrete license yields a result."""
        parsed = LicensePlugin._parse_licenses_from_classifiers(
            [
                "License :: OSI Approved",
                "License :: OSI Approved :: MIT",
                "Private :: Do Not Upload",
            ]
        )
        assert parsed == ["MIT"]

    def test_parse_licenses_from_classifiers_multiple_license(self) -> None:
        """Two license classifiers yield both license names, in order."""
        parsed = LicensePlugin._parse_licenses_from_classifiers(
            [
                "License :: OSI Approved :: MIT",
                "License :: OSI Approved :: Apache 2",
            ]
        )
        assert parsed == ["MIT", "Apache 2"]

    def test_parse_licenses_from_classifiers_no_license(self) -> None:
        """A classifier list without any license entry yields an empty list."""
        parsed = LicensePlugin._parse_licenses_from_classifiers(
            ["Private :: Do Not Upload"]
        )
        assert parsed == []
class TestCodebase:
    """Tests for notebook discovery by Codebase on a single file and a directory tree."""

    @staticmethod
    def _write_notebook(target: pathlib.Path, contents: str) -> pathlib.Path:
        """Write ``contents`` to ``target``, creating missing parent directories."""
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(contents)
        return target

    @staticmethod
    def _make_codebase(
        path: pathlib.Path, tmp_path: pathlib.Path, recursive: bool
    ) -> Codebase:
        """Build a quiet Codebase with no requirements file and no progress bars."""
        return Codebase(
            path=path,
            recursive=recursive,
            quiet=True,
            temp_directory=tmp_path,
            requirements_file=None,
            show_progress_bars=False,
        )

    @pytest.fixture
    def path_to_single_notebook(
        self, tmp_path: pathlib.Path, mock_notebook_as_json: str
    ) -> pathlib.Path:
        """Return the path of one notebook file written into ``tmp_path``."""
        return self._write_notebook(
            tmp_path.joinpath("temp.ipynb"), mock_notebook_as_json
        )

    @pytest.fixture
    def path_with_multiple_notebooks(
        self, tmp_path: pathlib.Path, mock_notebook_as_json: str
    ) -> pathlib.Path:
        """Return ``tmp_path`` holding one top-level notebook and two in subdirectories."""
        for relative_path in ("temp.ipynb", "dir2/temp2.ipynb", "dir3/temp3.ipynb"):
            self._write_notebook(
                tmp_path.joinpath(relative_path), mock_notebook_as_json
            )
        return tmp_path

    def test_codebase_single_notebook_file(
        self, path_to_single_notebook: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """A Codebase pointed at one notebook file yields exactly that notebook."""
        # TODO: Add requirements.txt path as well.
        c = self._make_codebase(path_to_single_notebook, tmp_path, recursive=False)

        assert c.quiet
        assert len(list(c.notebooks())) == 1

    def test_codebase_multiple_notebook_path(
        self, path_with_multiple_notebooks: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """Non-recursive discovery finds one notebook; recursive discovery finds all three."""
        c = self._make_codebase(path_with_multiple_notebooks, tmp_path, recursive=False)
        assert len(list(c.notebooks())) == 1

        c = self._make_codebase(path_with_multiple_notebooks, tmp_path, recursive=True)
        assert len(list(c.notebooks())) == 3
class TestNotebookParsing:
    """Tests for parsed notebook cells and for file-line-number-to-cell lookups."""

    def test_parsing(
        self, mock_notebook: Notebook, raw_notebook_cells: List[Dict[str, List[str]]]
    ) -> None:
        """Compare parsed input and output cells with the raw cell descriptions."""
        assert len(mock_notebook.input_cells) == len(raw_notebook_cells)
        assert (
            len(mock_notebook.output_cells) == 8
        )  # TODO replace the hard-coded output number

        for position, raw_cell in enumerate(raw_notebook_cells):
            parsed_input = mock_notebook.input_cells[position]
            assert type(parsed_input) is InputCell
            if "markdown" in raw_cell:
                text_key, expected_input_type = "markdown", InputCellType.MARKDOWN
            else:
                text_key, expected_input_type = "source", InputCellType.SOURCE
            assert parsed_input.cell_type == expected_input_type
            assert "\n".join(raw_cell[text_key]) == str(parsed_input)

        # Text-like outputs, listed in the order their keys take precedence.
        text_output_types = (
            ("plaintext_output", OutputCellType.PLAINTEXT),
            ("stream_output", OutputCellType.STREAM),
        )
        output_position = 0
        for raw_cell in raw_notebook_cells:
            text_output = next(
                (entry for entry in text_output_types if entry[0] in raw_cell), None
            )
            if text_output is not None:
                output_key, expected_output_type = text_output
                cell = mock_notebook.output_cells[output_position]
                assert type(cell) is OutputCell
                assert cell.cell_type == expected_output_type
                assert "\n".join(raw_cell[output_key]) == str(cell)
                output_position += 1
            elif "dataframe_output" in raw_cell:
                cell = mock_notebook.output_cells[output_position]
                assert cell.cell_type == OutputCellType.DATAFRAME
                df: pd.DataFrame = raw_cell["dataframe_output"]
                header = [str(elem) for elem in list(df.columns.values)]
                assert header == cell.data_cells[0]  # type: ignore[attr-defined]
                # data_cells[0] is compared with the header, so rows start at 1.
                for row_number, (_, row) in enumerate(df.iterrows(), start=1):
                    row_values = [str(val) for val in list(row.values)]
                    assert row_values == cell.data_cells[row_number]  # type: ignore[attr-defined]

                output_position += 1

    def test_input_file_line_number_to_cell(
        self, tmp_path: pathlib.Path, mock_notebook: Notebook
    ) -> None:
        """Dump input cells to a file and map every 1-based line back to its cell."""
        dump_path = tmp_path.joinpath("input_test_file.py")
        with open(dump_path, "w") as sink:
            for input_cell in mock_notebook.input_cells:
                sink.write(str(input_cell))
                sink.write("\n")

        with open(dump_path, "r") as dumped:
            for line_number, raw_line in enumerate(dumped.readlines(), start=1):
                expected_text = raw_line[:-1]  # drop the trailing newline
                found_cell: Optional[
                    Cell
                ] = mock_notebook.get_input_file_line_number_to_cell(line_number)
                assert found_cell
                # Test for the file text is in the cell found at line number
                assert expected_text in found_cell.lines

                # Test for the file text at line index within the found cell
                cell_line_index = found_cell.get_cell_line_index_for_file_line_number(
                    line_number
                )
                assert cell_line_index is not None
                assert len(found_cell.lines) > cell_line_index
                assert found_cell.lines[cell_line_index] == expected_text

    def test_output_file_line_number_to_cell(
        self, tmp_path: pathlib.Path, mock_notebook: Notebook
    ) -> None:
        """Dump output cells to a file and map every 1-based line back to its cell."""
        dump_path = tmp_path.joinpath("output_test_file.txt")
        with open(dump_path, "w") as sink:
            for output_cell in mock_notebook.output_cells:
                sink.write(str(output_cell))
                sink.write("\n")

        with open(dump_path, "r") as dumped:
            for line_number, raw_line in enumerate(dumped.readlines(), start=1):
                expected_text = raw_line[:-1]  # drop the trailing newline
                found_cell = mock_notebook.get_output_file_line_number_to_cell(
                    line_number
                )

                # Test for the file text is in the cell found at line number
                assert found_cell
                assert expected_text in found_cell.lines

                # Test for the file text at line index within the found cell
                cell_line_index = found_cell.get_cell_line_index_for_file_line_number(
                    line_number
                )
                assert cell_line_index is not None
                assert found_cell.lines[cell_line_index] == expected_text