├── .flake8 ├── .github └── workflows │ ├── lint.yml │ └── tests.yml ├── .gitignore ├── .mypy.ini ├── .pre-commit-config.yaml ├── .pre-commit-hooks.yaml ├── .pylintrc ├── LICENSE ├── Makefile ├── README.md ├── docs ├── README.md └── overrides │ └── partials │ └── copyright.html ├── mkdocs.yaml ├── noxfile.py ├── noxfile_conda.py ├── noxfile_conda_lint.py ├── noxfile_lint.py ├── pyproject.toml ├── requirements_dev.txt ├── requirements_test.txt ├── setup.cfg ├── setup.py ├── src └── nbmetaclean │ ├── __init__.py │ ├── app_check.py │ ├── app_clean.py │ ├── check.py │ ├── clean.py │ ├── helpers.py │ ├── nb_types.py │ └── version.py └── tests ├── test_app_check.py ├── test_app_clean.py ├── test_check.py ├── test_clean.py ├── test_get_nbnames.py ├── test_nbs ├── .test_nb_2_meta.ipynb ├── test_nb_1.ipynb ├── test_nb_2_clean.ipynb └── test_nb_3_ec.ipynb └── test_read_write.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = C,E,F,W 3 | max-complexity = 10 4 | max-line-length = 120 5 | extend-ignore = W503 6 | disable-noqa = True 7 | application-import-names = nbmetaclean, tests 8 | import-order-style = google 9 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | on: 3 | push: 4 | branches: 5 | - dev 6 | - main 7 | jobs: 8 | tests: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@main 12 | - uses: actions/setup-python@main 13 | with: 14 | python-version: "3.11" 15 | architecture: x64 16 | - run: pip install ruff 17 | - run: ruff check . 
18 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | branches: 5 | - dev 6 | - main 7 | jobs: 8 | tests: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@main 16 | - name: Setup Python ${{ matrix.python }} 17 | uses: actions/setup-python@main 18 | with: 19 | python-version: ${{ matrix.python }} 20 | architecture: x64 21 | 22 | - name: Install 23 | run: | 24 | pip install uv 25 | uv pip install --system .[test] "coverage[toml]" 26 | 27 | - name: Tests 28 | run: pytest --cov 29 | 30 | - name: CodeCov 31 | if: ${{ matrix.python == '3.11' }} 32 | uses: codecov/codecov-action@main 33 | with: 34 | token: ${{ secrets.CODECOV_TOKEN }} 35 | slug: ayasyrev/nbmetaclean 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # my 2 | .vscode/ 3 | tmp*/ 4 | cov.xml 5 | 6 | # ide 7 | 8 | .idea/ 9 | .vscode/settings.json 10 | 11 | # nox 12 | .nox 13 | 14 | # Byte-compiled / optimized / DLL files 15 | __pycache__/ 16 | *.py[cod] 17 | *$py.class 18 | 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | pip-wheel-metadata/ 37 | share/python-wheels/ 38 | *.egg-info/ 39 | .installed.cfg 40 | *.egg 41 | MANIFEST 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | *.py,cover 64 | .hypothesis/ 65 | .pytest_cache/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | db.sqlite3 75 | db.sqlite3-journal 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 108 | __pypackages__/ 109 | 110 | # Celery stuff 111 | celerybeat-schedule 112 | celerybeat.pid 113 | 114 | # SageMath parsed files 115 | *.sage.py 116 | 117 | # Environments 118 | .env 119 | .venv 120 | env/ 121 | venv/ 122 | ENV/ 123 | env.bak/ 124 | venv.bak/ 125 | 126 | # Spyder project settings 127 | .spyderproject 128 | .spyproject 129 | 130 | # Rope project settings 131 | .ropeproject 132 | 133 | # mkdocs documentation 134 | /site 135 | 136 | # mypy 137 | .mypy_cache/ 138 | .dmypy.json 139 | dmypy.json 140 | 141 | # Pyre type checker 142 | .pyre/ 143 | -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | 4 | hooks: 5 | # local version for testing 6 | - id: nbmetaclean 7 | name: nbmetaclean local 8 | entry: nbmetaclean 9 | language: system 10 | files: \.ipynb 11 | 12 | - id: nbcheck 13 | name: nbcheck execution_count local 14 | entry: nbcheck 15 | language: system 16 | files: \.ipynb 17 | args: [ --ec, --no_exec, --err ] 18 | 19 | - repo: https://github.com/pre-commit/pre-commit-hooks 20 | rev: v4.6.0 21 | hooks: 22 | - id: check-added-large-files 23 | - id: check-ast 24 | - id: check-builtin-literals 25 | - id: check-case-conflict 26 | - id: check-docstring-first 27 | - id: check-executables-have-shebangs 28 | - id: check-shebang-scripts-are-executable 29 | - id: check-symlinks 30 | - id: check-toml 31 | - id: check-xml 32 | - id: detect-private-key 33 | - id: forbid-new-submodules 34 | - id: forbid-submodules 35 | - id: mixed-line-ending 36 | - id: destroyed-symlinks 37 | - id: fix-byte-order-marker 38 | - id: check-json 39 | - id: check-yaml 40 | args: [ --unsafe ] 41 | - 
id: debug-statements 42 | - id: end-of-file-fixer 43 | - id: trailing-whitespace 44 | - id: requirements-txt-fixer 45 | - repo: https://github.com/astral-sh/ruff-pre-commit 46 | # Ruff version. 47 | rev: v0.6.1 48 | 49 | hooks: 50 | # Run the linter. 51 | - id: ruff 52 | args: [ --fix ] 53 | # Run the formatter. 54 | - id: ruff-format 55 | - repo: https://github.com/pre-commit/pygrep-hooks 56 | rev: v1.10.0 57 | hooks: 58 | - id: python-check-mock-methods 59 | - id: python-use-type-annotations 60 | - id: python-check-blanket-noqa 61 | - id: text-unicode-replacement-char 62 | -------------------------------------------------------------------------------- /.pre-commit-hooks.yaml: -------------------------------------------------------------------------------- 1 | - id: nbmetaclean 2 | name: nbmetaclean 3 | description: Clean Jupyter Notebooks metadata and optionally output. 4 | entry: nbmetaclean 5 | files: \.ipynb$ 6 | language: python 7 | language_version: python3 8 | 9 | # Same as nbmetaclean, for compatibility. 10 | - id: nbclean 11 | name: nbclean 12 | description: Clean Jupyter Notebooks metadata and optionally output. 13 | entry: nbclean 14 | files: \.ipynb$ 15 | language: python 16 | language_version: python3 17 | 18 | - id: nbcheck 19 | name: nbcheck 20 | description: Check Jupyter Notebooks for correct sequence of execution_count and (or) errors in outputs. 21 | entry: nbcheck 22 | files: \.ipynb$ 23 | language: python 24 | language_version: python3 25 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 
6 | extension-pkg-allow-list= 7 | 8 | # A comma-separated list of package or module names from where C extensions may 9 | # be loaded. Extensions are loading into the active Python interpreter and may 10 | # run arbitrary code. (This is an alternative name to extension-pkg-allow-list 11 | # for backward compatibility.) 12 | extension-pkg-whitelist=pydantic 13 | ; extension-pkg-whitelist=pydantic , nbconvert, nbformat 14 | 15 | # Return non-zero exit code if any of these messages/categories are detected, 16 | # even if score is above --fail-under value. Syntax same as enable. Messages 17 | # specified are enabled, while categories only check already-enabled messages. 18 | fail-on= 19 | 20 | # Specify a score threshold to be exceeded before program exits with error. 21 | fail-under=10.0 22 | 23 | # Files or directories to be skipped. They should be base names, not paths. 24 | ignore=CVS 25 | 26 | # Add files or directories matching the regex patterns to the ignore-list. The 27 | # regex matches against paths and can be in Posix or Windows format. 28 | ignore-paths= 29 | 30 | # Files or directories matching the regex patterns are skipped. The regex 31 | # matches against base names, not paths. 32 | ignore-patterns= 33 | 34 | # Python code to execute, usually for sys.path manipulation such as 35 | # pygtk.require(). 36 | #init-hook= 37 | 38 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 39 | # number of processors available to use. 40 | jobs=1 41 | 42 | # Control the amount of potential inferred values when inferring a single 43 | # object. This can help the performance when dealing with large functions or 44 | # complex, nested conditions. 45 | limit-inference-results=100 46 | 47 | # List of plugins (as comma separated values of python module names) to load, 48 | # usually to register additional checkers. 49 | load-plugins= 50 | 51 | # Pickle collected data for later comparisons. 
52 | persistent=yes 53 | 54 | # Minimum Python version to use for version dependent checks. Will default to 55 | # the version used to run pylint. 56 | py-version=3.9 57 | 58 | # When enabled, pylint would attempt to guess common misconfiguration and emit 59 | # user-friendly hints instead of false-positive error messages. 60 | suggestion-mode=yes 61 | 62 | # Allow loading of arbitrary C extensions. Extensions are imported into the 63 | # active Python interpreter and may run arbitrary code. 64 | unsafe-load-any-extension=no 65 | 66 | 67 | [MESSAGES CONTROL] 68 | 69 | # Only show warnings with the listed confidence levels. Leave empty to show 70 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 71 | confidence= 72 | 73 | # Disable the message, report, category or checker with the given id(s). You 74 | # can either give multiple identifiers separated by comma (,) or put this 75 | # option multiple times (only on the command line, not in the configuration 76 | # file where it should appear only once). You can also use "--disable=all" to 77 | # disable everything first and then reenable specific checks. For example, if 78 | # you want to run only the similarities checker, you can use "--disable=all 79 | # --enable=similarities". If you want to run only the classes checker, but have 80 | # no Warning level messages displayed, use "--disable=all --enable=classes 81 | # --disable=W". 82 | disable=raw-checker-failed, 83 | bad-inline-option, 84 | locally-disabled, 85 | file-ignored, 86 | suppressed-message, 87 | useless-suppression, 88 | deprecated-pragma, 89 | use-symbolic-message-instead, 90 | exec-used, 91 | missing-module-docstring, 92 | missing-docstring, 93 | invalid-name 94 | 95 | # Enable the message, report, category or checker with the given id(s). 
You can 96 | # either give multiple identifier separated by comma (,) or put this option 97 | # multiple time (only on the command line, not in the configuration file where 98 | # it should appear only once). See also the "--disable" option for examples. 99 | enable=c-extension-no-member 100 | 101 | 102 | [REPORTS] 103 | 104 | # Python expression which should return a score less than or equal to 10. You 105 | # have access to the variables 'error', 'warning', 'refactor', and 'convention' 106 | # which contain the number of messages in each category, as well as 'statement' 107 | # which is the total number of statements analyzed. This score is used by the 108 | # global evaluation report (RP0004). 109 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 110 | 111 | # Template used to display messages. This is a python new-style format string 112 | # used to format the message information. See doc for all details. 113 | #msg-template= 114 | 115 | # Set the output format. Available formats are text, parseable, colorized, json 116 | # and msvs (visual studio). You can also give a reporter class, e.g. 117 | # mypackage.mymodule.MyReporterClass. 118 | output-format=text 119 | 120 | # Tells whether to display a full report or only the messages. 121 | reports=no 122 | 123 | # Activate the evaluation score. 124 | score=yes 125 | 126 | 127 | [REFACTORING] 128 | 129 | # Maximum number of nested blocks for function / method body 130 | max-nested-blocks=5 131 | 132 | # Complete name of functions that never returns. When checking for 133 | # inconsistent-return-statements if a never returning function is called then 134 | # it will be considered as an explicit return statement and no message will be 135 | # printed. 136 | never-returning-functions=sys.exit,argparse.parse_error 137 | 138 | 139 | [SPELLING] 140 | 141 | # Limits count of emitted suggestions for spelling mistakes. 
142 | max-spelling-suggestions=4 143 | 144 | # Spelling dictionary name. Available dictionaries: none. To make it work, 145 | # install the 'python-enchant' package. 146 | spelling-dict= 147 | 148 | # List of comma separated words that should be considered directives if they 149 | # appear at the beginning of a comment and should not be checked. 150 | spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: 151 | 152 | # List of comma separated words that should not be checked. 153 | spelling-ignore-words= 154 | 155 | # A path to a file that contains the private dictionary; one word per line. 156 | spelling-private-dict-file= 157 | 158 | # Tells whether to store unknown words to the private dictionary (see the 159 | # --spelling-private-dict-file option) instead of raising a message. 160 | spelling-store-unknown-words=no 161 | 162 | 163 | [FORMAT] 164 | 165 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 166 | expected-line-ending-format= 167 | 168 | # Regexp for a line that is allowed to be longer than the limit. 169 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$ 170 | 171 | # Number of spaces of indent required inside a hanging or continued line. 172 | indent-after-paren=4 173 | 174 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 175 | # tab). 176 | indent-string=' ' 177 | 178 | # Maximum number of characters on a single line. 179 | max-line-length=120 180 | 181 | # Maximum number of lines in a module. 182 | max-module-lines=1000 183 | 184 | # Allow the body of a class to be on the same line as the declaration if body 185 | # contains single statement. 186 | single-line-class-stmt=no 187 | 188 | # Allow the body of an if to be on the same line as the test if there is no 189 | # else. 190 | single-line-if-stmt=no 191 | 192 | 193 | [VARIABLES] 194 | 195 | # List of additional names supposed to be defined in builtins.
Remember that 196 | # you should avoid defining new builtins when possible. 197 | additional-builtins= 198 | 199 | # Tells whether unused global variables should be treated as a violation. 200 | allow-global-unused-variables=yes 201 | 202 | # List of names allowed to shadow builtins 203 | allowed-redefined-builtins= 204 | 205 | # List of strings which can identify a callback function by name. A callback 206 | # name must start or end with one of those strings. 207 | callbacks=cb_, 208 | _cb 209 | 210 | # A regular expression matching the name of dummy variables (i.e. expected to 211 | # not be used). 212 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 213 | 214 | # Argument names that match this expression will be ignored. Default to name 215 | # with leading underscore. 216 | ignored-argument-names=_.*|^ignored_|^unused_ 217 | 218 | # Tells whether we should check for unused import in __init__ files. 219 | init-import=no 220 | 221 | # List of qualified module names which can have objects that can redefine 222 | # builtins. 223 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 224 | 225 | 226 | [SIMILARITIES] 227 | 228 | # Comments are removed from the similarity computation 229 | ignore-comments=yes 230 | 231 | # Docstrings are removed from the similarity computation 232 | ignore-docstrings=yes 233 | 234 | # Imports are removed from the similarity computation 235 | ignore-imports=no 236 | 237 | # Signatures are removed from the similarity computation 238 | ignore-signatures=no 239 | 240 | # Minimum lines number of a similarity. 241 | min-similarity-lines=4 242 | 243 | 244 | [LOGGING] 245 | 246 | # The type of string formatting that logging methods do. `old` means using % 247 | # formatting, `new` is for `{}` formatting. 248 | logging-format-style=old 249 | 250 | # Logging modules to check that the string format arguments are in logging 251 | # function parameter format. 
252 | logging-modules=logging 253 | 254 | 255 | [BASIC] 256 | 257 | # Naming style matching correct argument names. 258 | argument-naming-style=snake_case 259 | 260 | # Regular expression matching correct argument names. Overrides argument- 261 | # naming-style. 262 | #argument-rgx= 263 | 264 | # Naming style matching correct attribute names. 265 | attr-naming-style=snake_case 266 | 267 | # Regular expression matching correct attribute names. Overrides attr-naming- 268 | # style. 269 | #attr-rgx= 270 | 271 | # Bad variable names which should always be refused, separated by a comma. 272 | bad-names=foo, 273 | bar, 274 | baz, 275 | toto, 276 | tutu, 277 | tata 278 | 279 | # Bad variable names regexes, separated by a comma. If names match any regex, 280 | # they will always be refused 281 | bad-names-rgxs= 282 | 283 | # Naming style matching correct class attribute names. 284 | class-attribute-naming-style=any 285 | 286 | # Regular expression matching correct class attribute names. Overrides class- 287 | # attribute-naming-style. 288 | #class-attribute-rgx= 289 | 290 | # Naming style matching correct class constant names. 291 | class-const-naming-style=UPPER_CASE 292 | 293 | # Regular expression matching correct class constant names. Overrides class- 294 | # const-naming-style. 295 | #class-const-rgx= 296 | 297 | # Naming style matching correct class names. 298 | class-naming-style=PascalCase 299 | 300 | # Regular expression matching correct class names. Overrides class-naming- 301 | # style. 302 | #class-rgx= 303 | 304 | # Naming style matching correct constant names. 305 | const-naming-style=UPPER_CASE 306 | 307 | # Regular expression matching correct constant names. Overrides const-naming- 308 | # style. 309 | #const-rgx= 310 | 311 | # Minimum line length for functions/classes that require docstrings, shorter 312 | # ones are exempt. 313 | docstring-min-length=-1 314 | 315 | # Naming style matching correct function names. 
316 | function-naming-style=snake_case 317 | 318 | # Regular expression matching correct function names. Overrides function- 319 | # naming-style. 320 | #function-rgx= 321 | 322 | # Good variable names which should always be accepted, separated by a comma. 323 | good-names=i, 324 | j, 325 | k, 326 | ex, 327 | Run, 328 | _ 329 | 330 | # Good variable names regexes, separated by a comma. If names match any regex, 331 | # they will always be accepted 332 | good-names-rgxs= 333 | 334 | # Include a hint for the correct naming format with invalid-name. 335 | include-naming-hint=no 336 | 337 | # Naming style matching correct inline iteration names. 338 | inlinevar-naming-style=any 339 | 340 | # Regular expression matching correct inline iteration names. Overrides 341 | # inlinevar-naming-style. 342 | #inlinevar-rgx= 343 | 344 | # Naming style matching correct method names. 345 | method-naming-style=snake_case 346 | 347 | # Regular expression matching correct method names. Overrides method-naming- 348 | # style. 349 | #method-rgx= 350 | 351 | # Naming style matching correct module names. 352 | module-naming-style=snake_case 353 | 354 | # Regular expression matching correct module names. Overrides module-naming- 355 | # style. 356 | #module-rgx= 357 | 358 | # Colon-delimited sets of names that determine each other's naming style when 359 | # the name regexes allow several styles. 360 | name-group= 361 | 362 | # Regular expression which should only match function or class names that do 363 | # not require a docstring. 364 | no-docstring-rgx=^_ 365 | 366 | # List of decorators that produce properties, such as abc.abstractproperty. Add 367 | # to this list to register other decorators that produce valid properties. 368 | # These decorators are taken in consideration only for invalid-name. 369 | property-classes=abc.abstractproperty 370 | 371 | # Naming style matching correct variable names. 
372 | variable-naming-style=snake_case 373 | 374 | # Regular expression matching correct variable names. Overrides variable- 375 | # naming-style. 376 | #variable-rgx= 377 | 378 | 379 | [TYPECHECK] 380 | 381 | # List of decorators that produce context managers, such as 382 | # contextlib.contextmanager. Add to this list to register other decorators that 383 | # produce valid context managers. 384 | contextmanager-decorators=contextlib.contextmanager 385 | 386 | # List of members which are set dynamically and missed by pylint inference 387 | # system, and so shouldn't trigger E1101 when accessed. Python regular 388 | # expressions are accepted. 389 | generated-members= 390 | 391 | # Tells whether missing members accessed in mixin class should be ignored. A 392 | # class is considered mixin if its name matches the mixin-class-rgx option. 393 | ignore-mixin-members=yes 394 | 395 | # Tells whether to warn about missing members when the owner of the attribute 396 | # is inferred to be None. 397 | ignore-none=yes 398 | 399 | # This flag controls whether pylint should warn about no-member and similar 400 | # checks whenever an opaque object is returned when inferring. The inference 401 | # can return multiple potential results while evaluating a Python object, but 402 | # some branches might not be evaluated, which results in partial inference. In 403 | # that case, it might be useful to still emit no-member and other checks for 404 | # the rest of the inferred objects. 405 | ignore-on-opaque-inference=yes 406 | 407 | # List of class names for which member attributes should not be checked (useful 408 | # for classes with dynamically set attributes). This supports the use of 409 | # qualified names. 
410 | ignored-classes=optparse.Values,thread._local,_thread._local 411 | 412 | # List of module names for which member attributes should not be checked 413 | # (useful for modules/projects where namespaces are manipulated during runtime 414 | # and thus existing member attributes cannot be deduced by static analysis). It 415 | # supports qualified module names, as well as Unix pattern matching. 416 | ignored-modules= 417 | 418 | # Show a hint with possible names when a member name was not found. The aspect 419 | # of finding the hint is based on edit distance. 420 | missing-member-hint=yes 421 | 422 | # The minimum edit distance a name should have in order to be considered a 423 | # similar match for a missing member name. 424 | missing-member-hint-distance=1 425 | 426 | # The total number of similar names that should be taken in consideration when 427 | # showing a hint for a missing member. 428 | missing-member-max-choices=1 429 | 430 | # Regex pattern to define which classes are considered mixins when ignore-mixin- 431 | # members is set to 'yes' 432 | mixin-class-rgx=.*[Mm]ixin 433 | 434 | # List of decorators that change the signature of a decorated function. 435 | signature-mutators= 436 | 437 | 438 | [MISCELLANEOUS] 439 | 440 | # List of note tags to take in consideration, separated by a comma. 441 | notes=FIXME, 442 | XXX, 443 | TODO 444 | 445 | # Regular expression of note tags to take in consideration. 446 | #notes-rgx= 447 | 448 | 449 | [STRING] 450 | 451 | # This flag controls whether inconsistent-quotes generates a warning when the 452 | # character used as a quote delimiter is used inconsistently within a module. 453 | check-quote-consistency=no 454 | 455 | # This flag controls whether the implicit-str-concat should generate a warning 456 | # on implicit string concatenation in sequences defined over several lines.
457 | check-str-concat-over-line-jumps=no 458 | 459 | 460 | [IMPORTS] 461 | 462 | # List of modules that can be imported at any level, not just the top level 463 | # one. 464 | allow-any-import-level= 465 | 466 | # Allow wildcard imports from modules that define __all__. 467 | allow-wildcard-with-all=no 468 | 469 | # Analyse import fallback blocks. This can be used to support both Python 2 and 470 | # 3 compatible code, which means that the block might have code that exists 471 | # only in one or another interpreter, leading to false positives when analysed. 472 | analyse-fallback-blocks=no 473 | 474 | # Deprecated modules which should not be used, separated by a comma. 475 | deprecated-modules= 476 | 477 | # Output a graph (.gv or any supported image format) of external dependencies 478 | # to the given file (report RP0402 must not be disabled). 479 | ext-import-graph= 480 | 481 | # Output a graph (.gv or any supported image format) of all (i.e. internal and 482 | # external) dependencies to the given file (report RP0402 must not be 483 | # disabled). 484 | import-graph= 485 | 486 | # Output a graph (.gv or any supported image format) of internal dependencies 487 | # to the given file (report RP0402 must not be disabled). 488 | int-import-graph= 489 | 490 | # Force import order to recognize a module as part of the standard 491 | # compatibility libraries. 492 | known-standard-library= 493 | 494 | # Force import order to recognize a module as part of a third party library. 495 | known-third-party=enchant 496 | 497 | # Couples of modules and preferred modules, separated by a comma. 
498 | preferred-modules= 499 | 500 | 501 | [DESIGN] 502 | 503 | # List of regular expressions of class ancestor names to ignore when counting 504 | # public methods (see R0903) 505 | exclude-too-few-public-methods= 506 | 507 | # List of qualified class names to ignore when counting class parents (see 508 | # R0901) 509 | ignored-parents= 510 | 511 | # Maximum number of arguments for function / method. 512 | max-args=5 513 | 514 | # Maximum number of attributes for a class (see R0902). 515 | max-attributes=7 516 | 517 | # Maximum number of boolean expressions in an if statement (see R0916). 518 | max-bool-expr=5 519 | 520 | # Maximum number of branch for function / method body. 521 | max-branches=12 522 | 523 | # Maximum number of locals for function / method body. 524 | max-locals=15 525 | 526 | # Maximum number of parents for a class (see R0901). 527 | max-parents=7 528 | 529 | # Maximum number of public methods for a class (see R0904). 530 | max-public-methods=20 531 | 532 | # Maximum number of return / yield for function / method body. 533 | max-returns=6 534 | 535 | # Maximum number of statements in function / method body. 536 | max-statements=50 537 | 538 | # Minimum number of public methods for a class (see R0903). 539 | min-public-methods=2 540 | 541 | 542 | [CLASSES] 543 | 544 | # Warn about protected attribute access inside special methods 545 | check-protected-access-in-special-methods=no 546 | 547 | # List of method names used to declare (i.e. assign) instance attributes. 548 | defining-attr-methods=__init__, 549 | __new__, 550 | setUp, 551 | __post_init__ 552 | 553 | # List of member names, which should be excluded from the protected access 554 | # warning. 555 | exclude-protected=_asdict, 556 | _fields, 557 | _replace, 558 | _source, 559 | _make 560 | 561 | # List of valid names for the first argument in a class method. 562 | valid-classmethod-first-arg=cls 563 | 564 | # List of valid names for the first argument in a metaclass class method. 
565 | valid-metaclass-classmethod-first-arg=cls 566 | 567 | 568 | [EXCEPTIONS] 569 | 570 | # Exceptions that will emit a warning when being caught. Defaults to 571 | # "BaseException, Exception". 572 | overgeneral-exceptions=BaseException, 573 | Exception 574 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .ONESHELL: 2 | SHELL := /bin/bash 3 | 4 | pypi: dist 5 | twine upload --repository pypi dist/* 6 | 7 | dist: clean 8 | python3 -m build 9 | 10 | clean: 11 | rm -rf dist 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nbmetaclean 2 | Collections of python scripts for checking and cleaning Jupyter Notebooks metadata, execution_count and optionally output. 3 | Can be used as command line tool or pre-commit hook. 4 | 5 | 6 | Pure Python, no dependencies. 7 | 8 | Can be used as a pre-commit hook or as a command line tool. 
9 | 10 | 11 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/benchmark-utils)](https://pypi.org/project/nbmetaclean/) 12 | [![PyPI Status](https://badge.fury.io/py/nbmetaclean.svg)](https://badge.fury.io/py/nbmetaclean) 13 | [![Tests](https://github.com/ayasyrev/nbmetaclean/workflows/Tests/badge.svg)](https://github.com/ayasyrev/nbmetaclean/actions?workflow=Tests) 14 | [![Codecov](https://codecov.io/gh/ayasyrev/nbmetaclean/branch/main/graph/badge.svg)](https://codecov.io/gh/ayasyrev/nbmetaclean) 15 | 16 | ## nbmetaclean 17 | 18 | Clean Jupyter Notebooks metadata, execution_count and optionally output. 19 | 20 | ## nbcheck 21 | Check Jupyter Notebooks for errors and (or) warnings in outputs. 22 | 23 | 24 | ## Base usage 25 | 26 | ### Pre-commit hook 27 | Nbmetaclean can be used as a pre-commit hook, with [pre-commit](https://pre-commit.com/pre-commit). 28 | You do not need to install nbmetaclean, it will be installed automatically. 29 | add to `.pre-commit-config.yaml`: 30 | ```yaml 31 | repos: 32 | - repo: https://github.com/ayasyrev/nbmetaclean 33 | rev: 0.1.1 34 | hooks: 35 | - id: nbmetaclean 36 | - id: nbcheck 37 | args: [ --ec, --err, --warn ] 38 | ``` 39 | 40 | 41 | 42 | ### Command line tool 43 | 44 | #### Without install: 45 | If you use [uv](https://github.com/astral-sh/uv) package manager, you can run nbmetaclean without installing it. 46 | To clean notebooks: 47 | ```bash 48 | uvx nbmetaclean 49 | ``` 50 | To check notebooks: 51 | ```bash 52 | uvx --from nbmetaclean nbcheck --ec --err --warn 53 | ``` 54 | 55 | #### Install: 56 | ```bash 57 | pip install nbmetaclean 58 | ``` 59 | 60 | Usage: 61 | run `nbmetaclean` or `nbcheck` command with `path` to notebook or folder with notebooks. 62 | If no `path` is provided, current directory will be used as `path`. 63 | 64 | It is possible to use `nbclean` command instead of `nbmetaclean`.
65 | `nbmetaclean` will be used by defaults in favour of usage with `uvx` 66 | 67 | 68 | 69 | ```bash 70 | nbmetaclean 71 | ``` 72 | 73 | `nbcheck` should be run with flags: 74 | - `--ec` for execution_count check 75 | - `--err` for check errors in outputs 76 | - `--warn` for check warnings in outputs 77 | ```bash 78 | nbcheck --ec --err --warn 79 | ``` 80 | 81 | 82 | ## Nbmetaclean 83 | ### Default settings 84 | By default, the following settings are used: 85 | 86 | - Clean notebook metadata, except `authors` and `language_info / name`. 87 | - Clean cells execution_count. 88 | - Preserve metadata at cells. 89 | - Preserve cells outputs. 90 | - After cleaning notebook, timestamp for file will be set to previous values. 91 | 92 | 93 | 94 | 95 | 96 | 97 | ### Arguments 98 | Check available arguments: 99 | 100 | ```bash 101 | nbmetaclean -h 102 | 103 | usage: nbmetaclean [-h] [-s] [--not_ec] [--not-pt] [--dont_clear_nb_metadata] [--clear_cell_metadata] [--clear_outputs] 104 | [--nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...]] 105 | [--cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...]] [--dont_merge_masks] [--clean_hidden_nbs] [-D] [-V] 106 | [path ...] 107 | 108 | Clean metadata and execution_count from Jupyter notebooks. 109 | 110 | positional arguments: 111 | path Path for nb or folder with notebooks. 112 | 113 | options: 114 | -h, --help show this help message and exit 115 | -s, --silent Silent mode. 116 | --not_ec Do not clear execution_count. 117 | --not-pt Do not preserve timestamp. 118 | --dont_clear_nb_metadata 119 | Do not clear notebook metadata. 120 | --clear_cell_metadata 121 | Clear cell metadata. 122 | --clear_outputs Clear outputs. 123 | --nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...] 124 | Preserve mask for notebook metadata. 125 | --cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...] 
126 | Preserve mask for cell metadata. 127 | --dont_merge_masks Do not merge masks. 128 | --clean_hidden_nbs Clean hidden notebooks. 129 | -D, --dry_run perform a trial run, don't write results 130 | -V, --verbose Verbose mode. Print extra information. 131 | ``` 132 | 133 | ### Execution_count 134 | If you want to leave execution_count add `--not_ec` flag at command line or `args: [--not_ec]` line to `.pre-commit-config.yaml`. 135 | 136 | ```yaml 137 | repos: 138 | - repo: https://github.com/ayasyrev/nbmetaclean 139 | rev: 0.1.1 140 | hooks: 141 | - id: nbmetaclean 142 | args: [ --not_ec ] 143 | ``` 144 | 145 | ```bash 146 | nbmetaclean --not_ec 147 | ``` 148 | 149 | ### Clear outputs 150 | If you want to clear outputs, add `--clear_outputs` at command line or `args: [ --clear_outputs ]` line to `.pre-commit-config.yaml`. 151 | ```yaml 152 | repos: 153 | - repo: https://github.com/ayasyrev/nbmetaclean 154 | rev: 0.1.1 155 | hooks: 156 | - id: nbmetaclean 157 | args: [ --clear_outputs ] 158 | ``` 159 | 160 | ```bash 161 | nbmetaclean --clear_outputs 162 | ``` 163 | 164 | ## Nbcheck 165 | Check Jupyter Notebooks for correct execution_count, errors and (or) warnings in outputs. 166 | 167 | ### Execution_count 168 | Check that all code cells executed one after another. 169 | 170 | #### Strict mode 171 | By default, execution_count check in `strict` mode. 172 | All cells must be executed, one after another. 173 | 174 | pre-commit config example: 175 | ```yaml 176 | repos: 177 | - repo: https://github.com/ayasyrev/nbmetaclean 178 | rev: 0.1.1 179 | hooks: 180 | - id: nbcheck 181 | args: [ --ec ] 182 | ``` 183 | 184 | command line example: 185 | ```bash 186 | nbcheck --ec 187 | ``` 188 | 189 | #### Not strict mode 190 | `--not_strict` flag can be used to check that next cell executed after previous one, but execution number can be more than `+1`.
191 | 192 | pre-commit config example: 193 | ```yaml 194 | repos: 195 | - repo: https://github.com/ayasyrev/nbmetaclean 196 | rev: 0.1.1 197 | hooks: 198 | - id: nbcheck 199 | args: [ --ec, --not_strict ] 200 | ``` 201 | 202 | command line example: 203 | ```bash 204 | nbcheck --ec --not_strict 205 | ``` 206 | 207 | #### Allow notebooks with no execution_count 208 | 209 | `--no_exec` flag allows notebooks with all cells without execution_count. 210 | If notebook has cells with execution_count and without execution_count, pre-commit will return error. 211 | 212 | pre-commit config example: 213 | ```yaml 214 | repos: 215 | - repo: https://github.com/ayasyrev/nbmetaclean 216 | rev: 0.1.1 hooks: 217 | - id: nbcheck 218 | args: [ --ec, --no_exec ] 219 | ``` 220 | 221 | command line example: 222 | ```bash 223 | nbcheck --ec --no_exec 224 | ``` 225 | 226 | 227 | 228 | ### Errors and Warnings 229 | 230 | `--err` and `--warn` flags can be used to check for errors and warnings in outputs. 231 | 232 | pre-commit config example: 233 | ```yaml 234 | repos: 235 | - repo: https://github.com/ayasyrev/nbmetaclean 236 | rev: 0.1.1 237 | hooks: 238 | - id: nbcheck 239 | args: [ --err, --warn ] 240 | ``` 241 | 242 | command line example: 243 | ```bash 244 | nbcheck --err --warn 245 | ``` 246 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | hide: 3 | - navigation 4 | --- 5 | 6 | # nbmetaclean 7 | Collections of python scripts for checking and cleaning Jupyter Notebooks metadata, execution_count and optionally output. 8 | Can be used as command line tool or pre-commit hook. 9 | 10 | 11 | Pure Python, no dependencies. 12 | 13 | Can be used as a pre-commit hook or as a command line tool.
14 | 15 | 16 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/benchmark-utils)](https://pypi.org/project/nbmetaclean/) 17 | [![PyPI Status](https://badge.fury.io/py/nbmetaclean.svg)](https://badge.fury.io/py/nbmetaclean) 18 | [![Tests](https://github.com/ayasyrev/nbmetaclean/workflows/Tests/badge.svg)](https://github.com/ayasyrev/nbmetaclean/actions?workflow=Tests) [![Codecov](https://codecov.io/gh/ayasyrev/nbmetaclean/branch/main/graph/badge.svg)](https://codecov.io/gh/ayasyrev/nbmetaclean) 19 | 20 | ## nbmetaclean 21 | 22 | Clean Jupyter Notebooks metadata, execution_count and optionally output. 23 | 24 | ## nbcheck 25 | Check Jupyter Notebooks for errors and (or) warnings in outputs. 26 | 27 | 28 | ## Base usage 29 | 30 | ### Pre-commit hook 31 | Nbmetaclean can be used as a pre-commit hook, with [pre-commit](https://pre-commit.com/pre-commit). 32 | You do not need to install nbmetaclean, it will be installed automatically. 33 | add to `.pre-commit-config.yaml`: 34 | ```yaml 35 | repos: 36 | - repo: https://github.com/ayasyrev/nbmetaclean 37 | rev: 0.1.1 38 | hooks: 39 | - id: nbmetaclean 40 | - id: nbcheck 41 | args: [ --ec, --err, --warn ] 42 | ``` 43 | 44 | 45 | 46 | ### Command line tool 47 | 48 | #### Without install: 49 | If you use [uv](https://github.com/astral-sh/uv) package manager, you can run nbmetaclean without installing it. 50 | To clean notebooks: 51 | ```bash 52 | uvx nbmetaclean 53 | ``` 54 | To check notebooks: 55 | ```bash 56 | uvx --from nbmetaclean nbcheck --ec --err --warn 57 | ``` 58 | 59 | 60 | #### Install: 61 | ```bash 62 | pip install nbmetaclean 63 | ``` 64 | 65 | Usage: 66 | run `nbmetaclean` or `nbcheck` command with `path` to notebook or folder with notebooks. 67 | If no `path` is provided, current directory will be used as `path`. 68 | 69 | It is possible to use `nbclean` command instead of `nbmetaclean`.
70 | `nbmetaclean` will be used by defaults in favour of usage with `uvx` 71 | 72 | 73 | 74 | ```bash 75 | nbmetaclean 76 | ``` 77 | 78 | `nbcheck` should be run with flags: 79 | - `--ec` for execution_count check 80 | - `--err` for check errors in outputs 81 | - `--warn` for check warnings in outputs 82 | ```bash 83 | nbcheck --ec --err --warn 84 | ``` 85 | 86 | 87 | ## Nbmetaclean 88 | ### Default settings 89 | By default, the following settings are used: 90 | 91 | - Clean notebook metadata, except `authors` and `language_info / name`. 92 | - Clean cells execution_count. 93 | - Preserve metadata at cells. 94 | - Preserve cells outputs. 95 | - After cleaning notebook, timestamp for file will be set to previous values. 96 | 97 | 98 | 99 | 100 | 101 | 102 | ### Arguments 103 | Check available arguments: 104 | 105 | ```bash 106 | nbmetaclean -h 107 | 108 | usage: nbmetaclean [-h] [-s] [--not_ec] [--not-pt] [--dont_clear_nb_metadata] [--clear_cell_metadata] [--clear_outputs] 109 | [--nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...]] 110 | [--cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...]] [--dont_merge_masks] [--clean_hidden_nbs] [-D] [-V] 111 | [path ...] 112 | 113 | Clean metadata and execution_count from Jupyter notebooks. 114 | 115 | positional arguments: 116 | path Path for nb or folder with notebooks. 117 | 118 | options: 119 | -h, --help show this help message and exit 120 | -s, --silent Silent mode. 121 | --not_ec Do not clear execution_count. 122 | --not-pt Do not preserve timestamp. 123 | --dont_clear_nb_metadata 124 | Do not clear notebook metadata. 125 | --clear_cell_metadata 126 | Clear cell metadata. 127 | --clear_outputs Clear outputs. 128 | --nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...] 129 | Preserve mask for notebook metadata. 130 | --cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...] 
131 | Preserve mask for cell metadata. 132 | --dont_merge_masks Do not merge masks. 133 | --clean_hidden_nbs Clean hidden notebooks. 134 | -D, --dry_run perform a trial run, don't write results 135 | -V, --verbose Verbose mode. Print extra information. 136 | ``` 137 | 138 | ### Execution_count 139 | If you want to leave execution_count add `--not_ec` flag at command line or `args: [--not_ec]` line to `.pre-commit-config.yaml`. 140 | 141 | ```yaml 142 | repos: 143 | - repo: https://github.com/ayasyrev/nbmetaclean 144 | rev: 0.1.1 145 | hooks: 146 | - id: nbmetaclean 147 | args: [ --not_ec ] 148 | ``` 149 | 150 | ```bash 151 | nbmetaclean --not_ec 152 | ``` 153 | 154 | ### Clear outputs 155 | If you want to clear outputs, add `--clear_outputs` at command line or `args: [ --clear_outputs ]` line to `.pre-commit-config.yaml`. 156 | ```yaml 157 | repos: 158 | - repo: https://github.com/ayasyrev/nbmetaclean 159 | rev: 0.1.1 160 | hooks: 161 | - id: nbmetaclean 162 | args: [ --clear_outputs ] 163 | ``` 164 | 165 | ```bash 166 | nbmetaclean --clear_outputs 167 | ``` 168 | 169 | ## Nbcheck 170 | Check Jupyter Notebooks for correct execution_count, errors and (or) warnings in outputs. 171 | 172 | ### Execution_count 173 | Check that all code cells executed one after another. 174 | 175 | #### Strict mode 176 | By default, execution_count check in `strict` mode. 177 | All cells must be executed, one after another. 178 | 179 | pre-commit config example: 180 | ```yaml 181 | repos: 182 | - repo: https://github.com/ayasyrev/nbmetaclean 183 | rev: 0.1.1 184 | hooks: 185 | - id: nbcheck 186 | args: [ --ec ] 187 | ``` 188 | 189 | command line example: 190 | ```bash 191 | nbcheck --ec 192 | ``` 193 | 194 | #### Not strict mode 195 | `--not_strict` flag can be used to check that next cell executed after previous one, but execution number can be more than `+1`.
196 | 197 | pre-commit config example: 198 | ```yaml 199 | repos: 200 | - repo: https://github.com/ayasyrev/nbmetaclean 201 | rev: 0.1.1 202 | hooks: 203 | - id: nbcheck 204 | args: [ --ec, --not_strict ] 205 | ``` 206 | 207 | command line example: 208 | ```bash 209 | nbcheck --ec --not_strict 210 | ``` 211 | 212 | #### Allow notebooks with no execution_count 213 | 214 | `--no_exec` flag allows notebooks with all cells without execution_count. 215 | If notebook has cells with execution_count and without execution_count, pre-commit will return error. 216 | 217 | pre-commit config example: 218 | ```yaml 219 | repos: 220 | - repo: https://github.com/ayasyrev/nbmetaclean 221 | rev: 0.1.1 hooks: 222 | - id: nbcheck 223 | args: [ --ec, --no_exec ] 224 | ``` 225 | 226 | command line example: 227 | ```bash 228 | nbcheck --ec --no_exec 229 | ``` 230 | 231 | 232 | 233 | ### Errors and Warnings 234 | 235 | `--err` and `--warn` flags can be used to check for errors and warnings in outputs. 236 | 237 | pre-commit config example: 238 | ```yaml 239 | repos: 240 | - repo: https://github.com/ayasyrev/nbmetaclean 241 | rev: 0.1.1 242 | hooks: 243 | - id: nbcheck 244 | args: [ --err, --warn ] 245 | ``` 246 | 247 | command line example: 248 | ```bash 249 | nbcheck --err --warn 250 | ``` 251 | -------------------------------------------------------------------------------- /docs/overrides/partials/copyright.html: -------------------------------------------------------------------------------- 1 | 18 | -------------------------------------------------------------------------------- /mkdocs.yaml: -------------------------------------------------------------------------------- 1 | site_name: Nbmetaclean 2 | repo_url: https://github.com/ayasyrev/nbmetaclean 3 | repo_name: nbmetaclean 4 | docs_dir: docs 5 | 6 | # copyright: 7 | theme: 8 | name: material 9 | custom_dir: docs/overrides 10 | 11 | palette: 12 | - scheme: default 13 | toggle: 14 | icon: material/toggle-switch-off-outline 15 | name:
Switch to dark mode 16 | - scheme: slate 17 | toggle: 18 | icon: material/toggle-switch 19 | name: Switch to light mode 20 | markdown_extensions: 21 | - admonition 22 | - pymdownx.details 23 | - pymdownx.superfences 24 | 25 | extra: 26 | analytics: 27 | provider: google 28 | property: G-0F3FK713C2 29 | copyright: Copyright © 2023-2024 Andrei Yasyrev. 30 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | 4 | nox.options.default_venv_backend = "uv|virtualenv" 5 | nox.options.reuse_existing_virtualenvs = True 6 | 7 | 8 | @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]) 9 | def tests(session: nox.Session) -> None: 10 | args = session.posargs or ["--cov"] 11 | session.install("-e", ".[test]") 12 | session.run("pytest", *args) 13 | -------------------------------------------------------------------------------- /noxfile_conda.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | nox.options.default_venv_backend = "mamba|conda" 4 | nox.options.reuse_existing_virtualenvs = True 5 | 6 | 7 | @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]) 8 | def conda_tests(session: nox.Session) -> None: 9 | args = session.posargs or ["--cov"] 10 | session.conda_install("uv") 11 | session.run("uv", "pip", "install", "-e", ".[test]") 12 | session.run("pytest", *args) 13 | -------------------------------------------------------------------------------- /noxfile_conda_lint.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | locations = "." 
4 | nox.options.default_venv_backend = "mamba|conda" 5 | nox.options.reuse_existing_virtualenvs = True 6 | 7 | 8 | @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]) 9 | def conda_lint(session: nox.Session) -> None: 10 | args = session.posargs or locations 11 | session.conda_install("ruff") 12 | session.run("ruff", "check", *args) 13 | -------------------------------------------------------------------------------- /noxfile_lint.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | locations = "." 4 | nox.options.default_venv_backend = "uv|virtualenv" 5 | nox.options.reuse_existing_virtualenvs = True 6 | 7 | 8 | @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]) 9 | def lint(session: nox.Session) -> None: 10 | args = session.posargs or locations 11 | session.install("ruff") 12 | session.run("ruff", "check", *args) 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.coverage.paths] 2 | source = ["src"] 3 | 4 | [tool.coverage.run] 5 | branch = true 6 | source = ["nbmetaclean"] 7 | 8 | [tool.coverage.report] 9 | show_missing = true 10 | 11 | [tool.ruff] 12 | extend-include = ["*.ipynb"] 13 | indent-width = 4 14 | 15 | [tool.ruff.lint] 16 | explicit-preview-rules = true 17 | 18 | [tool.ruff.format] 19 | quote-style = "double" 20 | indent-style = "space" 21 | skip-magic-trailing-comma = false 22 | line-ending = "auto" 23 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | black[jupyter] 3 | coverage[toml] 4 | flake8 5 | isort 6 | mypy 7 | nox 8 | pre-commit 9 | ruff 10 | -------------------------------------------------------------------------------- /requirements_test.txt: 
-------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = nbmetaclean 3 | version = attr: nbmetaclean.version.__version__ 4 | author = Yasyrev Andrei 5 | author_email = a.yasyrev@gmail.com 6 | description = Clean jupyter notebooks. Remove metadata and execution counts. 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/ayasyrev/nbmetaclean 10 | license = apache2 11 | classifiers = 12 | Programming Language :: Python :: 3.8 13 | Programming Language :: Python :: 3.9 14 | Programming Language :: Python :: 3.10 15 | Programming Language :: Python :: 3.11 16 | Programming Language :: Python :: 3.12 17 | Programming Language :: Python :: 3.13 18 | License :: OSI Approved :: Apache Software License 19 | Operating System :: OS Independent 20 | 21 | [options] 22 | package_dir = 23 | = src 24 | packages = find: 25 | python_requires = >=3.8 26 | 27 | [options.packages.find] 28 | where = src 29 | 30 | [options.entry_points] 31 | console_scripts = 32 | nbmetaclean=nbmetaclean.app_clean:app_clean 33 | nbclean=nbmetaclean.app_clean:app_clean 34 | nbcheck=nbmetaclean.app_check:app_check 35 | pipx.run = 36 | nbmetaclean=nbmetaclean.app_clean:app_clean 37 | nbclean=nbmetaclean.app_clean:app_clean 38 | nbcheck=nbmetaclean.app_check:app_check 39 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | REQUIREMENTS_TEST_FILENAME = "requirements_test.txt" 5 | REQUIREMENTS_DEV_FILENAME = "requirements_dev.txt" 6 | 7 | 8 | def load_requirements(filename: str) -> list[str]: 9 | """Load requirements from file""" 10 
| try: 11 | with open(filename, encoding="utf-8") as fh: 12 | return fh.read().splitlines() 13 | except FileNotFoundError: 14 | return [] 15 | 16 | 17 | TEST_REQUIRED = load_requirements(REQUIREMENTS_TEST_FILENAME) 18 | DEV_REQUIRED = load_requirements(REQUIREMENTS_DEV_FILENAME) 19 | 20 | 21 | # What packages are optional? 22 | EXTRAS = { 23 | "test": TEST_REQUIRED, 24 | "dev": DEV_REQUIRED + TEST_REQUIRED, 25 | } 26 | 27 | 28 | setup( 29 | extras_require=EXTRAS, 30 | ) 31 | -------------------------------------------------------------------------------- /src/nbmetaclean/__init__.py: -------------------------------------------------------------------------------- 1 | from .check import check_nb_ec, check_nb_errors 2 | from .clean import clean_nb_file, CleanConfig, clean_nb 3 | from .helpers import read_nb, write_nb, get_nb_names, get_nb_names_from_list 4 | 5 | 6 | __all__ = [ 7 | "get_nb_names", 8 | "get_nb_names_from_list", 9 | "check_nb_ec", 10 | "check_nb_errors", 11 | "clean_nb", 12 | "clean_nb_file", 13 | "CleanConfig", 14 | "read_nb", 15 | "write_nb", 16 | ] 17 | -------------------------------------------------------------------------------- /src/nbmetaclean/app_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import argparse 4 | from pathlib import Path 5 | import sys 6 | 7 | from nbmetaclean.check import check_nb_ec, check_nb_errors, check_nb_warnings 8 | from nbmetaclean.helpers import get_nb_names_from_list, read_nb 9 | from nbmetaclean.version import __version__ 10 | 11 | 12 | parser = argparse.ArgumentParser( 13 | prog="nbcheck", 14 | description="Check Jupyter notebooks for correct sequence of execution_count and (or) errors in outputs.", 15 | ) 16 | parser.add_argument( 17 | "path", 18 | default=".", 19 | nargs="*", 20 | help="Path for nb or folder with notebooks.", 21 | ) 22 | parser.add_argument( 23 | "--ec", 24 | action="store_true", 25 | help="Check 
execution_count.", 26 | ) 27 | parser.add_argument( 28 | "--err", 29 | action="store_true", 30 | help="Check errors in outputs.", 31 | ) 32 | parser.add_argument( 33 | "--warn", 34 | action="store_true", 35 | help="Check warnings in outputs.", 36 | ) 37 | parser.add_argument( 38 | "--not_strict", 39 | action="store_true", 40 | help="Not strict mode.", 41 | ) 42 | parser.add_argument( 43 | "--no_exec", 44 | action="store_true", 45 | help="Ignore notebooks with all code cells without execution_count.", 46 | ) 47 | parser.add_argument( 48 | "-V", 49 | "--verbose", 50 | action="store_true", 51 | help="Verbose mode. Print extra information.", 52 | ) 53 | parser.add_argument( 54 | "-v", 55 | "--version", 56 | action="store_true", 57 | help="Print version information.", 58 | ) 59 | 60 | 61 | def print_error( 62 | nbs: list[Path], 63 | message: str, 64 | ) -> None: 65 | """Print error message.""" 66 | print(f"{len(nbs)} notebooks with {message}:") 67 | for nb in nbs: 68 | print("- ", nb) 69 | 70 | 71 | def print_results( 72 | wrong_ec: list[Path], 73 | nb_errors: list[Path], 74 | nb_warnings: list[Path], 75 | read_error: list[Path], 76 | ) -> None: 77 | """Print results.""" 78 | if wrong_ec: 79 | print_error(wrong_ec, "wrong execution_count") 80 | if nb_errors: 81 | print_error(nb_errors, "errors in outputs") 82 | if nb_warnings: 83 | print_error(nb_warnings, "warnings in outputs") 84 | if read_error: 85 | print_error(read_error, "read error") 86 | 87 | 88 | def app_check() -> None: 89 | """Check notebooks for correct sequence of execution_count and errors in outputs.""" 90 | cfg = parser.parse_args() 91 | 92 | if cfg.version: 93 | print(f"nbcheck from nbmetaclean, version: {__version__}") 94 | sys.exit(0) 95 | 96 | if not cfg.ec and not cfg.err and not cfg.warn: 97 | print( 98 | "No checks are selected. Please select at least one check: " 99 | "--ec (for execution_count) or " 100 | "--err (for errors in outputs) or " 101 | "--warn (for warnings in outputs)." 
def app_check() -> None:
    """CLI entry point: check notebooks, print a report, exit 1 on any problem.

    Checks are opt-in via flags (`--ec`, `--err`, `--warn`); running with
    none selected is an error.
    """
    cfg = parser.parse_args()

    if cfg.version:
        print(f"nbcheck from nbmetaclean, version: {__version__}")
        sys.exit(0)

    if not any((cfg.ec, cfg.err, cfg.warn)):
        print(
            "No checks are selected. Please select at least one check: "
            "--ec (for execution_count) or "
            "--err (for errors in outputs) or "
            "--warn (for warnings in outputs)."
        )
        sys.exit(1)

    nb_files = get_nb_names_from_list(cfg.path)
    read_error: list[Path] = []
    if cfg.verbose:
        print(f"Checking {len(nb_files)} notebooks.")

    wrong_ec: list[Path] = []
    nb_errors: list[Path] = []
    nb_warnings: list[Path] = []
    for filename in nb_files:
        nb = read_nb(filename)
        if nb is None:  # unreadable or not valid notebook json
            read_error.append(filename)
            continue

        if cfg.ec and not check_nb_ec(nb, not cfg.not_strict, cfg.no_exec):
            wrong_ec.append(filename)

        if cfg.err and not check_nb_errors(nb):
            nb_errors.append(filename)

        if cfg.warn and not check_nb_warnings(nb):
            nb_warnings.append(filename)

    print_results(wrong_ec, nb_errors, nb_warnings, read_error)

    if any((wrong_ec, nb_errors, nb_warnings, read_error)):
        sys.exit(1)
parser.add_argument( 35 | "--not-pt", 36 | action="store_true", 37 | help="Do not preserve timestamp.", 38 | ) 39 | parser.add_argument( 40 | "--dont_clear_nb_metadata", 41 | action="store_true", 42 | help="Do not clear notebook metadata.", 43 | ) 44 | parser.add_argument( 45 | "--clear_cell_metadata", 46 | action="store_true", 47 | help="Clear cell metadata.", 48 | ) 49 | parser.add_argument( 50 | "--clear_outputs", 51 | action="store_true", 52 | help="Clear outputs.", 53 | ) 54 | parser.add_argument( 55 | "--nb_metadata_preserve_mask", 56 | nargs="+", 57 | help="Preserve mask for notebook metadata.", 58 | ) 59 | parser.add_argument( 60 | "--cell_metadata_preserve_mask", 61 | nargs="+", 62 | help="Preserve mask for cell metadata.", 63 | ) 64 | parser.add_argument( 65 | "--dont_merge_masks", 66 | action="store_true", 67 | help="Do not merge masks.", 68 | ) 69 | parser.add_argument( 70 | "--clean_hidden_nbs", 71 | action="store_true", 72 | help="Clean hidden notebooks.", 73 | ) 74 | parser.add_argument( 75 | "-D", 76 | "--dry_run", 77 | action="store_true", 78 | help="perform a trial run, don't write results", 79 | ) 80 | parser.add_argument( 81 | "-V", 82 | "--verbose", 83 | action="store_true", 84 | help="Verbose mode. 
def process_mask(mask: Union[list[str], None]) -> Union[tuple[TupleStr, ...], None]:
    """Convert dotted CLI mask strings into tuples of path components.

    ``["a.b", "c"]`` becomes ``(("a", "b"), ("c",))``; ``None`` passes through.
    """
    if mask is None:
        return None
    result = []
    for item in mask:
        result.append(tuple(item.split(".")))
    return tuple(result)
def check_nb_ec(nb: Nb, strict: bool = True, no_exec: bool = False) -> bool:
    """Check nb for correct sequence of execution_count.

    Expecting all code cells executed one after another.
    If `strict` is False, check that next cell executed after previous one,
    number can be more than `+1`.
    If `no_exec` is True, ignore notebooks with all code cells without
    execution_count.

    Args:
        nb (Nb): Notebook to check.
        strict (bool, optional): Strict mode. Defaults to True.
        no_exec (bool): Ignore notebooks with all code cells without execution_count.

    Returns:
        bool: True if correct.
    """
    current = 0
    no_exec_cells = 0
    for cell in nb["cells"]:
        if cell["cell_type"] != "code":
            continue
        # Use .get so a malformed cell missing these keys is treated as
        # unset instead of raising KeyError.
        execution_count = cell.get("execution_count")
        if not cell.get("source"):
            # Cell without code must not carry an execution_count.
            if execution_count:
                return False
            continue

        if not execution_count:
            if not no_exec:
                return False
            no_exec_cells += 1
        else:
            if strict and execution_count != current + 1:
                return False
            if execution_count <= current:
                return False
            current = execution_count
    if no_exec_cells and current:  # mixed: some cells executed, some not.
        return False
    return True
def check_nb_warnings(nb: Nb) -> bool:
    """Check nb for cells with warnings (stderr stream outputs).

    Args:
        nb (Nb): Notebook to check.

    Returns:
        bool: True if no warnings.
    """
    for cell in nb["cells"]:
        if cell["cell_type"] == "code" and "outputs" in cell:
            for output in cell["outputs"]:
                # Only "stream" outputs carry a "name" field; use .get so a
                # malformed output without it cannot raise KeyError.
                if output["output_type"] == "stream" and output.get("name") == "stderr":
                    return False
    return True
def filter_meta_mask(
    nb_meta: Union[str, int, Metadata],
    mask: Optional[tuple[str, ...]] = None,
) -> Union[str, int, Metadata]:
    """Recursively keep only the metadata entries selected by ``mask``.

    A scalar value (or an empty mask) is returned unchanged;
    ``mask=None`` discards everything and yields an empty dict.
    """
    if isinstance(nb_meta, (str, int)) or mask == ():
        return nb_meta
    if mask is None:
        return {}
    key = mask[0]
    value = nb_meta.get(key)
    if value is None:
        return {}
    filtered = filter_meta_mask(value, tuple(mask[1:]))
    # Fall back to the full value when the filtered subtree is empty.
    return {key: filtered or value}
def clean_outputs(outputs: list[Output], cfg: CleanConfig) -> bool:
    """Clean cell outputs in place: execution_count and (optionally) metadata.

    Returns:
        bool: True if anything was modified.
    """
    changed = False
    for out in outputs:
        if cfg.clear_execution_count and out.get("execution_count"):
            out["execution_count"] = None
            changed = True
        if not cfg.clear_cell_metadata:
            continue
        metadata = out.get("metadata")
        if metadata:
            before = copy.deepcopy(metadata)
            out["metadata"] = filter_metadata(metadata, cfg.cell_metadata_preserve_mask)
            changed = changed or out["metadata"] != before
    return changed
def clean_nb(
    nb: Nb,
    cfg: CleanConfig,
) -> bool:
    """Clean notebook in place - metadata, execution_count, outputs.

    Args:
        nb (Nb): Notebook to clean.
        cfg (CleanConfig): Cleaning options.

    Returns:
        bool: True if the notebook was changed.
    """
    changed = False

    metadata = nb.get("metadata")
    if cfg.clear_nb_metadata and metadata:
        old_metadata = copy.deepcopy(metadata)
        # Pick the preserve masks: user mask only, user mask merged with
        # the defaults, or the defaults alone.
        if not cfg.nb_metadata_preserve_mask:
            masks = NB_METADATA_PRESERVE_MASKS
        elif cfg.mask_merge:
            masks = cfg.nb_metadata_preserve_mask + NB_METADATA_PRESERVE_MASKS
        else:
            masks = cfg.nb_metadata_preserve_mask
        nb["metadata"] = filter_metadata(metadata, masks=masks)
        changed = nb["metadata"] != old_metadata

    if cfg.clear_cell_metadata or cfg.clear_execution_count or cfg.clear_outputs:
        for cell in nb["cells"]:
            if clean_cell(cell, cfg):
                changed = True

    return changed
def read_nb(path: PathOrStr) -> Nb | None:
    """Read notebook from filename.

    If file does not exist or is not a valid notebook, return None.

    Args:
        path (Union[str, PosixPath]): Notebook filename.

    Returns:
        Union[None, Nb]: Jupyter Notebook as dict or None if not valid or does not exist.
    """
    nb_path = Path(path)
    if not nb_path.exists() or not nb_path.is_file():
        return None
    try:
        # Context manager guarantees the handle is closed; the previous
        # `json.load(open(...))` leaked the file handle on every call.
        with open(nb_path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except Exception:
        # Unreadable / not valid JSON -> treat as "not a notebook".
        return None
def is_notebook(path: Path, hidden: bool = False) -> bool:
    """Return True if `path` looks like a notebook file.

    Hidden files (dot-prefixed names) only count when `hidden` is True.

    Args:
        path (Path): Path to check.
        hidden (bool): If True also accept hidden files, defaults to False.

    Returns:
        bool: True if `path` is a notebook and not hidden (unless allowed).
    """
    if path.suffix != ".ipynb":
        return False
    return hidden or not path.name.startswith(".")
def get_nb_names(
    path: Optional[PathOrStr] = None,
    recursive: bool = True,
    hidden: bool = False,
) -> list[Path]:
    """Return list of notebooks from `path`. If no `path`, use current folder.

    Args:
        path (Union[Path, str, None]): Path to nb or folder with notebooks.
        recursive (bool): Recursive search.
        hidden (bool): Skip or not hidden paths, defaults to False.

    Raises:
        FileNotFoundError: If path does not exist.

    Returns:
        list[Path]: List of notebooks names.
    """
    nb_path = Path(path or ".")

    if not nb_path.exists():
        raise FileNotFoundError(f"{nb_path} not exists!")

    if nb_path.is_file():
        return [nb_path] if is_notebook(nb_path, hidden) else []

    if not nb_path.is_dir():
        return []

    found: list[Path] = []
    for item in nb_path.iterdir():
        if item.is_file() and is_notebook(item, hidden):
            found.append(item)
        elif item.is_dir() and recursive:
            # Skip hidden folders (unless requested) and checkpoint folders.
            if item.name.startswith(".") and not hidden:
                continue
            if "checkpoint" in item.name:
                continue
            found.extend(get_nb_names(item, recursive, hidden))
    return found
def get_nb_names_from_list(
    path_list: list[PathOrStr] | PathOrStr,
    recursive: bool = True,
    hidden: bool = False,
) -> list[Path]:
    """Collect notebooks from every entry in `path_list`.

    Args:
        path_list (Union[Path, str, list]): Paths to notebooks or folders.
        recursive (bool): Recursive search.
        hidden (bool): Skip or not hidden paths, defaults to False.

    Returns:
        list[Path]: List of notebooks names.
    """
    if isinstance(path_list, (str, Path)):
        path_list = [path_list]
    nb_files: list[Path] = []
    for entry in path_list:
        if not Path(entry).exists():
            print(f"{entry} not exists!")
            continue
        nb_files.extend(get_nb_names(entry, recursive, hidden))
    return nb_files
def run_app(
    nb_path: Path,
    args: list[str] | None = None,
) -> tuple[str, str]:
    """Run the `nbcheck` app in a subprocess and return (stdout, stderr).

    Args:
        nb_path (Path): Notebook path (or a flag such as "--version").
        args (list[str] | None): Extra CLI arguments, defaults to None.

    Returns:
        tuple[str, str]: Decoded stdout and stderr of the run.
    """
    # `None` default instead of a mutable `[]` default: a shared default
    # list would survive between calls if any caller mutated it.
    run_result = subprocess.run(
        ["python", "-m", "nbmetaclean.app_check", str(nb_path), *(args or [])],
        capture_output=True,
        check=False,
    )
    return run_result.stdout.decode("utf-8"), run_result.stderr.decode("utf-8")
capture_output=True, check=False 34 | ) 35 | assert run_result.returncode == 0 36 | res_out = run_result.stdout.decode("utf-8") 37 | assert res_out.startswith( 38 | "usage: nbcheck [-h] [--ec] [--err] [--warn] [--not_strict] [--no_exec]" 39 | ) 40 | res_err = run_result.stderr.decode("utf-8") 41 | assert not res_err 42 | 43 | 44 | def test_check_nb_ec(tmp_path: Path): 45 | """test check `--ec`""" 46 | # base notebook - no execution_count 47 | 48 | test_nb = read_nb(example_nbs_path / nb_name) 49 | test_nb_path = tmp_path / nb_name 50 | write_nb(test_nb, test_nb_path) 51 | 52 | # check if no args 53 | res_out, res_err = run_app(test_nb_path, []) 54 | assert res_out.startswith( 55 | "No checks are selected. Please select at least one check: " 56 | "--ec (for execution_count) or --err (for errors in outputs) or " 57 | "--warn (for warnings in outputs)." 58 | ) 59 | assert not res_err 60 | 61 | # default execution_count 62 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 63 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 64 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 65 | assert not res_err 66 | 67 | # `-V` option 68 | res_out, res_err = run_app(test_nb_path, ["--ec", "-V"]) 69 | assert res_out.startswith("Checking 1 notebooks.\n") 70 | assert not res_err 71 | 72 | # check with `no_exec` option 73 | res_out, res_err = run_app(test_nb_path, ["--ec", "--no_exec"]) 74 | assert not res_out 75 | assert not res_err 76 | 77 | # set correct execution_count 78 | test_nb["cells"][2]["execution_count"] = 1 79 | test_nb["cells"][3]["execution_count"] = 2 80 | test_nb["cells"][5]["execution_count"] = 3 81 | write_nb(test_nb, test_nb_path) 82 | 83 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 84 | assert not res_out 85 | assert not res_err 86 | 87 | # test strict 88 | test_nb["cells"][5]["execution_count"] = 4 89 | write_nb(test_nb, test_nb_path) 90 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 91 | assert res_out.startswith("1 
notebooks with wrong execution_count:\n") 92 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 93 | assert not res_err 94 | 95 | res_out, res_err = run_app(test_nb_path, ["--ec", "--not_strict"]) 96 | assert not res_out 97 | assert not res_err 98 | 99 | # empty source, but with execution_count 100 | test_nb["cells"][5]["execution_count"] = 3 101 | test_nb["cells"][6]["execution_count"] = 4 102 | write_nb(test_nb, test_nb_path) 103 | 104 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 105 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 106 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 107 | assert not res_err 108 | res_out, res_err = run_app(test_nb_path, ["--ec", "--not_strict"]) 109 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 110 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 111 | assert not res_err 112 | 113 | # start not from 1 114 | test_nb = read_nb(example_nbs_path / nb_name) 115 | test_nb["cells"][2]["execution_count"] = 2 116 | test_nb["cells"][3]["execution_count"] = 3 117 | test_nb["cells"][5]["execution_count"] = 4 118 | write_nb(test_nb, test_nb_path) 119 | 120 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 121 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 122 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 123 | assert not res_err 124 | res_out, res_err = run_app(test_nb_path, ["--ec", "--not_strict"]) 125 | assert not res_out 126 | assert not res_err 127 | 128 | # next is less 129 | test_nb["cells"][3]["execution_count"] = 5 130 | write_nb(test_nb, test_nb_path) 131 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 132 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 133 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 134 | assert not res_err 135 | 136 | # code cell without execution_count 137 | test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb") 138 | test_nb["cells"][2]["execution_count"] = 1 139 | write_nb(test_nb, 
def test_check_nb_errors(tmp_path: Path):
    """`--err` flag: a clean nb passes, an nb with an error output is reported."""
    source_nb = "test_nb_3_ec.ipynb"
    nb = read_nb(example_nbs_path / source_nb)
    assert nb is not None

    nb_path = tmp_path / source_nb
    write_nb(nb, nb_path)
    out, err = run_app(nb_path, ["--err"])
    assert not out
    assert not err

    # Turn one output into an error output - the check must now fail.
    nb["cells"][2]["outputs"][0]["output_type"] = "error"
    write_nb(nb, nb_path)
    out, err = run_app(nb_path, ["--err"])
    assert out.startswith("1 notebooks with errors in outputs:\n")
    assert out.endswith("test_nb_3_ec.ipynb\n")
    assert not err
def test_check_app_version():
    """Both `--version` and `-v` print the app version string."""
    expected = f"nbcheck from nbmetaclean, version: {__version__}\n"
    for flag in ("--version", "-v"):
        out, err = run_app(flag)
        assert out == expected
        assert not err
def test_clean_nb_metadata(tmp_path: Path) -> None:
    """Notebook-metadata cleaning: preserve mask, dry run, verbose output."""
    source_nb = "test_nb_2_clean.ipynb"
    nb = read_nb(example_nbs_path / source_nb)
    nb_path = tmp_path / source_nb
    write_nb(nb, nb_path)

    # Already-clean notebook: default run reports nothing.
    out, err = run_app(nb_path, [])
    assert not out
    assert not err

    # Inject extra metadata to be cleaned.
    nb["metadata"]["some key"] = "some value"
    write_nb(nb, nb_path)

    # With a preserve mask covering the key, nothing changes.
    out, err = run_app(nb_path, ["--nb_metadata_preserve_mask", "some key"])
    assert not out
    assert not err
    result_nb = read_nb(nb_path)
    assert result_nb["metadata"]["some key"] == "some value"

    # Dry run reports the change but leaves the file untouched.
    out, err = run_app(nb_path, ["-D"])
    assert out
    assert not err
    result_nb = read_nb(nb_path)
    assert result_nb["metadata"]["some key"] == "some value"

    # Real run without a mask strips the key.
    out, err = run_app(nb_path, [])
    assert out
    assert not err
    result_nb = read_nb(nb_path)
    metadata = result_nb.get("metadata")
    assert metadata
    assert not metadata.get("some key")

    # Verbose run on the now-clean notebook.
    out, err = run_app(nb_path, ["-V"])
    assert out.startswith("Path: ")
    assert out.endswith(
        "test_nb_2_clean.ipynb, preserve timestamp: True\nchecked: 1 notebooks\n"
    )
    assert not err

    # Rewrite the dirty notebook and clean verbosely again.
    write_nb(nb, nb_path)
    out, err = run_app(nb_path, ["-V"])
    assert out.startswith("Path: ")
    assert "cleaned:" in out
    assert out.endswith("test_nb_2_clean.ipynb\n")
    assert not err
res_err = run_app(test_nb_path, ["-D"]) 141 | assert res_out.startswith("cleaned:") 142 | assert res_out.endswith("test_nb_2_clean.ipynb\n") 143 | assert not res_err 144 | nb = read_nb(test_nb_path) 145 | assert nb["cells"][1]["execution_count"] == 1 146 | assert nb["cells"][1]["outputs"][0]["execution_count"] == 1 147 | # dry, verbose 148 | res_out, res_err = run_app(test_nb_path, ["-DV"]) 149 | assert res_out.startswith("Path: ") 150 | assert nb_name_clean in res_out 151 | assert res_out.endswith("test_nb_2_clean.ipynb\n") 152 | assert not res_err 153 | 154 | # silent 155 | write_nb(test_nb, test_nb_path) 156 | res_out, res_err = run_app(test_nb_path, ["-s"]) 157 | assert not res_out 158 | assert not res_err 159 | nb = read_nb(test_nb_path) 160 | assert nb["cells"][1]["execution_count"] is None 161 | assert nb["cells"][1]["outputs"][0]["execution_count"] is None 162 | 163 | # clean output 164 | write_nb(test_nb, test_nb_path) 165 | res_out, res_err = run_app(test_nb_path, ["--clear_outputs"]) 166 | assert res_out.startswith("cleaned:") 167 | assert res_out.endswith("test_nb_2_clean.ipynb\n") 168 | assert not res_err 169 | nb = read_nb(test_nb_path) 170 | assert nb["cells"][1]["execution_count"] is None 171 | assert nb["cells"][1]["outputs"] == [] 172 | 173 | # path as arg 174 | write_nb(test_nb, test_nb_path) 175 | res_out, res_err = run_app(test_nb_path, []) 176 | assert res_out.startswith("cleaned:") 177 | assert res_out.endswith("test_nb_2_clean.ipynb\n") 178 | assert not res_err 179 | nb = read_nb(test_nb_path) 180 | assert nb["metadata"]["authors"][0]["name"] == "Andrei Yasyrev" 181 | assert nb["cells"][1]["execution_count"] is None 182 | assert nb["cells"][1]["outputs"][0]["execution_count"] is None 183 | 184 | # two nbs 185 | write_nb(test_nb, test_nb_path) 186 | # add second notebook 187 | nb_name_clean_2 = "test_nb_3_ec.ipynb" 188 | test_nb_2 = read_nb(example_nbs_path / nb_name_clean_2) 189 | test_nb_2["metadata"]["some key"] = "some value" 190 | 
write_nb(test_nb_2, tmp_path / nb_name_clean_2) 191 | 192 | res_out, res_err = run_app(tmp_path, []) 193 | assert res_out.startswith("cleaned: 2 notebooks\n") 194 | assert nb_name_clean in res_out 195 | assert nb_name_clean_2 in res_out 196 | assert not res_err 197 | 198 | 199 | def test_clean_nb_wrong_file(tmp_path: Path): 200 | """test app_clean with wrong file""" 201 | nb_name = tmp_path / "wrong.ipynb" 202 | with nb_name.open("w", encoding="utf-8") as fh: 203 | fh.write("some text") 204 | 205 | res_out, res_err = run_app(nb_name, []) 206 | assert res_out.startswith("with errors: 1") 207 | assert str(nb_name) in res_out 208 | assert not res_err 209 | 210 | 211 | def test_app_clean_version(): 212 | """test check `--version` option.""" 213 | res_out, res_err = run_app(args=["--version"]) 214 | assert res_out.startswith("nbmetaclean version: ") 215 | assert not res_err 216 | 217 | res_out, res_err = run_app(args=["-v"]) 218 | assert res_out.startswith("nbmetaclean version: ") 219 | assert not res_err 220 | -------------------------------------------------------------------------------- /tests/test_check.py: -------------------------------------------------------------------------------- 1 | from nbmetaclean.check import check_nb_ec, check_nb_errors, check_nb_warnings 2 | from nbmetaclean.helpers import read_nb 3 | 4 | 5 | def test_check_nb_ec(): 6 | """test check_nb_ec""" 7 | # base notebook - no execution_count 8 | test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb") 9 | result = check_nb_ec(test_nb) 10 | assert not result 11 | 12 | # check with `no_exec` option 13 | result = check_nb_ec(test_nb, strict=False, no_exec=True) 14 | assert result 15 | 16 | test_nb["cells"][2]["execution_count"] = 1 17 | test_nb["cells"][3]["execution_count"] = 2 18 | test_nb["cells"][5]["execution_count"] = 3 19 | 20 | result = check_nb_ec(test_nb) 21 | assert result 22 | 23 | # test strict 24 | test_nb["cells"][5]["execution_count"] = 4 25 | result = check_nb_ec(test_nb) 26 | 
    assert not result
    # non-strict mode tolerates gaps in the execution-count sequence
    result = check_nb_ec(test_nb, strict=False)
    assert result

    # empty source, but with execution_count
    # cell 6 has no source, so an execution_count there is invalid in both modes
    test_nb["cells"][5]["execution_count"] = 3
    test_nb["cells"][6]["execution_count"] = 4

    result = check_nb_ec(test_nb)
    assert not result
    result = check_nb_ec(test_nb, strict=False)
    assert not result

    # start not from 1
    # strict mode requires counts to start at 1; non-strict only requires increasing order
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    test_nb["cells"][2]["execution_count"] = 2
    test_nb["cells"][3]["execution_count"] = 3
    test_nb["cells"][5]["execution_count"] = 4

    result = check_nb_ec(test_nb)
    assert not result
    result = check_nb_ec(test_nb, strict=False)
    assert result

    # next is less
    # a later cell with a smaller count breaks ordering even in non-strict mode
    test_nb["cells"][3]["execution_count"] = 5

    result = check_nb_ec(test_nb, strict=False)
    assert not result

    # code cell without execution_count
    # only one of several code cells was executed -> notebook is not fully run
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    test_nb["cells"][2]["execution_count"] = 1

    result = check_nb_ec(test_nb, strict=False)
    assert not result

    # check with `no_exec` option should be False
    # no_exec accepts only fully unexecuted notebooks; this one is partially executed
    result = check_nb_ec(test_nb, strict=False, no_exec=True)
    assert not result


def test_check_nb_errors():
    """test check_nb_errors"""
    # clean fixture notebook has no error outputs
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    result = check_nb_errors(test_nb)
    assert result

    # inject an error output -> check must fail
    test_nb["cells"][2]["outputs"][0]["output_type"] = "error"
    result = check_nb_errors(test_nb)
    assert not result


def test_check_nb_warnings():
    """test check_nb_warnings"""
    # clean fixture notebook has no warning (stderr) outputs
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    result = check_nb_warnings(test_nb)
    assert result

    # an "error" output is not a warning, so the warnings check still passes
    test_nb["cells"][2]["outputs"][0]["output_type"] = "error"
    result = check_nb_warnings(test_nb)
    assert result

    # a stream output named "stderr" counts as a warning (asserted past this chunk)
    test_nb["cells"][2]["outputs"][0]["output_type"] = "stream"
test_nb["cells"][2]["outputs"][0]["name"] = "stderr" 91 | result = check_nb_warnings(test_nb) 92 | assert not result 93 | -------------------------------------------------------------------------------- /tests/test_clean.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | from pathlib import Path 4 | 5 | from pytest import CaptureFixture 6 | 7 | from nbmetaclean.clean import ( 8 | NB_METADATA_PRESERVE_MASKS, 9 | CleanConfig, 10 | clean_cell, 11 | clean_nb, 12 | clean_nb_file, 13 | filter_meta_mask, 14 | filter_metadata, 15 | ) 16 | from nbmetaclean.helpers import read_nb, write_nb 17 | 18 | 19 | def test_get_meta_by_mask(): 20 | """test get_meta_by_mask""" 21 | nb = read_nb(Path("tests/test_nbs/.test_nb_2_meta.ipynb")) 22 | nb_meta = nb.get("metadata") 23 | 24 | # string as nb_meta 25 | new_meta = filter_meta_mask("some string") 26 | assert new_meta == "some string" 27 | 28 | # no mask 29 | new_meta = filter_meta_mask(nb_meta) 30 | assert new_meta == {} 31 | 32 | # mask 33 | nb_meta["some key"] = "some value" 34 | new_meta = filter_meta_mask(nb_meta, ("some key",)) 35 | assert new_meta == {"some key": "some value"} 36 | new_meta = filter_meta_mask(nb_meta, NB_METADATA_PRESERVE_MASKS[0]) 37 | assert new_meta == {"language_info": {"name": "python"}} 38 | 39 | # mask for empty result 40 | new_meta = filter_meta_mask(nb_meta, ("some other key",)) 41 | assert new_meta == {} 42 | 43 | 44 | def test_new_metadata(): 45 | """test new_metadata""" 46 | nb_meta = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb").get("metadata") 47 | new_meta = filter_metadata(nb_meta) 48 | assert isinstance(new_meta, dict) 49 | assert not new_meta 50 | new_meta = filter_metadata(nb_meta, [("language_info", "name")]) 51 | assert new_meta == {"language_info": {"name": "python"}} 52 | 53 | 54 | def test_clean_nb_metadata(): 55 | """test clean_nb_metadata""" 56 | test_nb = read_nb("tests/test_nbs/test_nb_2_clean.ipynb") 57 | cfg = 
CleanConfig() 58 | result = clean_nb(test_nb, cfg) 59 | assert not result 60 | 61 | # add metadata, new filter, mask not merged 62 | test_nb["metadata"]["some key"] = "some value" 63 | cfg.nb_metadata_preserve_mask = (("some key",),) 64 | cfg.mask_merge = False 65 | result = clean_nb(test_nb, cfg) 66 | assert result 67 | assert test_nb["metadata"] == {"some key": "some value"} 68 | 69 | # add metadata, new filter, mask merged 70 | test_nb = read_nb("tests/test_nbs/test_nb_2_clean.ipynb") 71 | test_nb["metadata"]["some_key"] = {"key_1": 1, "key_2": 2} 72 | cfg.nb_metadata_preserve_mask = (("some_key", "key_1"),) 73 | cfg.mask_merge = True 74 | result = clean_nb(test_nb, cfg) 75 | assert result 76 | assert test_nb["metadata"]["authors"][0]["name"] == "Andrei Yasyrev" 77 | assert test_nb["metadata"]["some_key"] == {"key_1": 1} 78 | 79 | 80 | def test_clean_cell_metadata(): 81 | """test clean_cell_metadata""" 82 | test_nb = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb") 83 | 84 | # clear outputs 85 | cell = copy.deepcopy(test_nb.get("cells")[1]) 86 | assert cell["cell_type"] == "code" 87 | assert cell.get("outputs") 88 | assert not cell.get("metadata") 89 | assert cell.get("execution_count") == 1 90 | cell["metadata"] = {"some key": "some value"} 91 | changed = clean_cell( 92 | cell, 93 | cfg=CleanConfig( 94 | clear_outputs=True, 95 | clear_cell_metadata=True, 96 | ), 97 | ) 98 | assert changed 99 | assert not cell.get("outputs") 100 | assert not cell.get("metadata") 101 | assert not cell.get("execution_count") 102 | # run again - no changes 103 | changed = clean_cell( 104 | cell, 105 | cfg=CleanConfig( 106 | clear_outputs=True, 107 | clear_cell_metadata=True, 108 | ), 109 | ) 110 | assert not changed 111 | 112 | # dont clear outputs, execution_count, mask 113 | cell = copy.deepcopy(test_nb.get("cells")[1]) 114 | cell["metadata"] = {"some key": "some value"} 115 | cell["outputs"][0]["metadata"] = { 116 | "some key": "some value", 117 | "some other key": "some value", 
118 | } 119 | changed = clean_cell( 120 | cell, 121 | CleanConfig( 122 | clear_execution_count=False, 123 | clear_cell_metadata=True, 124 | cell_metadata_preserve_mask=(("some key",),), 125 | ), 126 | ) 127 | assert changed 128 | assert cell["outputs"][0]["metadata"] == {"some key": "some value"} 129 | assert cell["metadata"] == {"some key": "some value"} 130 | assert cell["execution_count"] == 1 131 | 132 | # clear outputs, same mask -> no changes meta, clear execution_count 133 | changed = clean_cell( 134 | cell, 135 | cfg=CleanConfig(), 136 | ) 137 | assert changed 138 | assert cell["execution_count"] is None 139 | assert cell["metadata"] == {"some key": "some value"} 140 | 141 | # clear execution_count, metadata 142 | changed = clean_cell( 143 | cell, 144 | cfg=CleanConfig( 145 | clear_cell_metadata=True, 146 | ), 147 | ) 148 | assert changed 149 | assert not cell["outputs"][0]["metadata"] 150 | assert not cell["execution_count"] 151 | assert not cell["metadata"] 152 | assert not cell["outputs"][0]["metadata"] 153 | 154 | 155 | def test_clean_cell(): 156 | """test clean_cel""" 157 | test_nb = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb") 158 | 159 | # nothing to clean. 
160 | cell = copy.deepcopy(test_nb.get("cells")[1]) 161 | assert cell.get("outputs") 162 | assert not cell.get("metadata") 163 | assert cell.get("execution_count") == 1 164 | result = clean_cell(cell, CleanConfig(clear_execution_count=False)) 165 | assert not result 166 | 167 | # clean cell metadata, cell without metadata 168 | cell["metadata"] = {} 169 | result = clean_cell(cell, CleanConfig(clear_cell_metadata=True)) 170 | assert result 171 | assert not cell.get("metadata") 172 | assert cell.get("outputs") 173 | 174 | # clear output metadata 175 | cell["outputs"][0]["metadata"] = {"some key": "some value"} 176 | result = clean_cell( 177 | cell, 178 | CleanConfig( 179 | clear_cell_metadata=True, 180 | cell_metadata_preserve_mask=(("some key",),), 181 | ), 182 | ) 183 | assert not result 184 | assert cell["outputs"][0].get("metadata") == {"some key": "some value"} 185 | 186 | 187 | def test_clean_cell_metadata_markdown(): 188 | """test clean_cell_metadata with markdown cell""" 189 | test_nb = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb") 190 | cell = copy.deepcopy(test_nb["cells"][0]) 191 | cell["metadata"] = {"some key": "some value"} 192 | changed = clean_cell( 193 | cell, 194 | cfg=CleanConfig( 195 | clear_cell_metadata=True, 196 | ), 197 | ) 198 | assert changed 199 | assert not cell["metadata"] 200 | 201 | 202 | def test_clean_nb(): 203 | """test clean nb""" 204 | path = Path("tests/test_nbs") 205 | nb_path = path / ".test_nb_2_meta.ipynb" 206 | nb_clean = path / "test_nb_2_clean.ipynb" 207 | nb = read_nb(nb_path) 208 | assert nb["cells"][1]["execution_count"] == 1 209 | assert nb["cells"][1]["outputs"][0]["execution_count"] == 1 210 | assert nb["metadata"] 211 | result = clean_nb(nb, cfg=CleanConfig()) 212 | assert result is True 213 | assert nb["cells"][1]["execution_count"] is None 214 | assert nb["cells"][1]["outputs"][0]["execution_count"] is None 215 | nb_clean = read_nb(nb_clean) 216 | assert nb == nb_clean 217 | 218 | # # try clean cleaned 219 | 
result = clean_nb(nb_clean, cfg=CleanConfig()) 220 | assert not result 221 | 222 | # # clean metadata, leave execution_count 223 | nb = read_nb(nb_path) 224 | result = clean_nb( 225 | nb, 226 | cfg=CleanConfig(clear_execution_count=False), 227 | ) 228 | assert result 229 | assert nb["cells"][1]["execution_count"] == 1 230 | assert nb["cells"][1]["outputs"][0]["execution_count"] == 1 231 | assert nb["metadata"] == nb_clean["metadata"] 232 | 233 | # clean nb metadata, leave cells metadata 234 | nb = read_nb(nb_path) 235 | nb["cells"][1]["metadata"] = {"some key": "some value"} 236 | result = clean_nb(nb, CleanConfig(clear_execution_count=False)) 237 | assert result 238 | assert nb["metadata"] == nb_clean["metadata"] 239 | assert nb["cells"][1]["metadata"] == {"some key": "some value"} 240 | assert nb["cells"][1]["execution_count"] == 1 241 | 242 | # clean cells metadata, leave nb metadata 243 | nb = read_nb(nb_path) 244 | nb_meta = copy.deepcopy(nb["metadata"]) 245 | result = clean_nb(nb, CleanConfig(clear_nb_metadata=False)) 246 | assert result 247 | assert nb["metadata"] == nb_meta 248 | assert nb["cells"][1]["execution_count"] is None 249 | 250 | 251 | def test_clean_nb_file(tmp_path: Path, capsys: CaptureFixture[str]): 252 | """test clean nb file""" 253 | path = Path("tests/test_nbs") 254 | nb_name = ".test_nb_2_meta.ipynb" 255 | nb_clean = read_nb(path / "test_nb_2_clean.ipynb") 256 | 257 | # prepare temp test notebook 258 | nb_source = read_nb(path / nb_name) 259 | test_nb_path = write_nb(nb_source, tmp_path / nb_name) 260 | 261 | # clean meta, leave execution_count 262 | # first lets dry run 263 | cleaned, errors = clean_nb_file( 264 | test_nb_path, 265 | cfg=CleanConfig( 266 | clear_execution_count=False, 267 | dry_run=True, 268 | ), 269 | ) 270 | assert len(cleaned) == 1 271 | assert len(errors) == 0 272 | nb = read_nb(cleaned[0]) 273 | assert nb["metadata"] == nb_source["metadata"] 274 | assert nb["cells"][1]["execution_count"] == 1 275 | assert 
nb["cells"][1]["outputs"][0]["execution_count"] == 1 276 | 277 | # clean meta, leave execution_count 278 | cleaned, errors = clean_nb_file( 279 | test_nb_path, 280 | cfg=CleanConfig(clear_execution_count=False), 281 | ) 282 | assert len(cleaned) == 1 283 | assert len(errors) == 0 284 | nb = read_nb(cleaned[0]) 285 | assert nb["metadata"] == nb_clean["metadata"] 286 | assert nb["cells"][1]["execution_count"] == 1 287 | assert nb["cells"][1]["outputs"][0]["execution_count"] == 1 288 | 289 | # clean meta, execution_count 290 | # path as list 291 | cleaned, errors = clean_nb_file([test_nb_path], CleanConfig()) 292 | assert len(cleaned) == 1 293 | nb = read_nb(cleaned[0]) 294 | assert nb == nb_clean 295 | 296 | # try clean cleaned 297 | cleaned, errors = clean_nb_file(test_nb_path, CleanConfig()) 298 | assert len(cleaned) == 0 299 | assert len(errors) == 0 300 | 301 | 302 | def test_clean_nb_file_errors(capsys: CaptureFixture[str], tmp_path: Path): 303 | """test clean_nb_file, errors""" 304 | # not existing nb 305 | path = tmp_path / "wrong_name" 306 | cleaned, errors = clean_nb_file(path) 307 | assert len(cleaned) == 0 308 | assert len(errors) == 1 309 | assert errors[0] == path 310 | captured = capsys.readouterr() 311 | assert not captured.out 312 | assert not captured.err 313 | 314 | # not valid nb 315 | with path.open("w", encoding="utf-8") as fh: 316 | fh.write("wrong nb") 317 | cleaned, errors = clean_nb_file(path) 318 | assert len(cleaned) == 0 319 | assert len(errors) == 1 320 | assert errors[0].name == "wrong_name" 321 | 322 | captured = capsys.readouterr() 323 | assert not captured.out 324 | assert not captured.err 325 | 326 | 327 | def test_clean_nb_file_timestamp(tmp_path: Path): 328 | """test clean_nb_file, timestamp""" 329 | path = Path("tests/test_nbs") 330 | nb_name = ".test_nb_2_meta.ipynb" 331 | nb_stat = (path / nb_name).stat() 332 | 333 | # prepare temp test notebook, set timestamp 334 | test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / 
nb_name) 335 | os.utime(test_nb_path, (nb_stat.st_atime, nb_stat.st_mtime)) 336 | test_nb_stat = test_nb_path.stat() 337 | assert test_nb_stat.st_atime == nb_stat.st_atime 338 | assert test_nb_stat.st_mtime == nb_stat.st_mtime 339 | 340 | cleaned, errors = clean_nb_file(test_nb_path) 341 | assert len(cleaned) == 1 342 | assert len(errors) == 0 343 | cleaned_stat = cleaned[0].stat() 344 | assert True 345 | assert cleaned_stat.st_mtime == test_nb_stat.st_mtime 346 | 347 | # dont preserve timestamp 348 | test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name) 349 | os.utime(test_nb_path, (nb_stat.st_atime, nb_stat.st_mtime)) 350 | cleaned, errors = clean_nb_file(test_nb_path, CleanConfig(preserve_timestamp=False)) 351 | assert len(cleaned) == 1 352 | assert len(errors) == 0 353 | cleaned_stat = cleaned[0].stat() 354 | assert True 355 | assert cleaned_stat.st_mtime != nb_stat.st_mtime 356 | -------------------------------------------------------------------------------- /tests/test_get_nbnames.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from nbmetaclean.helpers import get_nb_names, get_nb_names_from_list, is_notebook 4 | 5 | 6 | def test_is_notebook(): 7 | """test is_notebook""" 8 | assert is_notebook(Path("tests/test_nbs/test_nb_1.ipynb")) 9 | assert not is_notebook(Path("tests/test_nbs/test_nb_1.py")) 10 | assert not is_notebook(Path("tests/test_nbs/.test_nb_2_meta.ipynb")) 11 | assert is_notebook(Path("tests/test_nbs/.test_nb_2_meta.ipynb"), hidden=True) 12 | 13 | 14 | def test_get_nb_names(): 15 | """test get_nb_names""" 16 | path = Path("tests/test_nbs") 17 | # filename as argument 18 | file = path / "test_nb_1.ipynb" 19 | names = get_nb_names(file) 20 | assert len(names) == 1 21 | names.sort(key=lambda x: x.name) 22 | assert names[0] == file 23 | # filename but not nb 24 | names = get_nb_names("tests/test_clean.py") 25 | assert len(names) == 0 26 | 27 | # path as argument 
28 | names = get_nb_names(path) 29 | assert len(names) == 3 30 | names.sort(key=lambda x: x.name) 31 | assert names[0] == file 32 | # path as argument. add hidden files 33 | names = get_nb_names(path, hidden=True) 34 | assert len(names) == 4 35 | try: 36 | get_nb_names("wrong_name") 37 | assert False 38 | except FileNotFoundError as ex: 39 | assert True 40 | assert str(ex) == "wrong_name not exists!" 41 | 42 | 43 | def test_get_nb_names_recursive_hidden(tmp_path: Path): 44 | """test get_nb_names recursive hidden""" 45 | suffix = ".ipynb" 46 | # add one nb 47 | with open((tmp_path / "tst").with_suffix(suffix), "w", encoding="utf-8") as _: 48 | pass 49 | files = get_nb_names(tmp_path) 50 | assert len(files) == 1 51 | 52 | # add hidden nb 53 | with open((tmp_path / ".tst").with_suffix(suffix), "w", encoding="utf-8") as _: 54 | pass 55 | files = get_nb_names(tmp_path) 56 | assert len(files) == 1 57 | files = get_nb_names(tmp_path, hidden=True) 58 | assert len(files) == 2 59 | # add simple file 60 | with open((tmp_path / "simple"), "w", encoding="utf-8") as _: 61 | pass 62 | files = get_nb_names(tmp_path) 63 | assert len(files) == 1 64 | 65 | # add dir with one nb, hidden nb 66 | new_dir = tmp_path / "new_dir" 67 | new_dir.mkdir() 68 | with open((new_dir / "tst").with_suffix(suffix), "w", encoding="utf-8") as _: 69 | pass 70 | with open((new_dir / ".tst").with_suffix(suffix), "w", encoding="utf-8") as _: 71 | pass 72 | files = get_nb_names(tmp_path) 73 | assert len(files) == 2 74 | files = get_nb_names(tmp_path, hidden=True) 75 | assert len(files) == 4 76 | 77 | files = get_nb_names(tmp_path, recursive=False) 78 | assert len(files) == 1 79 | 80 | # add hidden dir 81 | hid_dir = tmp_path / ".hid_dir" 82 | hid_dir.mkdir() 83 | with open((hid_dir / "tst").with_suffix(suffix), "w", encoding="utf-8") as _: 84 | pass 85 | with open((hid_dir / ".tst").with_suffix(suffix), "w", encoding="utf-8") as _: 86 | pass 87 | files = get_nb_names(tmp_path, hidden=True) 88 | assert 
len(files) == 6 89 | files = get_nb_names(tmp_path) 90 | assert len(files) == 2 91 | 92 | # add checkpoint dir and file 93 | # files at this dir will be skipped 94 | checkpoint_dir = tmp_path / ".ipynb_checkpoints" 95 | checkpoint_dir.mkdir() 96 | with open( 97 | (checkpoint_dir / "nb-checkpoint").with_suffix(suffix), "w", encoding="utf-8" 98 | ) as _: 99 | pass 100 | with open( 101 | (checkpoint_dir / "some_nb").with_suffix(suffix), "w", encoding="utf-8" 102 | ) as _: 103 | pass 104 | files = get_nb_names(tmp_path) 105 | assert len(files) == 2 106 | files = get_nb_names(tmp_path, hidden=True) 107 | assert len(files) == 6 108 | 109 | 110 | def test_get_nb_names_from_list(): 111 | """test get_nb_names_from_list""" 112 | path = Path("tests/test_nbs") 113 | # filename as argument 114 | file = path / "test_nb_1.ipynb" 115 | names = get_nb_names_from_list(file) 116 | assert len(names) == 1 117 | assert names[0] == file 118 | 119 | # filename as list 120 | names = get_nb_names_from_list([file]) 121 | assert len(names) == 1 122 | assert names[0] == file 123 | 124 | # filename but not nb 125 | names = get_nb_names_from_list("tests/test_clean.py") 126 | assert len(names) == 0 127 | 128 | # path as list, not all notebooks 129 | names = get_nb_names_from_list([file, "wrong_name", "tests/test_clean.py"]) 130 | assert len(names) == 1 131 | assert names[0] == file 132 | 133 | # folder as argument 134 | names = get_nb_names_from_list(path) 135 | assert len(names) == 3 136 | names.sort(key=lambda x: x.name) 137 | assert names[0] == file 138 | # path as argument. 
add hidden files 139 | names = get_nb_names(path, hidden=True) 140 | assert len(names) == 4 141 | names = get_nb_names_from_list("wrong_name") 142 | assert len(names) == 0 143 | -------------------------------------------------------------------------------- /tests/test_nbs/.test_nb_2_meta.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "markdown cell source" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "2" 19 | ] 20 | }, 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "output_type": "execute_result" 24 | } 25 | ], 26 | "source": [ 27 | "1 + 1" 28 | ] 29 | } 30 | ], 31 | "metadata": { 32 | "authors": [ 33 | { 34 | "github": "https://github.com/ayasyrev", 35 | "name": "Andrei Yasyrev" 36 | } 37 | ], 38 | "kernelspec": { 39 | "display_name": "nbmetaclean", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.11.6" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 2 58 | } 59 | -------------------------------------------------------------------------------- /tests/test_nbs/test_nb_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [] 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": null, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": [] 14 | } 15 | ], 16 | "metadata": { 17 | "authors": [ 18 | { 19 | "github": "https://github.com/ayasyrev", 20 | "name": "Andrei Yasyrev" 21 | } 22 | ], 
23 | "language_info": { 24 | "name": "python" 25 | } 26 | }, 27 | "nbformat": 4, 28 | "nbformat_minor": 2 29 | } 30 | -------------------------------------------------------------------------------- /tests/test_nbs/test_nb_2_clean.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "markdown cell source" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "2" 19 | ] 20 | }, 21 | "execution_count": null, 22 | "metadata": {}, 23 | "output_type": "execute_result" 24 | } 25 | ], 26 | "source": [ 27 | "1 + 1" 28 | ] 29 | } 30 | ], 31 | "metadata": { 32 | "authors": [ 33 | { 34 | "github": "https://github.com/ayasyrev", 35 | "name": "Andrei Yasyrev" 36 | } 37 | ], 38 | "language_info": { 39 | "name": "python" 40 | } 41 | }, 42 | "nbformat": 4, 43 | "nbformat_minor": 2 44 | } 45 | -------------------------------------------------------------------------------- /tests/test_nbs/test_nb_3_ec.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "nb for check execution count" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/plain": [ 25 | "2" 26 | ] 27 | }, 28 | "execution_count": null, 29 | "metadata": {}, 30 | "output_type": "execute_result" 31 | } 32 | ], 33 | "source": [ 34 | "1 + 1" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/plain": [ 45 | "4" 46 | ] 47 | }, 48 | 
"execution_count": null, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "2 + 2" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "6" 73 | ] 74 | }, 75 | "execution_count": null, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "3 + 3" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [] 90 | } 91 | ], 92 | "metadata": { 93 | "authors": [ 94 | { 95 | "github": "https://github.com/ayasyrev", 96 | "name": "Andrei Yasyrev" 97 | } 98 | ], 99 | "language_info": { 100 | "name": "python" 101 | } 102 | }, 103 | "nbformat": 4, 104 | "nbformat_minor": 2 105 | } 106 | -------------------------------------------------------------------------------- /tests/test_read_write.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from nbmetaclean.helpers import read_nb, write_nb 4 | 5 | 6 | def test_read_nb(): 7 | """test read notebook""" 8 | file = Path("tests/test_nbs/test_nb_1.ipynb") 9 | nb = read_nb(file) 10 | assert isinstance(nb, dict) 11 | assert nb["metadata"]["language_info"] == {"name": "python"} 12 | assert nb["metadata"]["authors"][0]["name"] == "Andrei Yasyrev" 13 | assert nb["nbformat"] == 4 14 | assert nb["nbformat_minor"] == 2 15 | cells = nb["cells"] 16 | assert isinstance(cells, list) 17 | assert len(cells) == 2 18 | # markdown 19 | assert cells[0]["cell_type"] == "markdown" 20 | assert cells[0]["source"] == [] 21 | assert cells[0]["metadata"] == {} 22 | # code 23 | assert cells[1]["cell_type"] == "code" 24 | assert cells[1]["source"] == [] 25 | assert cells[1]["execution_count"] is 
None
    assert cells[1]["metadata"] == {}
    assert cells[1]["outputs"] == []


def test_write_nb(tmp_path: Path):
    """test write notebook"""
    file = Path("tests/test_nbs/test_nb_1.ipynb")
    nb = read_nb(file)
    write_nb(nb, tmp_path / file.name)
    # round-trip: written file must be byte-identical to the source fixture
    with open(tmp_path / file.name, "r", encoding="utf-8") as fh:
        res_text = fh.read()
    with open(file, "r", encoding="utf-8") as fh:
        org_text = fh.read()
    assert res_text == org_text

    # write with name w/o suffix
    # write_nb appends ".ipynb" and returns the resulting path
    result = write_nb(nb, tmp_path / "test_nb_1")
    assert result == tmp_path / "test_nb_1.ipynb"

    # write with stat
    # passing a (atime, mtime) timestamp must be applied to the written file
    stat = file.stat()
    timestamp = (stat.st_atime, stat.st_mtime)
    result = write_nb(nb, tmp_path / "test_nb_1", timestamp=timestamp)
    res_stat = result.stat()
    assert timestamp == (res_stat.st_atime, res_stat.st_mtime)


def test_read_nb_errors(tmp_path: Path):
    """test read notebook not exist or invalid"""
    # not valid
    # read_nb signals any failure by returning None rather than raising
    with open(tmp_path / "test.ipynb", "w", encoding="utf-8") as fh:
        fh.write("invalid")
    assert read_nb(tmp_path / "test.ipynb") is None

    # not exist
    assert read_nb(tmp_path / "test_nb_1.ipynb") is None

    # not file
    assert read_nb(tmp_path) is None
--------------------------------------------------------------------------------