├── .flake8 ├── .github └── workflows │ ├── lint.yml │ └── tests.yml ├── .gitignore ├── .mypy.ini ├── .pre-commit-config.yaml ├── .pre-commit-hooks.yaml ├── .pylintrc ├── LICENSE ├── Makefile ├── README.md ├── docs ├── README.md └── overrides │ └── partials │ └── copyright.html ├── mkdocs.yaml ├── noxfile.py ├── noxfile_conda.py ├── noxfile_conda_lint.py ├── noxfile_lint.py ├── pyproject.toml ├── requirements_dev.txt ├── requirements_test.txt ├── setup.cfg ├── setup.py ├── src └── nbmetaclean │ ├── __init__.py │ ├── app_check.py │ ├── app_clean.py │ ├── check.py │ ├── clean.py │ ├── helpers.py │ ├── nb_types.py │ └── version.py └── tests ├── test_app_check.py ├── test_app_clean.py ├── test_check.py ├── test_clean.py ├── test_get_nbnames.py ├── test_nbs ├── .test_nb_2_meta.ipynb ├── test_nb_1.ipynb ├── test_nb_2_clean.ipynb └── test_nb_3_ec.ipynb └── test_read_write.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = C,E,F,W 3 | max-complexity = 10 4 | max-line-length = 120 5 | extend-ignore = W503 6 | disable-noqa = True 7 | application-import-names = nbmetaclean, tests 8 | import-order-style = google 9 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | on: 3 | push: 4 | branches: 5 | - dev 6 | - main 7 | jobs: 8 | tests: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@main 12 | - uses: actions/setup-python@main 13 | with: 14 | python-version: "3.11" 15 | architecture: x64 16 | - run: pip install ruff 17 | - run: ruff check . 
18 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | branches: 5 | - dev 6 | - main 7 | jobs: 8 | tests: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@main 16 | - name: Setup Python ${{ matrix.python }} 17 | uses: actions/setup-python@main 18 | with: 19 | python-version: ${{ matrix.python }} 20 | architecture: x64 21 | 22 | - name: Install 23 | run: | 24 | pip install uv 25 | uv pip install --system .[test] "coverage[toml]" 26 | 27 | - name: Tests 28 | run: pytest --cov 29 | 30 | - name: CodeCov 31 | if: ${{ matrix.python == '3.11' }} 32 | uses: codecov/codecov-action@main 33 | with: 34 | token: ${{ secrets.CODECOV_TOKEN }} 35 | slug: ayasyrev/nbmetaclean 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # my 2 | .vscode/ 3 | tmp*/ 4 | cov.xml 5 | 6 | # ide 7 | 8 | .idea/ 9 | .vscode/settings.json 10 | 11 | # nox 12 | .nox 13 | 14 | # Byte-compiled / optimized / DLL files 15 | __pycache__/ 16 | *.py[cod] 17 | *$py.class 18 | 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | pip-wheel-metadata/ 37 | share/python-wheels/ 38 | *.egg-info/ 39 | .installed.cfg 40 | *.egg 41 | MANIFEST 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | *.py,cover 64 | .hypothesis/ 65 | .pytest_cache/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | db.sqlite3 75 | db.sqlite3-journal 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 108 | __pypackages__/ 109 | 110 | # Celery stuff 111 | celerybeat-schedule 112 | celerybeat.pid 113 | 114 | # SageMath parsed files 115 | *.sage.py 116 | 117 | # Environments 118 | .env 119 | .venv 120 | env/ 121 | venv/ 122 | ENV/ 123 | env.bak/ 124 | venv.bak/ 125 | 126 | # Spyder project settings 127 | .spyderproject 128 | .spyproject 129 | 130 | # Rope project settings 131 | .ropeproject 132 | 133 | # mkdocs documentation 134 | /site 135 | 136 | # mypy 137 | .mypy_cache/ 138 | .dmypy.json 139 | dmypy.json 140 | 141 | # Pyre type checker 142 | .pyre/ 143 | -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | 4 | hooks: 5 | # local version for testing 6 | - id: nbmetaclean 7 | name: nbmetaclean local 8 | entry: nbmetaclean 9 | language: system 10 | files: \.ipynb 11 | 12 | - id: nbcheck 13 | name: nbcheck execution_count local 14 | entry: nbcheck 15 | language: system 16 | files: \.ipynb 17 | args: [ --ec, --no_exec, --err ] 18 | 19 | - repo: https://github.com/pre-commit/pre-commit-hooks 20 | rev: v4.6.0 21 | hooks: 22 | - id: check-added-large-files 23 | - id: check-ast 24 | - id: check-builtin-literals 25 | - id: check-case-conflict 26 | - id: check-docstring-first 27 | - id: check-executables-have-shebangs 28 | - id: check-shebang-scripts-are-executable 29 | - id: check-symlinks 30 | - id: check-toml 31 | - id: check-xml 32 | - id: detect-private-key 33 | - id: forbid-new-submodules 34 | - id: forbid-submodules 35 | - id: mixed-line-ending 36 | - id: destroyed-symlinks 37 | - id: fix-byte-order-marker 38 | - id: check-json 39 | - id: check-yaml 40 | args: [ --unsafe ] 41 | - 
id: debug-statements 42 | - id: end-of-file-fixer 43 | - id: trailing-whitespace 44 | - id: requirements-txt-fixer 45 | - repo: https://github.com/astral-sh/ruff-pre-commit 46 | # Ruff version. 47 | rev: v0.6.1 48 | 49 | hooks: 50 | # Run the linter. 51 | - id: ruff 52 | args: [ --fix ] 53 | # Run the formatter. 54 | - id: ruff-format 55 | - repo: https://github.com/pre-commit/pygrep-hooks 56 | rev: v1.10.0 57 | hooks: 58 | - id: python-check-mock-methods 59 | - id: python-use-type-annotations 60 | - id: python-check-blanket-noqa 61 | - id: text-unicode-replacement-char 62 | -------------------------------------------------------------------------------- /.pre-commit-hooks.yaml: -------------------------------------------------------------------------------- 1 | - id: nbmetaclean 2 | name: nbmetaclean 3 | description: Clean Jupyter Notebooks metadata and optionally output. 4 | entry: nbmetaclean 5 | files: \.ipynb$ 6 | language: python 7 | language_version: python3 8 | 9 | # Same as nbmetaclean, for compatibility. 10 | - id: nbclean 11 | name: nbclean 12 | description: Clean Jupyter Notebooks metadata and optionally output. 13 | entry: nbclean 14 | files: \.ipynb$ 15 | language: python 16 | language_version: python3 17 | 18 | - id: nbcheck 19 | name: nbcheck 20 | description: Check Jupyter Notebooks for correct sequence of execution_count and (or) errors in outputs. 21 | entry: nbcheck 22 | files: \.ipynb$ 23 | language: python 24 | language_version: python3 25 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 
6 | extension-pkg-allow-list= 7 | 8 | # A comma-separated list of package or module names from where C extensions may 9 | # be loaded. Extensions are loading into the active Python interpreter and may 10 | # run arbitrary code. (This is an alternative name to extension-pkg-allow-list 11 | # for backward compatibility.) 12 | extension-pkg-whitelist=pydantic 13 | ; extension-pkg-whitelist=pydantic , nbconvert, nbformat 14 | 15 | # Return non-zero exit code if any of these messages/categories are detected, 16 | # even if score is above --fail-under value. Syntax same as enable. Messages 17 | # specified are enabled, while categories only check already-enabled messages. 18 | fail-on= 19 | 20 | # Specify a score threshold to be exceeded before program exits with error. 21 | fail-under=10.0 22 | 23 | # Files or directories to be skipped. They should be base names, not paths. 24 | ignore=CVS 25 | 26 | # Add files or directories matching the regex patterns to the ignore-list. The 27 | # regex matches against paths and can be in Posix or Windows format. 28 | ignore-paths= 29 | 30 | # Files or directories matching the regex patterns are skipped. The regex 31 | # matches against base names, not paths. 32 | ignore-patterns= 33 | 34 | # Python code to execute, usually for sys.path manipulation such as 35 | # pygtk.require(). 36 | #init-hook= 37 | 38 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 39 | # number of processors available to use. 40 | jobs=1 41 | 42 | # Control the amount of potential inferred values when inferring a single 43 | # object. This can help the performance when dealing with large functions or 44 | # complex, nested conditions. 45 | limit-inference-results=100 46 | 47 | # List of plugins (as comma separated values of python module names) to load, 48 | # usually to register additional checkers. 49 | load-plugins= 50 | 51 | # Pickle collected data for later comparisons. 
52 | persistent=yes 53 | 54 | # Minimum Python version to use for version dependent checks. Will default to 55 | # the version used to run pylint. 56 | py-version=3.9 57 | 58 | # When enabled, pylint would attempt to guess common misconfiguration and emit 59 | # user-friendly hints instead of false-positive error messages. 60 | suggestion-mode=yes 61 | 62 | # Allow loading of arbitrary C extensions. Extensions are imported into the 63 | # active Python interpreter and may run arbitrary code. 64 | unsafe-load-any-extension=no 65 | 66 | 67 | [MESSAGES CONTROL] 68 | 69 | # Only show warnings with the listed confidence levels. Leave empty to show 70 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 71 | confidence= 72 | 73 | # Disable the message, report, category or checker with the given id(s). You 74 | # can either give multiple identifiers separated by comma (,) or put this 75 | # option multiple times (only on the command line, not in the configuration 76 | # file where it should appear only once). You can also use "--disable=all" to 77 | # disable everything first and then reenable specific checks. For example, if 78 | # you want to run only the similarities checker, you can use "--disable=all 79 | # --enable=similarities". If you want to run only the classes checker, but have 80 | # no Warning level messages displayed, use "--disable=all --enable=classes 81 | # --disable=W". 82 | disable=raw-checker-failed, 83 | bad-inline-option, 84 | locally-disabled, 85 | file-ignored, 86 | suppressed-message, 87 | useless-suppression, 88 | deprecated-pragma, 89 | use-symbolic-message-instead, 90 | exec-used, 91 | missing-module-docstring, 92 | missing-docstring, 93 | invalid-name 94 | 95 | # Enable the message, report, category or checker with the given id(s). 
You can 96 | # either give multiple identifier separated by comma (,) or put this option 97 | # multiple time (only on the command line, not in the configuration file where 98 | # it should appear only once). See also the "--disable" option for examples. 99 | enable=c-extension-no-member 100 | 101 | 102 | [REPORTS] 103 | 104 | # Python expression which should return a score less than or equal to 10. You 105 | # have access to the variables 'error', 'warning', 'refactor', and 'convention' 106 | # which contain the number of messages in each category, as well as 'statement' 107 | # which is the total number of statements analyzed. This score is used by the 108 | # global evaluation report (RP0004). 109 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 110 | 111 | # Template used to display messages. This is a python new-style format string 112 | # used to format the message information. See doc for all details. 113 | #msg-template= 114 | 115 | # Set the output format. Available formats are text, parseable, colorized, json 116 | # and msvs (visual studio). You can also give a reporter class, e.g. 117 | # mypackage.mymodule.MyReporterClass. 118 | output-format=text 119 | 120 | # Tells whether to display a full report or only the messages. 121 | reports=no 122 | 123 | # Activate the evaluation score. 124 | score=yes 125 | 126 | 127 | [REFACTORING] 128 | 129 | # Maximum number of nested blocks for function / method body 130 | max-nested-blocks=5 131 | 132 | # Complete name of functions that never returns. When checking for 133 | # inconsistent-return-statements if a never returning function is called then 134 | # it will be considered as an explicit return statement and no message will be 135 | # printed. 136 | never-returning-functions=sys.exit,argparse.parse_error 137 | 138 | 139 | [SPELLING] 140 | 141 | # Limits count of emitted suggestions for spelling mistakes. 
142 | max-spelling-suggestions=4 143 | 144 | # Spelling dictionary name. Available dictionaries: none. To make it work, 145 | # install the 'python-enchant' package. 146 | spelling-dict= 147 | 148 | # List of comma separated words that should be considered directives if they 149 | # appear at the beginning of a comment and should not be checked. 150 | spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: 151 | 152 | # List of comma separated words that should not be checked. 153 | spelling-ignore-words= 154 | 155 | # A path to a file that contains the private dictionary; one word per line. 156 | spelling-private-dict-file= 157 | 158 | # Tells whether to store unknown words to the private dictionary (see the 159 | # --spelling-private-dict-file option) instead of raising a message. 160 | spelling-store-unknown-words=no 161 | 162 | 163 | [FORMAT] 164 | 165 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 166 | expected-line-ending-format= 167 | 168 | # Regexp for a line that is allowed to be longer than the limit. 169 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$ 170 | 171 | # Number of spaces of indent required inside a hanging or continued line. 172 | indent-after-paren=4 173 | 174 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 175 | # tab). 176 | indent-string=' ' 177 | 178 | # Maximum number of characters on a single line. 179 | max-line-length=120 180 | 181 | # Maximum number of lines in a module. 182 | max-module-lines=1000 183 | 184 | # Allow the body of a class to be on the same line as the declaration if body 185 | # contains single statement. 186 | single-line-class-stmt=no 187 | 188 | # Allow the body of an if to be on the same line as the test if there is no 189 | # else. 190 | single-line-if-stmt=no 191 | 192 | 193 | [VARIABLES] 194 | 195 | # List of additional names supposed to be defined in builtins.
Remember that 196 | # you should avoid defining new builtins when possible. 197 | additional-builtins= 198 | 199 | # Tells whether unused global variables should be treated as a violation. 200 | allow-global-unused-variables=yes 201 | 202 | # List of names allowed to shadow builtins 203 | allowed-redefined-builtins= 204 | 205 | # List of strings which can identify a callback function by name. A callback 206 | # name must start or end with one of those strings. 207 | callbacks=cb_, 208 | _cb 209 | 210 | # A regular expression matching the name of dummy variables (i.e. expected to 211 | # not be used). 212 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 213 | 214 | # Argument names that match this expression will be ignored. Default to name 215 | # with leading underscore. 216 | ignored-argument-names=_.*|^ignored_|^unused_ 217 | 218 | # Tells whether we should check for unused import in __init__ files. 219 | init-import=no 220 | 221 | # List of qualified module names which can have objects that can redefine 222 | # builtins. 223 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 224 | 225 | 226 | [SIMILARITIES] 227 | 228 | # Comments are removed from the similarity computation 229 | ignore-comments=yes 230 | 231 | # Docstrings are removed from the similarity computation 232 | ignore-docstrings=yes 233 | 234 | # Imports are removed from the similarity computation 235 | ignore-imports=no 236 | 237 | # Signatures are removed from the similarity computation 238 | ignore-signatures=no 239 | 240 | # Minimum lines number of a similarity. 241 | min-similarity-lines=4 242 | 243 | 244 | [LOGGING] 245 | 246 | # The type of string formatting that logging methods do. `old` means using % 247 | # formatting, `new` is for `{}` formatting. 248 | logging-format-style=old 249 | 250 | # Logging modules to check that the string format arguments are in logging 251 | # function parameter format. 
252 | logging-modules=logging 253 | 254 | 255 | [BASIC] 256 | 257 | # Naming style matching correct argument names. 258 | argument-naming-style=snake_case 259 | 260 | # Regular expression matching correct argument names. Overrides argument- 261 | # naming-style. 262 | #argument-rgx= 263 | 264 | # Naming style matching correct attribute names. 265 | attr-naming-style=snake_case 266 | 267 | # Regular expression matching correct attribute names. Overrides attr-naming- 268 | # style. 269 | #attr-rgx= 270 | 271 | # Bad variable names which should always be refused, separated by a comma. 272 | bad-names=foo, 273 | bar, 274 | baz, 275 | toto, 276 | tutu, 277 | tata 278 | 279 | # Bad variable names regexes, separated by a comma. If names match any regex, 280 | # they will always be refused 281 | bad-names-rgxs= 282 | 283 | # Naming style matching correct class attribute names. 284 | class-attribute-naming-style=any 285 | 286 | # Regular expression matching correct class attribute names. Overrides class- 287 | # attribute-naming-style. 288 | #class-attribute-rgx= 289 | 290 | # Naming style matching correct class constant names. 291 | class-const-naming-style=UPPER_CASE 292 | 293 | # Regular expression matching correct class constant names. Overrides class- 294 | # const-naming-style. 295 | #class-const-rgx= 296 | 297 | # Naming style matching correct class names. 298 | class-naming-style=PascalCase 299 | 300 | # Regular expression matching correct class names. Overrides class-naming- 301 | # style. 302 | #class-rgx= 303 | 304 | # Naming style matching correct constant names. 305 | const-naming-style=UPPER_CASE 306 | 307 | # Regular expression matching correct constant names. Overrides const-naming- 308 | # style. 309 | #const-rgx= 310 | 311 | # Minimum line length for functions/classes that require docstrings, shorter 312 | # ones are exempt. 313 | docstring-min-length=-1 314 | 315 | # Naming style matching correct function names. 
316 | function-naming-style=snake_case 317 | 318 | # Regular expression matching correct function names. Overrides function- 319 | # naming-style. 320 | #function-rgx= 321 | 322 | # Good variable names which should always be accepted, separated by a comma. 323 | good-names=i, 324 | j, 325 | k, 326 | ex, 327 | Run, 328 | _ 329 | 330 | # Good variable names regexes, separated by a comma. If names match any regex, 331 | # they will always be accepted 332 | good-names-rgxs= 333 | 334 | # Include a hint for the correct naming format with invalid-name. 335 | include-naming-hint=no 336 | 337 | # Naming style matching correct inline iteration names. 338 | inlinevar-naming-style=any 339 | 340 | # Regular expression matching correct inline iteration names. Overrides 341 | # inlinevar-naming-style. 342 | #inlinevar-rgx= 343 | 344 | # Naming style matching correct method names. 345 | method-naming-style=snake_case 346 | 347 | # Regular expression matching correct method names. Overrides method-naming- 348 | # style. 349 | #method-rgx= 350 | 351 | # Naming style matching correct module names. 352 | module-naming-style=snake_case 353 | 354 | # Regular expression matching correct module names. Overrides module-naming- 355 | # style. 356 | #module-rgx= 357 | 358 | # Colon-delimited sets of names that determine each other's naming style when 359 | # the name regexes allow several styles. 360 | name-group= 361 | 362 | # Regular expression which should only match function or class names that do 363 | # not require a docstring. 364 | no-docstring-rgx=^_ 365 | 366 | # List of decorators that produce properties, such as abc.abstractproperty. Add 367 | # to this list to register other decorators that produce valid properties. 368 | # These decorators are taken in consideration only for invalid-name. 369 | property-classes=abc.abstractproperty 370 | 371 | # Naming style matching correct variable names. 
372 | variable-naming-style=snake_case 373 | 374 | # Regular expression matching correct variable names. Overrides variable- 375 | # naming-style. 376 | #variable-rgx= 377 | 378 | 379 | [TYPECHECK] 380 | 381 | # List of decorators that produce context managers, such as 382 | # contextlib.contextmanager. Add to this list to register other decorators that 383 | # produce valid context managers. 384 | contextmanager-decorators=contextlib.contextmanager 385 | 386 | # List of members which are set dynamically and missed by pylint inference 387 | # system, and so shouldn't trigger E1101 when accessed. Python regular 388 | # expressions are accepted. 389 | generated-members= 390 | 391 | # Tells whether missing members accessed in mixin class should be ignored. A 392 | # class is considered mixin if its name matches the mixin-class-rgx option. 393 | ignore-mixin-members=yes 394 | 395 | # Tells whether to warn about missing members when the owner of the attribute 396 | # is inferred to be None. 397 | ignore-none=yes 398 | 399 | # This flag controls whether pylint should warn about no-member and similar 400 | # checks whenever an opaque object is returned when inferring. The inference 401 | # can return multiple potential results while evaluating a Python object, but 402 | # some branches might not be evaluated, which results in partial inference. In 403 | # that case, it might be useful to still emit no-member and other checks for 404 | # the rest of the inferred objects. 405 | ignore-on-opaque-inference=yes 406 | 407 | # List of class names for which member attributes should not be checked (useful 408 | # for classes with dynamically set attributes). This supports the use of 409 | # qualified names. 
410 | ignored-classes=optparse.Values,thread._local,_thread._local 411 | 412 | # List of module names for which member attributes should not be checked 413 | # (useful for modules/projects where namespaces are manipulated during runtime 414 | # and thus existing member attributes cannot be deduced by static analysis). It 415 | # supports qualified module names, as well as Unix pattern matching. 416 | ignored-modules= 417 | 418 | # Show a hint with possible names when a member name was not found. The aspect 419 | # of finding the hint is based on edit distance. 420 | missing-member-hint=yes 421 | 422 | # The minimum edit distance a name should have in order to be considered a 423 | # similar match for a missing member name. 424 | missing-member-hint-distance=1 425 | 426 | # The total number of similar names that should be taken in consideration when 427 | # showing a hint for a missing member. 428 | missing-member-max-choices=1 429 | 430 | # Regex pattern to define which classes are considered mixins when ignore-mixin- 431 | # members is set to 'yes' 432 | mixin-class-rgx=.*[Mm]ixin 433 | 434 | # List of decorators that change the signature of a decorated function. 435 | signature-mutators= 436 | 437 | 438 | [MISCELLANEOUS] 439 | 440 | # List of note tags to take in consideration, separated by a comma. 441 | notes=FIXME, 442 | XXX, 443 | TODO 444 | 445 | # Regular expression of note tags to take in consideration. 446 | #notes-rgx= 447 | 448 | 449 | [STRING] 450 | 451 | # This flag controls whether inconsistent-quotes generates a warning when the 452 | # character used as a quote delimiter is used inconsistently within a module. 453 | check-quote-consistency=no 454 | 455 | # This flag controls whether the implicit-str-concat should generate a warning 456 | # on implicit string concatenation in sequences defined over several lines.
457 | check-str-concat-over-line-jumps=no 458 | 459 | 460 | [IMPORTS] 461 | 462 | # List of modules that can be imported at any level, not just the top level 463 | # one. 464 | allow-any-import-level= 465 | 466 | # Allow wildcard imports from modules that define __all__. 467 | allow-wildcard-with-all=no 468 | 469 | # Analyse import fallback blocks. This can be used to support both Python 2 and 470 | # 3 compatible code, which means that the block might have code that exists 471 | # only in one or another interpreter, leading to false positives when analysed. 472 | analyse-fallback-blocks=no 473 | 474 | # Deprecated modules which should not be used, separated by a comma. 475 | deprecated-modules= 476 | 477 | # Output a graph (.gv or any supported image format) of external dependencies 478 | # to the given file (report RP0402 must not be disabled). 479 | ext-import-graph= 480 | 481 | # Output a graph (.gv or any supported image format) of all (i.e. internal and 482 | # external) dependencies to the given file (report RP0402 must not be 483 | # disabled). 484 | import-graph= 485 | 486 | # Output a graph (.gv or any supported image format) of internal dependencies 487 | # to the given file (report RP0402 must not be disabled). 488 | int-import-graph= 489 | 490 | # Force import order to recognize a module as part of the standard 491 | # compatibility libraries. 492 | known-standard-library= 493 | 494 | # Force import order to recognize a module as part of a third party library. 495 | known-third-party=enchant 496 | 497 | # Couples of modules and preferred modules, separated by a comma. 
498 | preferred-modules= 499 | 500 | 501 | [DESIGN] 502 | 503 | # List of regular expressions of class ancestor names to ignore when counting 504 | # public methods (see R0903) 505 | exclude-too-few-public-methods= 506 | 507 | # List of qualified class names to ignore when counting class parents (see 508 | # R0901) 509 | ignored-parents= 510 | 511 | # Maximum number of arguments for function / method. 512 | max-args=5 513 | 514 | # Maximum number of attributes for a class (see R0902). 515 | max-attributes=7 516 | 517 | # Maximum number of boolean expressions in an if statement (see R0916). 518 | max-bool-expr=5 519 | 520 | # Maximum number of branch for function / method body. 521 | max-branches=12 522 | 523 | # Maximum number of locals for function / method body. 524 | max-locals=15 525 | 526 | # Maximum number of parents for a class (see R0901). 527 | max-parents=7 528 | 529 | # Maximum number of public methods for a class (see R0904). 530 | max-public-methods=20 531 | 532 | # Maximum number of return / yield for function / method body. 533 | max-returns=6 534 | 535 | # Maximum number of statements in function / method body. 536 | max-statements=50 537 | 538 | # Minimum number of public methods for a class (see R0903). 539 | min-public-methods=2 540 | 541 | 542 | [CLASSES] 543 | 544 | # Warn about protected attribute access inside special methods 545 | check-protected-access-in-special-methods=no 546 | 547 | # List of method names used to declare (i.e. assign) instance attributes. 548 | defining-attr-methods=__init__, 549 | __new__, 550 | setUp, 551 | __post_init__ 552 | 553 | # List of member names, which should be excluded from the protected access 554 | # warning. 555 | exclude-protected=_asdict, 556 | _fields, 557 | _replace, 558 | _source, 559 | _make 560 | 561 | # List of valid names for the first argument in a class method. 562 | valid-classmethod-first-arg=cls 563 | 564 | # List of valid names for the first argument in a metaclass class method. 
565 | valid-metaclass-classmethod-first-arg=cls 566 | 567 | 568 | [EXCEPTIONS] 569 | 570 | # Exceptions that will emit a warning when being caught. Defaults to 571 | # "BaseException, Exception". 572 | overgeneral-exceptions=BaseException, 573 | Exception 574 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .ONESHELL: 2 | SHELL := /bin/bash 3 | 4 | pypi: dist 5 | twine upload --repository pypi dist/* 6 | 7 | dist: clean 8 | python3 -m build 9 | 10 | clean: 11 | rm -rf dist 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nbmetaclean 2 | Collections of python scripts for checking and cleaning Jupyter Notebooks metadata, execution_count and optionally output. 3 | Can be used as command line tool or pre-commit hook. 4 | 5 | 6 | Pure Python, no dependencies. 7 | 8 | Can be used as a pre-commit hook or as a command line tool. 
9 | 10 | 11 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/benchmark-utils)](https://pypi.org/project/nbmetaclean/) 12 | [![PyPI Status](https://badge.fury.io/py/nbmetaclean.svg)](https://badge.fury.io/py/nbmetaclean) 13 | [![Tests](https://github.com/ayasyrev/nbmetaclean/workflows/Tests/badge.svg)](https://github.com/ayasyrev/nbmetaclean/actions?workflow=Tests) 14 | [![Codecov](https://codecov.io/gh/ayasyrev/nbmetaclean/branch/main/graph/badge.svg)](https://codecov.io/gh/ayasyrev/nbmetaclean) 15 | 16 | ## nbmetaclean 17 | 18 | Clean Jupyter Notebooks metadata, execution_count and optionally output. 19 | 20 | ## nbcheck 21 | Check Jupyter Notebooks for errors and (or) warnings in outputs. 22 | 23 | 24 | ## Base usage 25 | 26 | ### Pre-commit hook 27 | Nbmetaclean can be used as a pre-commit hook, with [pre-commit](https://pre-commit.com/pre-commit). 28 | You do not need to install nbmetaclean, it will be installed automatically. 29 | add to `.pre-commit-config.yaml`: 30 | ```yaml 31 | repos: 32 | - repo: https://github.com/ayasyrev/nbmetaclean 33 | rev: 0.1.1 34 | hooks: 35 | - id: nbmetaclean 36 | - id: nbcheck 37 | args: [ --ec, --err, --warn ] 38 | ``` 39 | 40 | 41 | 42 | ### Command line tool 43 | 44 | #### Without install: 45 | If you use [uv](https://github.com/astral-sh/uv) package manager, you can run nbmetaclean without installing it. 46 | To clean notebooks: 47 | ```bash 48 | uvx nbmetaclean 49 | ``` 50 | To check notebooks: 51 | ```bash 52 | uvx --from nbmetaclean nbcheck --ec --err --warn 53 | ``` 54 | 55 | #### Install: 56 | ```bash 57 | pip install nbmetaclean 58 | ``` 59 | 60 | Usage: 61 | run `nbmetaclean` or `nbcheck` command with `path` to notebook or folder with notebooks. 62 | If no `path` is provided, current directory will be used as `path`. 63 | 64 | It is possible to use `nbclean` command instead of `nbmetaclean`.
65 | `nbmetaclean` will be used by defaults in favour of usage with `uvx` 66 | 67 | 68 | 69 | ```bash 70 | nbmetaclean 71 | ``` 72 | 73 | `nbcheck` should be run with flags: 74 | - `--ec` for execution_count check 75 | - `--err` for check errors in outputs 76 | - `--warn` for check warnings in outputs 77 | ```bash 78 | nbcheck --ec --err --warn 79 | ``` 80 | 81 | 82 | ## Nbmetaclean 83 | ### Default settings 84 | By default, the following settings are used: 85 | 86 | - Clean notebook metadata, except `authors` and `language_info / name`. 87 | - Clean cells execution_count. 88 | - Preserve metadata at cells. 89 | - Preserve cells outputs. 90 | - After cleaning notebook, timestamp for file will be set to previous values. 91 | 92 | 93 | 94 | 95 | 96 | 97 | ### Arguments 98 | Check available arguments: 99 | 100 | ```bash 101 | nbmetaclean -h 102 | 103 | usage: nbmetaclean [-h] [-s] [--not_ec] [--not-pt] [--dont_clear_nb_metadata] [--clear_cell_metadata] [--clear_outputs] 104 | [--nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...]] 105 | [--cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...]] [--dont_merge_masks] [--clean_hidden_nbs] [-D] [-V] 106 | [path ...] 107 | 108 | Clean metadata and execution_count from Jupyter notebooks. 109 | 110 | positional arguments: 111 | path Path for nb or folder with notebooks. 112 | 113 | options: 114 | -h, --help show this help message and exit 115 | -s, --silent Silent mode. 116 | --not_ec Do not clear execution_count. 117 | --not-pt Do not preserve timestamp. 118 | --dont_clear_nb_metadata 119 | Do not clear notebook metadata. 120 | --clear_cell_metadata 121 | Clear cell metadata. 122 | --clear_outputs Clear outputs. 123 | --nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...] 124 | Preserve mask for notebook metadata. 125 | --cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...] 
126 | Preserve mask for cell metadata. 127 | --dont_merge_masks Do not merge masks. 128 | --clean_hidden_nbs Clean hidden notebooks. 129 | -D, --dry_run perform a trial run, don't write results 130 | -V, --verbose Verbose mode. Print extra information. 131 | ``` 132 | 133 | ### Execution_count 134 | If you want to leave execution_count add `--not_ec` flag at command line or `args: [--not_ec]` line to `.pre-commit-config.yaml`. 135 | 136 | ```yaml 137 | repos: 138 | - repo: https://github.com/ayasyrev/nbmetaclean 139 | rev: 0.1.1 140 | hooks: 141 | - id: nbmetaclean 142 | args: [ --not_ec ] 143 | ``` 144 | 145 | ```bash 146 | nbmetaclean --not_ec 147 | ``` 148 | 149 | ### Clear outputs 150 | If you want to clear outputs, add `--clear_outputs` at command line or `args: [ --clear_outputs ]` line to `.pre-commit-config.yaml`. 151 | ```yaml 152 | repos: 153 | - repo: https://github.com/ayasyrev/nbmetaclean 154 | rev: 0.1.1 155 | hooks: 156 | - id: nbmetaclean 157 | args: [ --clear_outputs ] 158 | ``` 159 | 160 | ```bash 161 | nbmetaclean --clear_outputs 162 | ``` 163 | 164 | ## Nbcheck 165 | Check Jupyter Notebooks for correct execution_count, errors and (or) warnings in outputs. 166 | 167 | ### Execution_count 168 | Check that all code cells executed one after another. 169 | 170 | #### Strict mode 171 | By default, execution_count check in `strict` mode. 172 | All cells must be executed, one after another. 173 | 174 | pre-commit config example: 175 | ```yaml 176 | repos: 177 | - repo: https://github.com/ayasyrev/nbmetaclean 178 | rev: 0.1.1 179 | hooks: 180 | - id: nbcheck 181 | args: [ --ec ] 182 | ``` 183 | 184 | command line example: 185 | ```bash 186 | nbcheck --ec 187 | ``` 188 | 189 | #### Not strict mode 190 | `--not_strict` flag can be used to check that next cell executed after previous one, but execution number can be more than `+1`.
191 | 192 | pre-commit config example: 193 | ```yaml 194 | repos: 195 | - repo: https://github.com/ayasyrev/nbmetaclean 196 | rev: 0.1.1 197 | hooks: 198 | - id: nbcheck 199 | args: [ --ec, --not_strict ] 200 | ``` 201 | 202 | command line example: 203 | ```bash 204 | nbcheck --ec --not_strict 205 | ``` 206 | 207 | #### Allow notebooks with no execution_count 208 | 209 | `--no_exec` flag allows notebooks with all cells without execution_count. 210 | If notebook has cells with execution_count and without execution_count, pre-commit will return error. 211 | 212 | pre-commit config example: 213 | ```yaml 214 | repos: 215 | - repo: https://github.com/ayasyrev/nbmetaclean 216 | rev: 0.1.1 hooks: 217 | - id: nbcheck 218 | args: [ --ec, --no_exec ] 219 | ``` 220 | 221 | command line example: 222 | ```bash 223 | nbcheck --ec --no_exec 224 | ``` 225 | 226 | 227 | 228 | ### Errors and Warnings 229 | 230 | `--err` and `--warn` flags can be used to check for errors and warnings in outputs. 231 | 232 | pre-commit config example: 233 | ```yaml 234 | repos: 235 | - repo: https://github.com/ayasyrev/nbmetaclean 236 | rev: 0.1.1 237 | hooks: 238 | - id: nbcheck 239 | args: [ --err, --warn ] 240 | ``` 241 | 242 | command line example: 243 | ```bash 244 | nbcheck --err --warn 245 | ``` 246 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | hide: 3 | - navigation 4 | --- 5 | 6 | # nbmetaclean 7 | Collections of python scripts for checking and cleaning Jupyter Notebooks metadata, execution_count and optionally output. 8 | Can be used as command line tool or pre-commit hook. 9 | 10 | 11 | Pure Python, no dependencies. 12 | 13 | Can be used as a pre-commit hook or as a command line tool.
14 | 15 | 16 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/benchmark-utils)](https://pypi.org/project/nbmetaclean/) 17 | [![PyPI Status](https://badge.fury.io/py/nbmetaclean.svg)](https://badge.fury.io/py/nbmetaclean) 18 | [![Tests](https://github.com/ayasyrev/nbmetaclean/workflows/Tests/badge.svg)](https://github.com/ayasyrev/nbmetaclean/actions?workflow=Tests) [![Codecov](https://codecov.io/gh/ayasyrev/nbmetaclean/branch/main/graph/badge.svg)](https://codecov.io/gh/ayasyrev/nbmetaclean) 19 | 20 | ## nbmetaclean 21 | 22 | Clean Jupyter Notebooks metadata, execution_count and optionally output. 23 | 24 | ## nbcheck 25 | Check Jupyter Notebooks for errors and (or) warnings in outputs. 26 | 27 | 28 | ## Base usage 29 | 30 | ### Pre-commit hook 31 | Nbmetaclean can be used as a pre-commit hook, with [pre-commit](https://pre-commit.com/pre-commit). 32 | You do not need to install nbmetaclean, it will be installed automatically. 33 | add to `.pre-commit-config.yaml`: 34 | ```yaml 35 | repos: 36 | - repo: https://github.com/ayasyrev/nbmetaclean 37 | rev: 0.1.1 38 | hooks: 39 | - id: nbmetaclean 40 | - id: nbcheck 41 | args: [ --ec, --err, --warn ] 42 | ``` 43 | 44 | 45 | 46 | ### Command line tool 47 | 48 | #### Without install: 49 | If you use [uv](https://github.com/astral-sh/uv) package manager, you can run nbmetaclean without installing it. 50 | To clean notebooks: 51 | ```bash 52 | uvx nbmetaclean 53 | ``` 54 | To check notebooks: 55 | ```bash 56 | uvx --from nbmetaclean nbcheck --ec --err --warn 57 | ``` 58 | 59 | 60 | #### Install: 61 | ```bash 62 | pip install nbmetaclean 63 | ``` 64 | 65 | Usage: 66 | run `nbmetaclean` or `nbcheck` command with `path` to notebook or folder with notebooks. 67 | If no `path` is provided, current directory will be used as `path`. 68 | 69 | It is possible to use `nbclean` command instead of `nbmetaclean`.
70 | `nbmetaclean` will be used by defaults in favour of usage with `uvx` 71 | 72 | 73 | 74 | ```bash 75 | nbmetaclean 76 | ``` 77 | 78 | `nbcheck` should be run with flags: 79 | - `--ec` for execution_count check 80 | - `--err` for check errors in outputs 81 | - `--warn` for check warnings in outputs 82 | ```bash 83 | nbcheck --ec --err --warn 84 | ``` 85 | 86 | 87 | ## Nbmetaclean 88 | ### Default settings 89 | By default, the following settings are used: 90 | 91 | - Clean notebook metadata, except `authors` and `language_info / name`. 92 | - Clean cells execution_count. 93 | - Preserve metadata at cells. 94 | - Preserve cells outputs. 95 | - After cleaning notebook, timestamp for file will be set to previous values. 96 | 97 | 98 | 99 | 100 | 101 | 102 | ### Arguments 103 | Check available arguments: 104 | 105 | ```bash 106 | nbmetaclean -h 107 | 108 | usage: nbmetaclean [-h] [-s] [--not_ec] [--not-pt] [--dont_clear_nb_metadata] [--clear_cell_metadata] [--clear_outputs] 109 | [--nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...]] 110 | [--cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...]] [--dont_merge_masks] [--clean_hidden_nbs] [-D] [-V] 111 | [path ...] 112 | 113 | Clean metadata and execution_count from Jupyter notebooks. 114 | 115 | positional arguments: 116 | path Path for nb or folder with notebooks. 117 | 118 | options: 119 | -h, --help show this help message and exit 120 | -s, --silent Silent mode. 121 | --not_ec Do not clear execution_count. 122 | --not-pt Do not preserve timestamp. 123 | --dont_clear_nb_metadata 124 | Do not clear notebook metadata. 125 | --clear_cell_metadata 126 | Clear cell metadata. 127 | --clear_outputs Clear outputs. 128 | --nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...] 129 | Preserve mask for notebook metadata. 130 | --cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...] 
131 | Preserve mask for cell metadata. 132 | --dont_merge_masks Do not merge masks. 133 | --clean_hidden_nbs Clean hidden notebooks. 134 | -D, --dry_run perform a trial run, don't write results 135 | -V, --verbose Verbose mode. Print extra information. 136 | ``` 137 | 138 | ### Execution_count 139 | If you want to leave execution_count add `--not_ec` flag at command line or `args: [--not_ec]` line to `.pre-commit-config.yaml`. 140 | 141 | ```yaml 142 | repos: 143 | - repo: https://github.com/ayasyrev/nbmetaclean 144 | rev: 0.1.1 145 | hooks: 146 | - id: nbmetaclean 147 | args: [ --not_ec ] 148 | ``` 149 | 150 | ```bash 151 | nbmetaclean --not_ec 152 | ``` 153 | 154 | ### Clear outputs 155 | If you want to clear outputs, add `--clear_outputs` at command line or `args: [ --clear_outputs ]` line to `.pre-commit-config.yaml`. 156 | ```yaml 157 | repos: 158 | - repo: https://github.com/ayasyrev/nbmetaclean 159 | rev: 0.1.1 160 | hooks: 161 | - id: nbmetaclean 162 | args: [ --clear_outputs ] 163 | ``` 164 | 165 | ```bash 166 | nbmetaclean --clear_outputs 167 | ``` 168 | 169 | ## Nbcheck 170 | Check Jupyter Notebooks for correct execution_count, errors and (or) warnings in outputs. 171 | 172 | ### Execution_count 173 | Check that all code cells executed one after another. 174 | 175 | #### Strict mode 176 | By default, execution_count check in `strict` mode. 177 | All cells must be executed, one after another. 178 | 179 | pre-commit config example: 180 | ```yaml 181 | repos: 182 | - repo: https://github.com/ayasyrev/nbmetaclean 183 | rev: 0.1.1 184 | hooks: 185 | - id: nbcheck 186 | args: [ --ec ] 187 | ``` 188 | 189 | command line example: 190 | ```bash 191 | nbcheck --ec 192 | ``` 193 | 194 | #### Not strict mode 195 | `--not_strict` flag can be used to check that next cell executed after previous one, but execution number can be more than `+1`.
196 | 197 | pre-commit config example: 198 | ```yaml 199 | repos: 200 | - repo: https://github.com/ayasyrev/nbmetaclean 201 | rev: 0.1.1 202 | hooks: 203 | - id: nbcheck 204 | args: [ --ec, --not_strict ] 205 | ``` 206 | 207 | command line example: 208 | ```bash 209 | nbcheck --ec --not_strict 210 | ``` 211 | 212 | #### Allow notebooks with no execution_count 213 | 214 | `--no_exec` flag allows notebooks with all cells without execution_count. 215 | If notebook has cells with execution_count and without execution_count, pre-commit will return error. 216 | 217 | pre-commit config example: 218 | ```yaml 219 | repos: 220 | - repo: https://github.com/ayasyrev/nbmetaclean 221 | rev: 0.1.1 hooks: 222 | - id: nbcheck 223 | args: [ --ec, --no_exec ] 224 | ``` 225 | 226 | command line example: 227 | ```bash 228 | nbcheck --ec --no_exec 229 | ``` 230 | 231 | 232 | 233 | ### Errors and Warnings 234 | 235 | `--err` and `--warn` flags can be used to check for errors and warnings in outputs. 236 | 237 | pre-commit config example: 238 | ```yaml 239 | repos: 240 | - repo: https://github.com/ayasyrev/nbmetaclean 241 | rev: 0.1.1 242 | hooks: 243 | - id: nbcheck 244 | args: [ --err, --warn ] 245 | ``` 246 | 247 | command line example: 248 | ```bash 249 | nbcheck --err --warn 250 | ``` 251 | -------------------------------------------------------------------------------- /docs/overrides/partials/copyright.html: -------------------------------------------------------------------------------- 1 | 18 | -------------------------------------------------------------------------------- /mkdocs.yaml: -------------------------------------------------------------------------------- 1 | site_name: Nbmetaclean 2 | repo_url: https://github.com/ayasyrev/nbmetaclean 3 | repo_name: nbmetaclean 4 | docs_dir: docs 5 | 6 | # copyright: 7 | theme: 8 | name: material 9 | custom_dir: docs/overrides 10 | 11 | palette: 12 | - scheme: default 13 | toggle: 14 | icon: material/toggle-switch-off-outline 15 | name:
Switch to dark mode 16 | - scheme: slate 17 | toggle: 18 | icon: material/toggle-switch 19 | name: Switch to light mode 20 | markdown_extensions: 21 | - admonition 22 | - pymdownx.details 23 | - pymdownx.superfences 24 | 25 | extra: 26 | analytics: 27 | provider: google 28 | property: G-0F3FK713C2 29 | copyright: Copyright © 2023-2024 Andrei Yasyrev. 30 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | 4 | nox.options.default_venv_backend = "uv|virtualenv" 5 | nox.options.reuse_existing_virtualenvs = True 6 | 7 | 8 | @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]) 9 | def tests(session: nox.Session) -> None: 10 | args = session.posargs or ["--cov"] 11 | session.install("-e", ".[test]") 12 | session.run("pytest", *args) 13 | -------------------------------------------------------------------------------- /noxfile_conda.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | nox.options.default_venv_backend = "mamba|conda" 4 | nox.options.reuse_existing_virtualenvs = True 5 | 6 | 7 | @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]) 8 | def conda_tests(session: nox.Session) -> None: 9 | args = session.posargs or ["--cov"] 10 | session.conda_install("uv") 11 | session.run("uv", "pip", "install", "-e", ".[test]") 12 | session.run("pytest", *args) 13 | -------------------------------------------------------------------------------- /noxfile_conda_lint.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | locations = "." 
4 | nox.options.default_venv_backend = "mamba|conda" 5 | nox.options.reuse_existing_virtualenvs = True 6 | 7 | 8 | @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]) 9 | def conda_lint(session: nox.Session) -> None: 10 | args = session.posargs or locations 11 | session.conda_install("ruff") 12 | session.run("ruff", "check", *args) 13 | -------------------------------------------------------------------------------- /noxfile_lint.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | locations = "." 4 | nox.options.default_venv_backend = "uv|virtualenv" 5 | nox.options.reuse_existing_virtualenvs = True 6 | 7 | 8 | @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]) 9 | def lint(session: nox.Session) -> None: 10 | args = session.posargs or locations 11 | session.install("ruff") 12 | session.run("ruff", "check", *args) 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.coverage.paths] 2 | source = ["src"] 3 | 4 | [tool.coverage.run] 5 | branch = true 6 | source = ["nbmetaclean"] 7 | 8 | [tool.coverage.report] 9 | show_missing = true 10 | 11 | [tool.ruff] 12 | extend-include = ["*.ipynb"] 13 | indent-width = 4 14 | 15 | [tool.ruff.lint] 16 | explicit-preview-rules = true 17 | 18 | [tool.ruff.format] 19 | quote-style = "double" 20 | indent-style = "space" 21 | skip-magic-trailing-comma = false 22 | line-ending = "auto" 23 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | black[jupyter] 3 | coverage[toml] 4 | flake8 5 | isort 6 | mypy 7 | nox 8 | pre-commit 9 | ruff 10 | -------------------------------------------------------------------------------- /requirements_test.txt: 
-------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = nbmetaclean 3 | version = attr: nbmetaclean.version.__version__ 4 | author = Yasyrev Andrei 5 | author_email = a.yasyrev@gmail.com 6 | description = Clean jupyter notebooks. Remove metadata and execution counts. 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/ayasyrev/nbmetaclean 10 | license = apache2 11 | classifiers = 12 | Programming Language :: Python :: 3.8 13 | Programming Language :: Python :: 3.9 14 | Programming Language :: Python :: 3.10 15 | Programming Language :: Python :: 3.11 16 | Programming Language :: Python :: 3.12 17 | Programming Language :: Python :: 3.13 18 | License :: OSI Approved :: Apache Software License 19 | Operating System :: OS Independent 20 | 21 | [options] 22 | package_dir = 23 | = src 24 | packages = find: 25 | python_requires = >=3.8 26 | 27 | [options.packages.find] 28 | where = src 29 | 30 | [options.entry_points] 31 | console_scripts = 32 | nbmetaclean=nbmetaclean.app_clean:app_clean 33 | nbclean=nbmetaclean.app_clean:app_clean 34 | nbcheck=nbmetaclean.app_check:app_check 35 | pipx.run = 36 | nbmetaclean=nbmetaclean.app_clean:app_clean 37 | nbclean=nbmetaclean.app_clean:app_clean 38 | nbcheck=nbmetaclean.app_check:app_check 39 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | REQUIREMENTS_TEST_FILENAME = "requirements_test.txt" 5 | REQUIREMENTS_DEV_FILENAME = "requirements_dev.txt" 6 | 7 | 8 | def load_requirements(filename: str) -> list[str]: 9 | """Load requirements from file""" 10 
| try: 11 | with open(filename, encoding="utf-8") as fh: 12 | return fh.read().splitlines() 13 | except FileNotFoundError: 14 | return [] 15 | 16 | 17 | TEST_REQUIRED = load_requirements(REQUIREMENTS_TEST_FILENAME) 18 | DEV_REQUIRED = load_requirements(REQUIREMENTS_DEV_FILENAME) 19 | 20 | 21 | # What packages are optional? 22 | EXTRAS = { 23 | "test": TEST_REQUIRED, 24 | "dev": DEV_REQUIRED + TEST_REQUIRED, 25 | } 26 | 27 | 28 | setup( 29 | extras_require=EXTRAS, 30 | ) 31 | -------------------------------------------------------------------------------- /src/nbmetaclean/__init__.py: -------------------------------------------------------------------------------- 1 | from .check import check_nb_ec, check_nb_errors 2 | from .clean import clean_nb_file, CleanConfig, clean_nb 3 | from .helpers import read_nb, write_nb, get_nb_names, get_nb_names_from_list 4 | 5 | 6 | __all__ = [ 7 | "get_nb_names", 8 | "get_nb_names_from_list", 9 | "check_nb_ec", 10 | "check_nb_errors", 11 | "clean_nb", 12 | "clean_nb_file", 13 | "CleanConfig", 14 | "read_nb", 15 | "write_nb", 16 | ] 17 | -------------------------------------------------------------------------------- /src/nbmetaclean/app_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import argparse 4 | from pathlib import Path 5 | import sys 6 | 7 | from nbmetaclean.check import check_nb_ec, check_nb_errors, check_nb_warnings 8 | from nbmetaclean.helpers import get_nb_names_from_list, read_nb 9 | from nbmetaclean.version import __version__ 10 | 11 | 12 | parser = argparse.ArgumentParser( 13 | prog="nbcheck", 14 | description="Check Jupyter notebooks for correct sequence of execution_count and (or) errors in outputs.", 15 | ) 16 | parser.add_argument( 17 | "path", 18 | default=".", 19 | nargs="*", 20 | help="Path for nb or folder with notebooks.", 21 | ) 22 | parser.add_argument( 23 | "--ec", 24 | action="store_true", 25 | help="Check 
execution_count.", 26 | ) 27 | parser.add_argument( 28 | "--err", 29 | action="store_true", 30 | help="Check errors in outputs.", 31 | ) 32 | parser.add_argument( 33 | "--warn", 34 | action="store_true", 35 | help="Check warnings in outputs.", 36 | ) 37 | parser.add_argument( 38 | "--not_strict", 39 | action="store_true", 40 | help="Not strict mode.", 41 | ) 42 | parser.add_argument( 43 | "--no_exec", 44 | action="store_true", 45 | help="Ignore notebooks with all code cells without execution_count.", 46 | ) 47 | parser.add_argument( 48 | "-V", 49 | "--verbose", 50 | action="store_true", 51 | help="Verbose mode. Print extra information.", 52 | ) 53 | parser.add_argument( 54 | "-v", 55 | "--version", 56 | action="store_true", 57 | help="Print version information.", 58 | ) 59 | 60 | 61 | def print_error( 62 | nbs: list[Path], 63 | message: str, 64 | ) -> None: 65 | """Print error message.""" 66 | print(f"{len(nbs)} notebooks with {message}:") 67 | for nb in nbs: 68 | print("- ", nb) 69 | 70 | 71 | def print_results( 72 | wrong_ec: list[Path], 73 | nb_errors: list[Path], 74 | nb_warnings: list[Path], 75 | read_error: list[Path], 76 | ) -> None: 77 | """Print results.""" 78 | if wrong_ec: 79 | print_error(wrong_ec, "wrong execution_count") 80 | if nb_errors: 81 | print_error(nb_errors, "errors in outputs") 82 | if nb_warnings: 83 | print_error(nb_warnings, "warnings in outputs") 84 | if read_error: 85 | print_error(read_error, "read error") 86 | 87 | 88 | def app_check() -> None: 89 | """Check notebooks for correct sequence of execution_count and errors in outputs.""" 90 | cfg = parser.parse_args() 91 | 92 | if cfg.version: 93 | print(f"nbcheck from nbmetaclean, version: {__version__}") 94 | sys.exit(0) 95 | 96 | if not cfg.ec and not cfg.err and not cfg.warn: 97 | print( 98 | "No checks are selected. Please select at least one check: " 99 | "--ec (for execution_count) or " 100 | "--err (for errors in outputs) or " 101 | "--warn (for warnings in outputs)." 
def app_check() -> None:
    """CLI entry point: check notebooks, print a report, exit 1 on any problem.

    Checks are opt-in via flags (`--ec`, `--err`, `--warn`); running with
    none selected is an error.
    """
    cfg = parser.parse_args()

    if cfg.version:
        print(f"nbcheck from nbmetaclean, version: {__version__}")
        sys.exit(0)

    if not any((cfg.ec, cfg.err, cfg.warn)):
        print(
            "No checks are selected. Please select at least one check: "
            "--ec (for execution_count) or "
            "--err (for errors in outputs) or "
            "--warn (for warnings in outputs)."
        )
        sys.exit(1)

    nb_files = get_nb_names_from_list(cfg.path)
    read_error: list[Path] = []
    if cfg.verbose:
        print(f"Checking {len(nb_files)} notebooks.")

    wrong_ec: list[Path] = []
    nb_errors: list[Path] = []
    nb_warnings: list[Path] = []
    for filename in nb_files:
        nb = read_nb(filename)
        if nb is None:  # unreadable or not valid notebook json
            read_error.append(filename)
            continue

        if cfg.ec and not check_nb_ec(nb, not cfg.not_strict, cfg.no_exec):
            wrong_ec.append(filename)

        if cfg.err and not check_nb_errors(nb):
            nb_errors.append(filename)

        if cfg.warn and not check_nb_warnings(nb):
            nb_warnings.append(filename)

    print_results(wrong_ec, nb_errors, nb_warnings, read_error)

    if any((wrong_ec, nb_errors, nb_warnings, read_error)):
        sys.exit(1)
parser.add_argument( 35 | "--not-pt", 36 | action="store_true", 37 | help="Do not preserve timestamp.", 38 | ) 39 | parser.add_argument( 40 | "--dont_clear_nb_metadata", 41 | action="store_true", 42 | help="Do not clear notebook metadata.", 43 | ) 44 | parser.add_argument( 45 | "--clear_cell_metadata", 46 | action="store_true", 47 | help="Clear cell metadata.", 48 | ) 49 | parser.add_argument( 50 | "--clear_outputs", 51 | action="store_true", 52 | help="Clear outputs.", 53 | ) 54 | parser.add_argument( 55 | "--nb_metadata_preserve_mask", 56 | nargs="+", 57 | help="Preserve mask for notebook metadata.", 58 | ) 59 | parser.add_argument( 60 | "--cell_metadata_preserve_mask", 61 | nargs="+", 62 | help="Preserve mask for cell metadata.", 63 | ) 64 | parser.add_argument( 65 | "--dont_merge_masks", 66 | action="store_true", 67 | help="Do not merge masks.", 68 | ) 69 | parser.add_argument( 70 | "--clean_hidden_nbs", 71 | action="store_true", 72 | help="Clean hidden notebooks.", 73 | ) 74 | parser.add_argument( 75 | "-D", 76 | "--dry_run", 77 | action="store_true", 78 | help="perform a trial run, don't write results", 79 | ) 80 | parser.add_argument( 81 | "-V", 82 | "--verbose", 83 | action="store_true", 84 | help="Verbose mode. 
def process_mask(mask: Union[list[str], None]) -> Union[tuple[TupleStr, ...], None]:
    """Convert dotted CLI mask strings into tuples of path components.

    ``["a.b", "c"]`` becomes ``(("a", "b"), ("c",))``; ``None`` passes through.
    """
    if mask is None:
        return None
    result = []
    for item in mask:
        result.append(tuple(item.split(".")))
    return tuple(result)
def check_nb_ec(nb: Nb, strict: bool = True, no_exec: bool = False) -> bool:
    """Check nb for correct sequence of execution_count.

    Expecting all code cells executed one after another.
    If `strict` is False, check that next cell executed after previous one,
    number can be more than `+1`.
    If `no_exec` is True, ignore notebooks with all code cells without
    execution_count.

    Args:
        nb (Nb): Notebook to check.
        strict (bool, optional): Strict mode. Defaults to True.
        no_exec (bool): Ignore notebooks with all code cells without execution_count.

    Returns:
        bool: True if correct.
    """
    current = 0
    no_exec_cells = 0
    for cell in nb["cells"]:
        if cell["cell_type"] != "code":
            continue
        # Use .get so a malformed cell missing these keys is treated as
        # unset instead of raising KeyError.
        execution_count = cell.get("execution_count")
        if not cell.get("source"):
            # Cell without code must not carry an execution_count.
            if execution_count:
                return False
            continue

        if not execution_count:
            if not no_exec:
                return False
            no_exec_cells += 1
        else:
            if strict and execution_count != current + 1:
                return False
            if execution_count <= current:
                return False
            current = execution_count
    if no_exec_cells and current:  # mixed: some cells executed, some not.
        return False
    return True
def check_nb_warnings(nb: Nb) -> bool:
    """Check nb for cells with warnings (stderr stream outputs).

    Args:
        nb (Nb): Notebook to check.

    Returns:
        bool: True if no warnings.
    """
    for cell in nb["cells"]:
        if cell["cell_type"] == "code" and "outputs" in cell:
            for output in cell["outputs"]:
                # Only "stream" outputs carry a "name" field; use .get so a
                # malformed output without it cannot raise KeyError.
                if output["output_type"] == "stream" and output.get("name") == "stderr":
                    return False
    return True
def filter_meta_mask(
    nb_meta: Union[str, int, Metadata],
    mask: Optional[tuple[str, ...]] = None,
) -> Union[str, int, Metadata]:
    """Recursively keep only the metadata entries selected by ``mask``.

    A scalar value (or an empty mask) is returned unchanged;
    ``mask=None`` discards everything and yields an empty dict.
    """
    if isinstance(nb_meta, (str, int)) or mask == ():
        return nb_meta
    if mask is None:
        return {}
    key = mask[0]
    value = nb_meta.get(key)
    if value is None:
        return {}
    filtered = filter_meta_mask(value, tuple(mask[1:]))
    # Fall back to the full value when the filtered subtree is empty.
    return {key: filtered or value}
def clean_outputs(outputs: list[Output], cfg: CleanConfig) -> bool:
    """Clean cell outputs in place: execution_count and (optionally) metadata.

    Returns:
        bool: True if anything was modified.
    """
    changed = False
    for out in outputs:
        if cfg.clear_execution_count and out.get("execution_count"):
            out["execution_count"] = None
            changed = True
        if not cfg.clear_cell_metadata:
            continue
        metadata = out.get("metadata")
        if metadata:
            before = copy.deepcopy(metadata)
            out["metadata"] = filter_metadata(metadata, cfg.cell_metadata_preserve_mask)
            changed = changed or out["metadata"] != before
    return changed
def clean_nb(
    nb: Nb,
    cfg: CleanConfig,
) -> bool:
    """Clean notebook in place - metadata, execution_count, outputs.

    Args:
        nb (Nb): Notebook to clean.
        cfg (CleanConfig): Cleaning options.

    Returns:
        bool: True if the notebook was changed.
    """
    changed = False

    metadata = nb.get("metadata")
    if cfg.clear_nb_metadata and metadata:
        old_metadata = copy.deepcopy(metadata)
        # Pick the preserve masks: user mask only, user mask merged with
        # the defaults, or the defaults alone.
        if not cfg.nb_metadata_preserve_mask:
            masks = NB_METADATA_PRESERVE_MASKS
        elif cfg.mask_merge:
            masks = cfg.nb_metadata_preserve_mask + NB_METADATA_PRESERVE_MASKS
        else:
            masks = cfg.nb_metadata_preserve_mask
        nb["metadata"] = filter_metadata(metadata, masks=masks)
        changed = nb["metadata"] != old_metadata

    if cfg.clear_cell_metadata or cfg.clear_execution_count or cfg.clear_outputs:
        for cell in nb["cells"]:
            if clean_cell(cell, cfg):
                changed = True

    return changed
def read_nb(path: PathOrStr) -> Nb | None:
    """Read notebook from filename.

    If file does not exist or is not a valid notebook, return None.

    Args:
        path (Union[str, PosixPath]): Notebook filename.

    Returns:
        Union[None, Nb]: Jupyter Notebook as dict or None if not valid or does not exist.
    """
    nb_path = Path(path)
    if not nb_path.exists() or not nb_path.is_file():
        return None
    try:
        # Context manager guarantees the handle is closed; the previous
        # `json.load(open(...))` leaked the file handle on every call.
        with open(nb_path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except Exception:
        # Unreadable / not valid JSON -> treat as "not a notebook".
        return None
def is_notebook(path: Path, hidden: bool = False) -> bool:
    """Return True if `path` looks like a notebook file.

    Hidden files (dot-prefixed names) only count when `hidden` is True.

    Args:
        path (Path): Path to check.
        hidden (bool): If True also accept hidden files, defaults to False.

    Returns:
        bool: True if `path` is a notebook and not hidden (unless allowed).
    """
    if path.suffix != ".ipynb":
        return False
    return hidden or not path.name.startswith(".")
def get_nb_names(
    path: Optional[PathOrStr] = None,
    recursive: bool = True,
    hidden: bool = False,
) -> list[Path]:
    """Return list of notebooks from `path`. If no `path`, use current folder.

    Args:
        path (Union[Path, str, None]): Path to nb or folder with notebooks.
        recursive (bool): Recursive search.
        hidden (bool): Skip or not hidden paths, defaults to False.

    Raises:
        FileNotFoundError: If path does not exist.

    Returns:
        list[Path]: List of notebooks names.
    """
    nb_path = Path(path or ".")

    if not nb_path.exists():
        raise FileNotFoundError(f"{nb_path} not exists!")

    if nb_path.is_file():
        return [nb_path] if is_notebook(nb_path, hidden) else []

    if not nb_path.is_dir():
        return []

    found: list[Path] = []
    for item in nb_path.iterdir():
        if item.is_file() and is_notebook(item, hidden):
            found.append(item)
        elif item.is_dir() and recursive:
            # Skip hidden folders (unless requested) and checkpoint folders.
            if item.name.startswith(".") and not hidden:
                continue
            if "checkpoint" in item.name:
                continue
            found.extend(get_nb_names(item, recursive, hidden))
    return found
def get_nb_names_from_list(
    path_list: list[PathOrStr] | PathOrStr,
    recursive: bool = True,
    hidden: bool = False,
) -> list[Path]:
    """Collect notebooks from every entry in `path_list`.

    Args:
        path_list (Union[Path, str, list]): Paths to notebooks or folders.
        recursive (bool): Recursive search.
        hidden (bool): Skip or not hidden paths, defaults to False.

    Returns:
        list[Path]: List of notebooks names.
    """
    if isinstance(path_list, (str, Path)):
        path_list = [path_list]
    nb_files: list[Path] = []
    for entry in path_list:
        if not Path(entry).exists():
            print(f"{entry} not exists!")
            continue
        nb_files.extend(get_nb_names(entry, recursive, hidden))
    return nb_files
def run_app(
    nb_path: Path,
    args: list[str] | None = None,
) -> tuple[str, str]:
    """Run the `nbcheck` app in a subprocess and return (stdout, stderr).

    Args:
        nb_path (Path): Notebook path (or a flag such as "--version").
        args (list[str] | None): Extra CLI arguments, defaults to None.

    Returns:
        tuple[str, str]: Decoded stdout and stderr of the run.
    """
    # `None` default instead of a mutable `[]` default: a shared default
    # list would survive between calls if any caller mutated it.
    run_result = subprocess.run(
        ["python", "-m", "nbmetaclean.app_check", str(nb_path), *(args or [])],
        capture_output=True,
        check=False,
    )
    return run_result.stdout.decode("utf-8"), run_result.stderr.decode("utf-8")
capture_output=True, check=False 34 | ) 35 | assert run_result.returncode == 0 36 | res_out = run_result.stdout.decode("utf-8") 37 | assert res_out.startswith( 38 | "usage: nbcheck [-h] [--ec] [--err] [--warn] [--not_strict] [--no_exec]" 39 | ) 40 | res_err = run_result.stderr.decode("utf-8") 41 | assert not res_err 42 | 43 | 44 | def test_check_nb_ec(tmp_path: Path): 45 | """test check `--ec`""" 46 | # base notebook - no execution_count 47 | 48 | test_nb = read_nb(example_nbs_path / nb_name) 49 | test_nb_path = tmp_path / nb_name 50 | write_nb(test_nb, test_nb_path) 51 | 52 | # check if no args 53 | res_out, res_err = run_app(test_nb_path, []) 54 | assert res_out.startswith( 55 | "No checks are selected. Please select at least one check: " 56 | "--ec (for execution_count) or --err (for errors in outputs) or " 57 | "--warn (for warnings in outputs)." 58 | ) 59 | assert not res_err 60 | 61 | # default execution_count 62 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 63 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 64 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 65 | assert not res_err 66 | 67 | # `-V` option 68 | res_out, res_err = run_app(test_nb_path, ["--ec", "-V"]) 69 | assert res_out.startswith("Checking 1 notebooks.\n") 70 | assert not res_err 71 | 72 | # check with `no_exec` option 73 | res_out, res_err = run_app(test_nb_path, ["--ec", "--no_exec"]) 74 | assert not res_out 75 | assert not res_err 76 | 77 | # set correct execution_count 78 | test_nb["cells"][2]["execution_count"] = 1 79 | test_nb["cells"][3]["execution_count"] = 2 80 | test_nb["cells"][5]["execution_count"] = 3 81 | write_nb(test_nb, test_nb_path) 82 | 83 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 84 | assert not res_out 85 | assert not res_err 86 | 87 | # test strict 88 | test_nb["cells"][5]["execution_count"] = 4 89 | write_nb(test_nb, test_nb_path) 90 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 91 | assert res_out.startswith("1 
notebooks with wrong execution_count:\n") 92 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 93 | assert not res_err 94 | 95 | res_out, res_err = run_app(test_nb_path, ["--ec", "--not_strict"]) 96 | assert not res_out 97 | assert not res_err 98 | 99 | # empty source, but with execution_count 100 | test_nb["cells"][5]["execution_count"] = 3 101 | test_nb["cells"][6]["execution_count"] = 4 102 | write_nb(test_nb, test_nb_path) 103 | 104 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 105 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 106 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 107 | assert not res_err 108 | res_out, res_err = run_app(test_nb_path, ["--ec", "--not_strict"]) 109 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 110 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 111 | assert not res_err 112 | 113 | # start not from 1 114 | test_nb = read_nb(example_nbs_path / nb_name) 115 | test_nb["cells"][2]["execution_count"] = 2 116 | test_nb["cells"][3]["execution_count"] = 3 117 | test_nb["cells"][5]["execution_count"] = 4 118 | write_nb(test_nb, test_nb_path) 119 | 120 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 121 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 122 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 123 | assert not res_err 124 | res_out, res_err = run_app(test_nb_path, ["--ec", "--not_strict"]) 125 | assert not res_out 126 | assert not res_err 127 | 128 | # next is less 129 | test_nb["cells"][3]["execution_count"] = 5 130 | write_nb(test_nb, test_nb_path) 131 | res_out, res_err = run_app(test_nb_path, ["--ec"]) 132 | assert res_out.startswith("1 notebooks with wrong execution_count:\n") 133 | assert res_out.endswith("test_nb_3_ec.ipynb\n") 134 | assert not res_err 135 | 136 | # code cell without execution_count 137 | test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb") 138 | test_nb["cells"][2]["execution_count"] = 1 139 | write_nb(test_nb, 
def test_check_nb_errors(tmp_path: Path):
    """`--err` flag: a clean nb passes, an nb with an error output is reported."""
    source_nb = "test_nb_3_ec.ipynb"
    nb = read_nb(example_nbs_path / source_nb)
    assert nb is not None

    nb_path = tmp_path / source_nb
    write_nb(nb, nb_path)
    out, err = run_app(nb_path, ["--err"])
    assert not out
    assert not err

    # Turn one output into an error output - the check must now fail.
    nb["cells"][2]["outputs"][0]["output_type"] = "error"
    write_nb(nb, nb_path)
    out, err = run_app(nb_path, ["--err"])
    assert out.startswith("1 notebooks with errors in outputs:\n")
    assert out.endswith("test_nb_3_ec.ipynb\n")
    assert not err
def test_check_app_version():
    """Both `--version` and `-v` print the app version string."""
    expected = f"nbcheck from nbmetaclean, version: {__version__}\n"
    for flag in ("--version", "-v"):
        out, err = run_app(flag)
        assert out == expected
        assert not err
def test_clean_nb_metadata(tmp_path: Path) -> None:
    """Notebook-metadata cleaning: preserve mask, dry run, verbose output."""
    source_nb = "test_nb_2_clean.ipynb"
    nb = read_nb(example_nbs_path / source_nb)
    nb_path = tmp_path / source_nb
    write_nb(nb, nb_path)

    # Already-clean notebook: default run reports nothing.
    out, err = run_app(nb_path, [])
    assert not out
    assert not err

    # Inject extra metadata to be cleaned.
    nb["metadata"]["some key"] = "some value"
    write_nb(nb, nb_path)

    # With a preserve mask covering the key, nothing changes.
    out, err = run_app(nb_path, ["--nb_metadata_preserve_mask", "some key"])
    assert not out
    assert not err
    result_nb = read_nb(nb_path)
    assert result_nb["metadata"]["some key"] == "some value"

    # Dry run reports the change but leaves the file untouched.
    out, err = run_app(nb_path, ["-D"])
    assert out
    assert not err
    result_nb = read_nb(nb_path)
    assert result_nb["metadata"]["some key"] == "some value"

    # Real run without a mask strips the key.
    out, err = run_app(nb_path, [])
    assert out
    assert not err
    result_nb = read_nb(nb_path)
    metadata = result_nb.get("metadata")
    assert metadata
    assert not metadata.get("some key")

    # Verbose run on the now-clean notebook.
    out, err = run_app(nb_path, ["-V"])
    assert out.startswith("Path: ")
    assert out.endswith(
        "test_nb_2_clean.ipynb, preserve timestamp: True\nchecked: 1 notebooks\n"
    )
    assert not err

    # Rewrite the dirty notebook and clean verbosely again.
    write_nb(nb, nb_path)
    out, err = run_app(nb_path, ["-V"])
    assert out.startswith("Path: ")
    assert "cleaned:" in out
    assert out.endswith("test_nb_2_clean.ipynb\n")
    assert not err
res_err = run_app(test_nb_path, ["-D"]) 141 | assert res_out.startswith("cleaned:") 142 | assert res_out.endswith("test_nb_2_clean.ipynb\n") 143 | assert not res_err 144 | nb = read_nb(test_nb_path) 145 | assert nb["cells"][1]["execution_count"] == 1 146 | assert nb["cells"][1]["outputs"][0]["execution_count"] == 1 147 | # dry, verbose 148 | res_out, res_err = run_app(test_nb_path, ["-DV"]) 149 | assert res_out.startswith("Path: ") 150 | assert nb_name_clean in res_out 151 | assert res_out.endswith("test_nb_2_clean.ipynb\n") 152 | assert not res_err 153 | 154 | # silent 155 | write_nb(test_nb, test_nb_path) 156 | res_out, res_err = run_app(test_nb_path, ["-s"]) 157 | assert not res_out 158 | assert not res_err 159 | nb = read_nb(test_nb_path) 160 | assert nb["cells"][1]["execution_count"] is None 161 | assert nb["cells"][1]["outputs"][0]["execution_count"] is None 162 | 163 | # clean output 164 | write_nb(test_nb, test_nb_path) 165 | res_out, res_err = run_app(test_nb_path, ["--clear_outputs"]) 166 | assert res_out.startswith("cleaned:") 167 | assert res_out.endswith("test_nb_2_clean.ipynb\n") 168 | assert not res_err 169 | nb = read_nb(test_nb_path) 170 | assert nb["cells"][1]["execution_count"] is None 171 | assert nb["cells"][1]["outputs"] == [] 172 | 173 | # path as arg 174 | write_nb(test_nb, test_nb_path) 175 | res_out, res_err = run_app(test_nb_path, []) 176 | assert res_out.startswith("cleaned:") 177 | assert res_out.endswith("test_nb_2_clean.ipynb\n") 178 | assert not res_err 179 | nb = read_nb(test_nb_path) 180 | assert nb["metadata"]["authors"][0]["name"] == "Andrei Yasyrev" 181 | assert nb["cells"][1]["execution_count"] is None 182 | assert nb["cells"][1]["outputs"][0]["execution_count"] is None 183 | 184 | # two nbs 185 | write_nb(test_nb, test_nb_path) 186 | # add second notebook 187 | nb_name_clean_2 = "test_nb_3_ec.ipynb" 188 | test_nb_2 = read_nb(example_nbs_path / nb_name_clean_2) 189 | test_nb_2["metadata"]["some key"] = "some value" 190 | 
write_nb(test_nb_2, tmp_path / nb_name_clean_2) 191 | 192 | res_out, res_err = run_app(tmp_path, []) 193 | assert res_out.startswith("cleaned: 2 notebooks\n") 194 | assert nb_name_clean in res_out 195 | assert nb_name_clean_2 in res_out 196 | assert not res_err 197 | 198 | 199 | def test_clean_nb_wrong_file(tmp_path: Path): 200 | """test app_clean with wrong file""" 201 | nb_name = tmp_path / "wrong.ipynb" 202 | with nb_name.open("w", encoding="utf-8") as fh: 203 | fh.write("some text") 204 | 205 | res_out, res_err = run_app(nb_name, []) 206 | assert res_out.startswith("with errors: 1") 207 | assert str(nb_name) in res_out 208 | assert not res_err 209 | 210 | 211 | def test_app_clean_version(): 212 | """test check `--version` option.""" 213 | res_out, res_err = run_app(args=["--version"]) 214 | assert res_out.startswith("nbmetaclean version: ") 215 | assert not res_err 216 | 217 | res_out, res_err = run_app(args=["-v"]) 218 | assert res_out.startswith("nbmetaclean version: ") 219 | assert not res_err 220 | -------------------------------------------------------------------------------- /tests/test_check.py: -------------------------------------------------------------------------------- 1 | from nbmetaclean.check import check_nb_ec, check_nb_errors, check_nb_warnings 2 | from nbmetaclean.helpers import read_nb 3 | 4 | 5 | def test_check_nb_ec(): 6 | """test check_nb_ec""" 7 | # base notebook - no execution_count 8 | test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb") 9 | result = check_nb_ec(test_nb) 10 | assert not result 11 | 12 | # check with `no_exec` option 13 | result = check_nb_ec(test_nb, strict=False, no_exec=True) 14 | assert result 15 | 16 | test_nb["cells"][2]["execution_count"] = 1 17 | test_nb["cells"][3]["execution_count"] = 2 18 | test_nb["cells"][5]["execution_count"] = 3 19 | 20 | result = check_nb_ec(test_nb) 21 | assert result 22 | 23 | # test strict 24 | test_nb["cells"][5]["execution_count"] = 4 25 | result = check_nb_ec(test_nb) 26 | 
    assert not result
    # non-strict mode tolerates gaps in the execution-count sequence
    result = check_nb_ec(test_nb, strict=False)
    assert result

    # empty source, but with execution_count
    # cell 6 has no source, so an execution_count there is invalid in both modes
    test_nb["cells"][5]["execution_count"] = 3
    test_nb["cells"][6]["execution_count"] = 4

    result = check_nb_ec(test_nb)
    assert not result
    result = check_nb_ec(test_nb, strict=False)
    assert not result

    # start not from 1
    # strict mode requires counts to start at 1; non-strict only requires increasing order
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    test_nb["cells"][2]["execution_count"] = 2
    test_nb["cells"][3]["execution_count"] = 3
    test_nb["cells"][5]["execution_count"] = 4

    result = check_nb_ec(test_nb)
    assert not result
    result = check_nb_ec(test_nb, strict=False)
    assert result

    # next is less
    # a later cell with a smaller count breaks ordering even in non-strict mode
    test_nb["cells"][3]["execution_count"] = 5

    result = check_nb_ec(test_nb, strict=False)
    assert not result

    # code cell without execution_count
    # only one of several code cells was executed -> notebook is not fully run
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    test_nb["cells"][2]["execution_count"] = 1

    result = check_nb_ec(test_nb, strict=False)
    assert not result

    # check with `no_exec` option should be False
    # no_exec accepts only fully unexecuted notebooks; this one is partially executed
    result = check_nb_ec(test_nb, strict=False, no_exec=True)
    assert not result


def test_check_nb_errors():
    """test check_nb_errors"""
    # clean fixture notebook has no error outputs
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    result = check_nb_errors(test_nb)
    assert result

    # inject an error output -> check must fail
    test_nb["cells"][2]["outputs"][0]["output_type"] = "error"
    result = check_nb_errors(test_nb)
    assert not result


def test_check_nb_warnings():
    """test check_nb_warnings"""
    # clean fixture notebook has no warning (stderr) outputs
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    result = check_nb_warnings(test_nb)
    assert result

    # an "error" output is not a warning, so the warnings check still passes
    test_nb["cells"][2]["outputs"][0]["output_type"] = "error"
    result = check_nb_warnings(test_nb)
    assert result

    # a stream output named "stderr" counts as a warning (asserted past this chunk)
    test_nb["cells"][2]["outputs"][0]["output_type"] = "stream"
test_nb["cells"][2]["outputs"][0]["name"] = "stderr" 91 | result = check_nb_warnings(test_nb) 92 | assert not result 93 | -------------------------------------------------------------------------------- /tests/test_clean.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | from pathlib import Path 4 | 5 | from pytest import CaptureFixture 6 | 7 | from nbmetaclean.clean import ( 8 | NB_METADATA_PRESERVE_MASKS, 9 | CleanConfig, 10 | clean_cell, 11 | clean_nb, 12 | clean_nb_file, 13 | filter_meta_mask, 14 | filter_metadata, 15 | ) 16 | from nbmetaclean.helpers import read_nb, write_nb 17 | 18 | 19 | def test_get_meta_by_mask(): 20 | """test get_meta_by_mask""" 21 | nb = read_nb(Path("tests/test_nbs/.test_nb_2_meta.ipynb")) 22 | nb_meta = nb.get("metadata") 23 | 24 | # string as nb_meta 25 | new_meta = filter_meta_mask("some string") 26 | assert new_meta == "some string" 27 | 28 | # no mask 29 | new_meta = filter_meta_mask(nb_meta) 30 | assert new_meta == {} 31 | 32 | # mask 33 | nb_meta["some key"] = "some value" 34 | new_meta = filter_meta_mask(nb_meta, ("some key",)) 35 | assert new_meta == {"some key": "some value"} 36 | new_meta = filter_meta_mask(nb_meta, NB_METADATA_PRESERVE_MASKS[0]) 37 | assert new_meta == {"language_info": {"name": "python"}} 38 | 39 | # mask for empty result 40 | new_meta = filter_meta_mask(nb_meta, ("some other key",)) 41 | assert new_meta == {} 42 | 43 | 44 | def test_new_metadata(): 45 | """test new_metadata""" 46 | nb_meta = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb").get("metadata") 47 | new_meta = filter_metadata(nb_meta) 48 | assert isinstance(new_meta, dict) 49 | assert not new_meta 50 | new_meta = filter_metadata(nb_meta, [("language_info", "name")]) 51 | assert new_meta == {"language_info": {"name": "python"}} 52 | 53 | 54 | def test_clean_nb_metadata(): 55 | """test clean_nb_metadata""" 56 | test_nb = read_nb("tests/test_nbs/test_nb_2_clean.ipynb") 57 | cfg = 
CleanConfig() 58 | result = clean_nb(test_nb, cfg) 59 | assert not result 60 | 61 | # add metadata, new filter, mask not merged 62 | test_nb["metadata"]["some key"] = "some value" 63 | cfg.nb_metadata_preserve_mask = (("some key",),) 64 | cfg.mask_merge = False 65 | result = clean_nb(test_nb, cfg) 66 | assert result 67 | assert test_nb["metadata"] == {"some key": "some value"} 68 | 69 | # add metadata, new filter, mask merged 70 | test_nb = read_nb("tests/test_nbs/test_nb_2_clean.ipynb") 71 | test_nb["metadata"]["some_key"] = {"key_1": 1, "key_2": 2} 72 | cfg.nb_metadata_preserve_mask = (("some_key", "key_1"),) 73 | cfg.mask_merge = True 74 | result = clean_nb(test_nb, cfg) 75 | assert result 76 | assert test_nb["metadata"]["authors"][0]["name"] == "Andrei Yasyrev" 77 | assert test_nb["metadata"]["some_key"] == {"key_1": 1} 78 | 79 | 80 | def test_clean_cell_metadata(): 81 | """test clean_cell_metadata""" 82 | test_nb = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb") 83 | 84 | # clear outputs 85 | cell = copy.deepcopy(test_nb.get("cells")[1]) 86 | assert cell["cell_type"] == "code" 87 | assert cell.get("outputs") 88 | assert not cell.get("metadata") 89 | assert cell.get("execution_count") == 1 90 | cell["metadata"] = {"some key": "some value"} 91 | changed = clean_cell( 92 | cell, 93 | cfg=CleanConfig( 94 | clear_outputs=True, 95 | clear_cell_metadata=True, 96 | ), 97 | ) 98 | assert changed 99 | assert not cell.get("outputs") 100 | assert not cell.get("metadata") 101 | assert not cell.get("execution_count") 102 | # run again - no changes 103 | changed = clean_cell( 104 | cell, 105 | cfg=CleanConfig( 106 | clear_outputs=True, 107 | clear_cell_metadata=True, 108 | ), 109 | ) 110 | assert not changed 111 | 112 | # dont clear outputs, execution_count, mask 113 | cell = copy.deepcopy(test_nb.get("cells")[1]) 114 | cell["metadata"] = {"some key": "some value"} 115 | cell["outputs"][0]["metadata"] = { 116 | "some key": "some value", 117 | "some other key": "some value", 
118 | } 119 | changed = clean_cell( 120 | cell, 121 | CleanConfig( 122 | clear_execution_count=False, 123 | clear_cell_metadata=True, 124 | cell_metadata_preserve_mask=(("some key",),), 125 | ), 126 | ) 127 | assert changed 128 | assert cell["outputs"][0]["metadata"] == {"some key": "some value"} 129 | assert cell["metadata"] == {"some key": "some value"} 130 | assert cell["execution_count"] == 1 131 | 132 | # clear outputs, same mask -> no changes meta, clear execution_count 133 | changed = clean_cell( 134 | cell, 135 | cfg=CleanConfig(), 136 | ) 137 | assert changed 138 | assert cell["execution_count"] is None 139 | assert cell["metadata"] == {"some key": "some value"} 140 | 141 | # clear execution_count, metadata 142 | changed = clean_cell( 143 | cell, 144 | cfg=CleanConfig( 145 | clear_cell_metadata=True, 146 | ), 147 | ) 148 | assert changed 149 | assert not cell["outputs"][0]["metadata"] 150 | assert not cell["execution_count"] 151 | assert not cell["metadata"] 152 | assert not cell["outputs"][0]["metadata"] 153 | 154 | 155 | def test_clean_cell(): 156 | """test clean_cel""" 157 | test_nb = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb") 158 | 159 | # nothing to clean. 
160 | cell = copy.deepcopy(test_nb.get("cells")[1]) 161 | assert cell.get("outputs") 162 | assert not cell.get("metadata") 163 | assert cell.get("execution_count") == 1 164 | result = clean_cell(cell, CleanConfig(clear_execution_count=False)) 165 | assert not result 166 | 167 | # clean cell metadata, cell without metadata 168 | cell["metadata"] = {} 169 | result = clean_cell(cell, CleanConfig(clear_cell_metadata=True)) 170 | assert result 171 | assert not cell.get("metadata") 172 | assert cell.get("outputs") 173 | 174 | # clear output metadata 175 | cell["outputs"][0]["metadata"] = {"some key": "some value"} 176 | result = clean_cell( 177 | cell, 178 | CleanConfig( 179 | clear_cell_metadata=True, 180 | cell_metadata_preserve_mask=(("some key",),), 181 | ), 182 | ) 183 | assert not result 184 | assert cell["outputs"][0].get("metadata") == {"some key": "some value"} 185 | 186 | 187 | def test_clean_cell_metadata_markdown(): 188 | """test clean_cell_metadata with markdown cell""" 189 | test_nb = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb") 190 | cell = copy.deepcopy(test_nb["cells"][0]) 191 | cell["metadata"] = {"some key": "some value"} 192 | changed = clean_cell( 193 | cell, 194 | cfg=CleanConfig( 195 | clear_cell_metadata=True, 196 | ), 197 | ) 198 | assert changed 199 | assert not cell["metadata"] 200 | 201 | 202 | def test_clean_nb(): 203 | """test clean nb""" 204 | path = Path("tests/test_nbs") 205 | nb_path = path / ".test_nb_2_meta.ipynb" 206 | nb_clean = path / "test_nb_2_clean.ipynb" 207 | nb = read_nb(nb_path) 208 | assert nb["cells"][1]["execution_count"] == 1 209 | assert nb["cells"][1]["outputs"][0]["execution_count"] == 1 210 | assert nb["metadata"] 211 | result = clean_nb(nb, cfg=CleanConfig()) 212 | assert result is True 213 | assert nb["cells"][1]["execution_count"] is None 214 | assert nb["cells"][1]["outputs"][0]["execution_count"] is None 215 | nb_clean = read_nb(nb_clean) 216 | assert nb == nb_clean 217 | 218 | # # try clean cleaned 219 | 
result = clean_nb(nb_clean, cfg=CleanConfig()) 220 | assert not result 221 | 222 | # # clean metadata, leave execution_count 223 | nb = read_nb(nb_path) 224 | result = clean_nb( 225 | nb, 226 | cfg=CleanConfig(clear_execution_count=False), 227 | ) 228 | assert result 229 | assert nb["cells"][1]["execution_count"] == 1 230 | assert nb["cells"][1]["outputs"][0]["execution_count"] == 1 231 | assert nb["metadata"] == nb_clean["metadata"] 232 | 233 | # clean nb metadata, leave cells metadata 234 | nb = read_nb(nb_path) 235 | nb["cells"][1]["metadata"] = {"some key": "some value"} 236 | result = clean_nb(nb, CleanConfig(clear_execution_count=False)) 237 | assert result 238 | assert nb["metadata"] == nb_clean["metadata"] 239 | assert nb["cells"][1]["metadata"] == {"some key": "some value"} 240 | assert nb["cells"][1]["execution_count"] == 1 241 | 242 | # clean cells metadata, leave nb metadata 243 | nb = read_nb(nb_path) 244 | nb_meta = copy.deepcopy(nb["metadata"]) 245 | result = clean_nb(nb, CleanConfig(clear_nb_metadata=False)) 246 | assert result 247 | assert nb["metadata"] == nb_meta 248 | assert nb["cells"][1]["execution_count"] is None 249 | 250 | 251 | def test_clean_nb_file(tmp_path: Path, capsys: CaptureFixture[str]): 252 | """test clean nb file""" 253 | path = Path("tests/test_nbs") 254 | nb_name = ".test_nb_2_meta.ipynb" 255 | nb_clean = read_nb(path / "test_nb_2_clean.ipynb") 256 | 257 | # prepare temp test notebook 258 | nb_source = read_nb(path / nb_name) 259 | test_nb_path = write_nb(nb_source, tmp_path / nb_name) 260 | 261 | # clean meta, leave execution_count 262 | # first lets dry run 263 | cleaned, errors = clean_nb_file( 264 | test_nb_path, 265 | cfg=CleanConfig( 266 | clear_execution_count=False, 267 | dry_run=True, 268 | ), 269 | ) 270 | assert len(cleaned) == 1 271 | assert len(errors) == 0 272 | nb = read_nb(cleaned[0]) 273 | assert nb["metadata"] == nb_source["metadata"] 274 | assert nb["cells"][1]["execution_count"] == 1 275 | assert 
nb["cells"][1]["outputs"][0]["execution_count"] == 1 276 | 277 | # clean meta, leave execution_count 278 | cleaned, errors = clean_nb_file( 279 | test_nb_path, 280 | cfg=CleanConfig(clear_execution_count=False), 281 | ) 282 | assert len(cleaned) == 1 283 | assert len(errors) == 0 284 | nb = read_nb(cleaned[0]) 285 | assert nb["metadata"] == nb_clean["metadata"] 286 | assert nb["cells"][1]["execution_count"] == 1 287 | assert nb["cells"][1]["outputs"][0]["execution_count"] == 1 288 | 289 | # clean meta, execution_count 290 | # path as list 291 | cleaned, errors = clean_nb_file([test_nb_path], CleanConfig()) 292 | assert len(cleaned) == 1 293 | nb = read_nb(cleaned[0]) 294 | assert nb == nb_clean 295 | 296 | # try clean cleaned 297 | cleaned, errors = clean_nb_file(test_nb_path, CleanConfig()) 298 | assert len(cleaned) == 0 299 | assert len(errors) == 0 300 | 301 | 302 | def test_clean_nb_file_errors(capsys: CaptureFixture[str], tmp_path: Path): 303 | """test clean_nb_file, errors""" 304 | # not existing nb 305 | path = tmp_path / "wrong_name" 306 | cleaned, errors = clean_nb_file(path) 307 | assert len(cleaned) == 0 308 | assert len(errors) == 1 309 | assert errors[0] == path 310 | captured = capsys.readouterr() 311 | assert not captured.out 312 | assert not captured.err 313 | 314 | # not valid nb 315 | with path.open("w", encoding="utf-8") as fh: 316 | fh.write("wrong nb") 317 | cleaned, errors = clean_nb_file(path) 318 | assert len(cleaned) == 0 319 | assert len(errors) == 1 320 | assert errors[0].name == "wrong_name" 321 | 322 | captured = capsys.readouterr() 323 | assert not captured.out 324 | assert not captured.err 325 | 326 | 327 | def test_clean_nb_file_timestamp(tmp_path: Path): 328 | """test clean_nb_file, timestamp""" 329 | path = Path("tests/test_nbs") 330 | nb_name = ".test_nb_2_meta.ipynb" 331 | nb_stat = (path / nb_name).stat() 332 | 333 | # prepare temp test notebook, set timestamp 334 | test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / 
nb_name) 335 | os.utime(test_nb_path, (nb_stat.st_atime, nb_stat.st_mtime)) 336 | test_nb_stat = test_nb_path.stat() 337 | assert test_nb_stat.st_atime == nb_stat.st_atime 338 | assert test_nb_stat.st_mtime == nb_stat.st_mtime 339 | 340 | cleaned, errors = clean_nb_file(test_nb_path) 341 | assert len(cleaned) == 1 342 | assert len(errors) == 0 343 | cleaned_stat = cleaned[0].stat() 344 | assert True 345 | assert cleaned_stat.st_mtime == test_nb_stat.st_mtime 346 | 347 | # dont preserve timestamp 348 | test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name) 349 | os.utime(test_nb_path, (nb_stat.st_atime, nb_stat.st_mtime)) 350 | cleaned, errors = clean_nb_file(test_nb_path, CleanConfig(preserve_timestamp=False)) 351 | assert len(cleaned) == 1 352 | assert len(errors) == 0 353 | cleaned_stat = cleaned[0].stat() 354 | assert True 355 | assert cleaned_stat.st_mtime != nb_stat.st_mtime 356 | -------------------------------------------------------------------------------- /tests/test_get_nbnames.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from nbmetaclean.helpers import get_nb_names, get_nb_names_from_list, is_notebook 4 | 5 | 6 | def test_is_notebook(): 7 | """test is_notebook""" 8 | assert is_notebook(Path("tests/test_nbs/test_nb_1.ipynb")) 9 | assert not is_notebook(Path("tests/test_nbs/test_nb_1.py")) 10 | assert not is_notebook(Path("tests/test_nbs/.test_nb_2_meta.ipynb")) 11 | assert is_notebook(Path("tests/test_nbs/.test_nb_2_meta.ipynb"), hidden=True) 12 | 13 | 14 | def test_get_nb_names(): 15 | """test get_nb_names""" 16 | path = Path("tests/test_nbs") 17 | # filename as argument 18 | file = path / "test_nb_1.ipynb" 19 | names = get_nb_names(file) 20 | assert len(names) == 1 21 | names.sort(key=lambda x: x.name) 22 | assert names[0] == file 23 | # filename but not nb 24 | names = get_nb_names("tests/test_clean.py") 25 | assert len(names) == 0 26 | 27 | # path as argument 
28 | names = get_nb_names(path) 29 | assert len(names) == 3 30 | names.sort(key=lambda x: x.name) 31 | assert names[0] == file 32 | # path as argument. add hidden files 33 | names = get_nb_names(path, hidden=True) 34 | assert len(names) == 4 35 | try: 36 | get_nb_names("wrong_name") 37 | assert False 38 | except FileNotFoundError as ex: 39 | assert True 40 | assert str(ex) == "wrong_name not exists!" 41 | 42 | 43 | def test_get_nb_names_recursive_hidden(tmp_path: Path): 44 | """test get_nb_names recursive hidden""" 45 | suffix = ".ipynb" 46 | # add one nb 47 | with open((tmp_path / "tst").with_suffix(suffix), "w", encoding="utf-8") as _: 48 | pass 49 | files = get_nb_names(tmp_path) 50 | assert len(files) == 1 51 | 52 | # add hidden nb 53 | with open((tmp_path / ".tst").with_suffix(suffix), "w", encoding="utf-8") as _: 54 | pass 55 | files = get_nb_names(tmp_path) 56 | assert len(files) == 1 57 | files = get_nb_names(tmp_path, hidden=True) 58 | assert len(files) == 2 59 | # add simple file 60 | with open((tmp_path / "simple"), "w", encoding="utf-8") as _: 61 | pass 62 | files = get_nb_names(tmp_path) 63 | assert len(files) == 1 64 | 65 | # add dir with one nb, hidden nb 66 | new_dir = tmp_path / "new_dir" 67 | new_dir.mkdir() 68 | with open((new_dir / "tst").with_suffix(suffix), "w", encoding="utf-8") as _: 69 | pass 70 | with open((new_dir / ".tst").with_suffix(suffix), "w", encoding="utf-8") as _: 71 | pass 72 | files = get_nb_names(tmp_path) 73 | assert len(files) == 2 74 | files = get_nb_names(tmp_path, hidden=True) 75 | assert len(files) == 4 76 | 77 | files = get_nb_names(tmp_path, recursive=False) 78 | assert len(files) == 1 79 | 80 | # add hidden dir 81 | hid_dir = tmp_path / ".hid_dir" 82 | hid_dir.mkdir() 83 | with open((hid_dir / "tst").with_suffix(suffix), "w", encoding="utf-8") as _: 84 | pass 85 | with open((hid_dir / ".tst").with_suffix(suffix), "w", encoding="utf-8") as _: 86 | pass 87 | files = get_nb_names(tmp_path, hidden=True) 88 | assert 
len(files) == 6 89 | files = get_nb_names(tmp_path) 90 | assert len(files) == 2 91 | 92 | # add checkpoint dir and file 93 | # files at this dir will be skipped 94 | checkpoint_dir = tmp_path / ".ipynb_checkpoints" 95 | checkpoint_dir.mkdir() 96 | with open( 97 | (checkpoint_dir / "nb-checkpoint").with_suffix(suffix), "w", encoding="utf-8" 98 | ) as _: 99 | pass 100 | with open( 101 | (checkpoint_dir / "some_nb").with_suffix(suffix), "w", encoding="utf-8" 102 | ) as _: 103 | pass 104 | files = get_nb_names(tmp_path) 105 | assert len(files) == 2 106 | files = get_nb_names(tmp_path, hidden=True) 107 | assert len(files) == 6 108 | 109 | 110 | def test_get_nb_names_from_list(): 111 | """test get_nb_names_from_list""" 112 | path = Path("tests/test_nbs") 113 | # filename as argument 114 | file = path / "test_nb_1.ipynb" 115 | names = get_nb_names_from_list(file) 116 | assert len(names) == 1 117 | assert names[0] == file 118 | 119 | # filename as list 120 | names = get_nb_names_from_list([file]) 121 | assert len(names) == 1 122 | assert names[0] == file 123 | 124 | # filename but not nb 125 | names = get_nb_names_from_list("tests/test_clean.py") 126 | assert len(names) == 0 127 | 128 | # path as list, not all notebooks 129 | names = get_nb_names_from_list([file, "wrong_name", "tests/test_clean.py"]) 130 | assert len(names) == 1 131 | assert names[0] == file 132 | 133 | # folder as argument 134 | names = get_nb_names_from_list(path) 135 | assert len(names) == 3 136 | names.sort(key=lambda x: x.name) 137 | assert names[0] == file 138 | # path as argument. 
add hidden files 139 | names = get_nb_names(path, hidden=True) 140 | assert len(names) == 4 141 | names = get_nb_names_from_list("wrong_name") 142 | assert len(names) == 0 143 | -------------------------------------------------------------------------------- /tests/test_nbs/.test_nb_2_meta.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "markdown cell source" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "2" 19 | ] 20 | }, 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "output_type": "execute_result" 24 | } 25 | ], 26 | "source": [ 27 | "1 + 1" 28 | ] 29 | } 30 | ], 31 | "metadata": { 32 | "authors": [ 33 | { 34 | "github": "https://github.com/ayasyrev", 35 | "name": "Andrei Yasyrev" 36 | } 37 | ], 38 | "kernelspec": { 39 | "display_name": "nbmetaclean", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.11.6" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 2 58 | } 59 | -------------------------------------------------------------------------------- /tests/test_nbs/test_nb_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [] 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": null, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": [] 14 | } 15 | ], 16 | "metadata": { 17 | "authors": [ 18 | { 19 | "github": "https://github.com/ayasyrev", 20 | "name": "Andrei Yasyrev" 21 | } 22 | ], 
23 | "language_info": { 24 | "name": "python" 25 | } 26 | }, 27 | "nbformat": 4, 28 | "nbformat_minor": 2 29 | } 30 | -------------------------------------------------------------------------------- /tests/test_nbs/test_nb_2_clean.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "markdown cell source" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "2" 19 | ] 20 | }, 21 | "execution_count": null, 22 | "metadata": {}, 23 | "output_type": "execute_result" 24 | } 25 | ], 26 | "source": [ 27 | "1 + 1" 28 | ] 29 | } 30 | ], 31 | "metadata": { 32 | "authors": [ 33 | { 34 | "github": "https://github.com/ayasyrev", 35 | "name": "Andrei Yasyrev" 36 | } 37 | ], 38 | "language_info": { 39 | "name": "python" 40 | } 41 | }, 42 | "nbformat": 4, 43 | "nbformat_minor": 2 44 | } 45 | -------------------------------------------------------------------------------- /tests/test_nbs/test_nb_3_ec.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "nb for check execution count" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/plain": [ 25 | "2" 26 | ] 27 | }, 28 | "execution_count": null, 29 | "metadata": {}, 30 | "output_type": "execute_result" 31 | } 32 | ], 33 | "source": [ 34 | "1 + 1" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/plain": [ 45 | "4" 46 | ] 47 | }, 48 | 
"execution_count": null, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "2 + 2" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "6" 73 | ] 74 | }, 75 | "execution_count": null, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "3 + 3" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [] 90 | } 91 | ], 92 | "metadata": { 93 | "authors": [ 94 | { 95 | "github": "https://github.com/ayasyrev", 96 | "name": "Andrei Yasyrev" 97 | } 98 | ], 99 | "language_info": { 100 | "name": "python" 101 | } 102 | }, 103 | "nbformat": 4, 104 | "nbformat_minor": 2 105 | } 106 | -------------------------------------------------------------------------------- /tests/test_read_write.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from nbmetaclean.helpers import read_nb, write_nb 4 | 5 | 6 | def test_read_nb(): 7 | """test read notebook""" 8 | file = Path("tests/test_nbs/test_nb_1.ipynb") 9 | nb = read_nb(file) 10 | assert isinstance(nb, dict) 11 | assert nb["metadata"]["language_info"] == {"name": "python"} 12 | assert nb["metadata"]["authors"][0]["name"] == "Andrei Yasyrev" 13 | assert nb["nbformat"] == 4 14 | assert nb["nbformat_minor"] == 2 15 | cells = nb["cells"] 16 | assert isinstance(cells, list) 17 | assert len(cells) == 2 18 | # markdown 19 | assert cells[0]["cell_type"] == "markdown" 20 | assert cells[0]["source"] == [] 21 | assert cells[0]["metadata"] == {} 22 | # code 23 | assert cells[1]["cell_type"] == "code" 24 | assert cells[1]["source"] == [] 25 | assert cells[1]["execution_count"] is 
None
    assert cells[1]["metadata"] == {}
    assert cells[1]["outputs"] == []


def test_write_nb(tmp_path: Path):
    """test write notebook"""
    file = Path("tests/test_nbs/test_nb_1.ipynb")
    nb = read_nb(file)
    write_nb(nb, tmp_path / file.name)
    # round-trip: written file must be byte-identical to the source fixture
    with open(tmp_path / file.name, "r", encoding="utf-8") as fh:
        res_text = fh.read()
    with open(file, "r", encoding="utf-8") as fh:
        org_text = fh.read()
    assert res_text == org_text

    # write with name w/o suffix
    # write_nb appends ".ipynb" and returns the resulting path
    result = write_nb(nb, tmp_path / "test_nb_1")
    assert result == tmp_path / "test_nb_1.ipynb"

    # write with stat
    # passing a (atime, mtime) timestamp must be applied to the written file
    stat = file.stat()
    timestamp = (stat.st_atime, stat.st_mtime)
    result = write_nb(nb, tmp_path / "test_nb_1", timestamp=timestamp)
    res_stat = result.stat()
    assert timestamp == (res_stat.st_atime, res_stat.st_mtime)


def test_read_nb_errors(tmp_path: Path):
    """test read notebook not exist or invalid"""
    # not valid
    # read_nb signals any failure by returning None rather than raising
    with open(tmp_path / "test.ipynb", "w", encoding="utf-8") as fh:
        fh.write("invalid")
    assert read_nb(tmp_path / "test.ipynb") is None

    # not exist
    assert read_nb(tmp_path / "test_nb_1.ipynb") is None

    # not file
    assert read_nb(tmp_path) is None
--------------------------------------------------------------------------------