├── .flake8
├── .github
└── workflows
│ ├── lint.yml
│ └── tests.yml
├── .gitignore
├── .mypy.ini
├── .pre-commit-config.yaml
├── .pre-commit-hooks.yaml
├── .pylintrc
├── LICENSE
├── Makefile
├── README.md
├── docs
├── README.md
└── overrides
│ └── partials
│ └── copyright.html
├── mkdocs.yaml
├── noxfile.py
├── noxfile_conda.py
├── noxfile_conda_lint.py
├── noxfile_lint.py
├── pyproject.toml
├── requirements_dev.txt
├── requirements_test.txt
├── setup.cfg
├── setup.py
├── src
└── nbmetaclean
│ ├── __init__.py
│ ├── app_check.py
│ ├── app_clean.py
│ ├── check.py
│ ├── clean.py
│ ├── helpers.py
│ ├── nb_types.py
│ └── version.py
└── tests
├── test_app_check.py
├── test_app_clean.py
├── test_check.py
├── test_clean.py
├── test_get_nbnames.py
├── test_nbs
├── .test_nb_2_meta.ipynb
├── test_nb_1.ipynb
├── test_nb_2_clean.ipynb
└── test_nb_3_ec.ipynb
└── test_read_write.py
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | select = C,E,F,W
3 | max-complexity = 10
4 | max-line-length = 120
5 | extend-ignore = W503
6 | disable-noqa = True
7 | application-import-names = nbmetaclean, tests
8 | import-order-style = google
9 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Lint
2 | on:
3 | push:
4 | branches:
5 | - dev
6 | - main
7 | jobs:
8 | tests:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@main
12 | - uses: actions/setup-python@main
13 | with:
14 | python-version: "3.11"
15 | architecture: x64
16 | - run: pip install ruff
17 | - run: ruff check .
18 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 | on:
3 | push:
4 | branches:
5 | - dev
6 | - main
7 | jobs:
8 | tests:
9 | runs-on: ubuntu-latest
10 | strategy:
11 | matrix:
12 | python: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
13 | steps:
14 | - name: Checkout
15 | uses: actions/checkout@main
16 | - name: Setup Python ${{ matrix.python }}
17 | uses: actions/setup-python@main
18 | with:
19 | python-version: ${{ matrix.python }}
20 | architecture: x64
21 |
22 | - name: Install
23 | run: |
24 | pip install uv
25 | uv pip install --system .[test] "coverage[toml]"
26 |
27 | - name: Tests
28 | run: pytest --cov
29 |
30 | - name: CodeCov
31 | if: ${{ matrix.python == '3.11' }}
32 | uses: codecov/codecov-action@main
33 | with:
34 | token: ${{ secrets.CODECOV_TOKEN }}
35 | slug: ayasyrev/nbmetaclean
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # my
2 | .vscode/
3 | tmp*/
4 | cov.xml
5 |
6 | # ide
7 |
8 | .idea/
9 | .vscode/settings.json
10 |
11 | # nox
12 | .nox
13 |
14 | # Byte-compiled / optimized / DLL files
15 | __pycache__/
16 | *.py[cod]
17 | *$py.class
18 |
19 | # C extensions
20 | *.so
21 |
22 | # Distribution / packaging
23 | .Python
24 | build/
25 | develop-eggs/
26 | dist/
27 | downloads/
28 | eggs/
29 | .eggs/
30 | lib/
31 | lib64/
32 | parts/
33 | sdist/
34 | var/
35 | wheels/
36 | pip-wheel-metadata/
37 | share/python-wheels/
38 | *.egg-info/
39 | .installed.cfg
40 | *.egg
41 | MANIFEST
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .nox/
57 | .coverage
58 | .coverage.*
59 | .cache
60 | nosetests.xml
61 | coverage.xml
62 | *.cover
63 | *.py,cover
64 | .hypothesis/
65 | .pytest_cache/
66 |
67 | # Translations
68 | *.mo
69 | *.pot
70 |
71 | # Django stuff:
72 | *.log
73 | local_settings.py
74 | db.sqlite3
75 | db.sqlite3-journal
76 |
77 | # Flask stuff:
78 | instance/
79 | .webassets-cache
80 |
81 | # Scrapy stuff:
82 | .scrapy
83 |
84 | # Sphinx documentation
85 | docs/_build/
86 |
87 | # PyBuilder
88 | target/
89 |
90 | # Jupyter Notebook
91 | .ipynb_checkpoints
92 |
93 | # IPython
94 | profile_default/
95 | ipython_config.py
96 |
97 | # pyenv
98 | .python-version
99 |
100 | # pipenv
101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
104 | # install all needed dependencies.
105 | #Pipfile.lock
106 |
107 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
108 | __pypackages__/
109 |
110 | # Celery stuff
111 | celerybeat-schedule
112 | celerybeat.pid
113 |
114 | # SageMath parsed files
115 | *.sage.py
116 |
117 | # Environments
118 | .env
119 | .venv
120 | env/
121 | venv/
122 | ENV/
123 | env.bak/
124 | venv.bak/
125 |
126 | # Spyder project settings
127 | .spyderproject
128 | .spyproject
129 |
130 | # Rope project settings
131 | .ropeproject
132 |
133 | # mkdocs documentation
134 | /site
135 |
136 | # mypy
137 | .mypy_cache/
138 | .dmypy.json
139 | dmypy.json
140 |
141 | # Pyre type checker
142 | .pyre/
143 |
--------------------------------------------------------------------------------
/.mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: local
3 |
4 | hooks:
5 | # local version for testing
6 | - id: nbmetaclean
7 | name: nbmetaclean local
8 | entry: nbmetaclean
9 | language: system
 10 |         files: \.ipynb$
11 |
12 | - id: nbcheck
13 | name: nbcheck execution_count local
14 | entry: nbcheck
15 | language: system
 16 |         files: \.ipynb$
17 | args: [ --ec, --no_exec, --err ]
18 |
19 | - repo: https://github.com/pre-commit/pre-commit-hooks
20 | rev: v4.6.0
21 | hooks:
22 | - id: check-added-large-files
23 | - id: check-ast
24 | - id: check-builtin-literals
25 | - id: check-case-conflict
26 | - id: check-docstring-first
27 | - id: check-executables-have-shebangs
28 | - id: check-shebang-scripts-are-executable
29 | - id: check-symlinks
30 | - id: check-toml
31 | - id: check-xml
32 | - id: detect-private-key
33 | - id: forbid-new-submodules
34 | - id: forbid-submodules
35 | - id: mixed-line-ending
36 | - id: destroyed-symlinks
37 | - id: fix-byte-order-marker
38 | - id: check-json
39 | - id: check-yaml
40 | args: [ --unsafe ]
41 | - id: debug-statements
42 | - id: end-of-file-fixer
43 | - id: trailing-whitespace
44 | - id: requirements-txt-fixer
45 | - repo: https://github.com/astral-sh/ruff-pre-commit
46 | # Ruff version.
47 | rev: v0.6.1
48 |
49 | hooks:
50 | # Run the linter.
51 | - id: ruff
52 | args: [ --fix ]
53 | # Run the formatter.
54 | - id: ruff-format
55 | - repo: https://github.com/pre-commit/pygrep-hooks
56 | rev: v1.10.0
57 | hooks:
58 | - id: python-check-mock-methods
59 | - id: python-use-type-annotations
60 | - id: python-check-blanket-noqa
61 | - id: text-unicode-replacement-char
62 |
--------------------------------------------------------------------------------
/.pre-commit-hooks.yaml:
--------------------------------------------------------------------------------
1 | - id: nbmetaclean
2 | name: nbmetaclean
3 | description: Clean Jupyter Notebooks metadata and optionally output.
4 | entry: nbmetaclean
5 | files: \.ipynb$
6 | language: python
7 | language_version: python3
8 |
9 | # Same as nbmetaclean, for compatibility.
10 | - id: nbclean
11 | name: nbclean
12 | description: Clean Jupyter Notebooks metadata and optionally output.
13 | entry: nbclean
14 | files: \.ipynb$
15 | language: python
16 | language_version: python3
17 |
18 | - id: nbcheck
19 | name: nbcheck
20 | description: Check Jupyter Notebooks for correct sequence of execution_count and (or) errors in outputs.
21 | entry: nbcheck
22 | files: \.ipynb$
23 | language: python
24 | language_version: python3
25 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 |
3 | # A comma-separated list of package or module names from where C extensions may
4 | # be loaded. Extensions are loading into the active Python interpreter and may
5 | # run arbitrary code.
6 | extension-pkg-allow-list=
7 |
8 | # A comma-separated list of package or module names from where C extensions may
9 | # be loaded. Extensions are loading into the active Python interpreter and may
10 | # run arbitrary code. (This is an alternative name to extension-pkg-allow-list
11 | # for backward compatibility.)
12 | extension-pkg-whitelist=pydantic
13 | ; extension-pkg-whitelist=pydantic , nbconvert, nbformat
14 |
15 | # Return non-zero exit code if any of these messages/categories are detected,
16 | # even if score is above --fail-under value. Syntax same as enable. Messages
17 | # specified are enabled, while categories only check already-enabled messages.
18 | fail-on=
19 |
20 | # Specify a score threshold to be exceeded before program exits with error.
21 | fail-under=10.0
22 |
23 | # Files or directories to be skipped. They should be base names, not paths.
24 | ignore=CVS
25 |
26 | # Add files or directories matching the regex patterns to the ignore-list. The
27 | # regex matches against paths and can be in Posix or Windows format.
28 | ignore-paths=
29 |
30 | # Files or directories matching the regex patterns are skipped. The regex
31 | # matches against base names, not paths.
32 | ignore-patterns=
33 |
34 | # Python code to execute, usually for sys.path manipulation such as
35 | # pygtk.require().
36 | #init-hook=
37 |
38 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
39 | # number of processors available to use.
40 | jobs=1
41 |
42 | # Control the amount of potential inferred values when inferring a single
43 | # object. This can help the performance when dealing with large functions or
44 | # complex, nested conditions.
45 | limit-inference-results=100
46 |
47 | # List of plugins (as comma separated values of python module names) to load,
48 | # usually to register additional checkers.
49 | load-plugins=
50 |
51 | # Pickle collected data for later comparisons.
52 | persistent=yes
53 |
54 | # Minimum Python version to use for version dependent checks. Will default to
55 | # the version used to run pylint.
56 | py-version=3.9
57 |
58 | # When enabled, pylint would attempt to guess common misconfiguration and emit
59 | # user-friendly hints instead of false-positive error messages.
60 | suggestion-mode=yes
61 |
62 | # Allow loading of arbitrary C extensions. Extensions are imported into the
63 | # active Python interpreter and may run arbitrary code.
64 | unsafe-load-any-extension=no
65 |
66 |
67 | [MESSAGES CONTROL]
68 |
69 | # Only show warnings with the listed confidence levels. Leave empty to show
70 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
71 | confidence=
72 |
73 | # Disable the message, report, category or checker with the given id(s). You
74 | # can either give multiple identifiers separated by comma (,) or put this
75 | # option multiple times (only on the command line, not in the configuration
76 | # file where it should appear only once). You can also use "--disable=all" to
77 | # disable everything first and then reenable specific checks. For example, if
78 | # you want to run only the similarities checker, you can use "--disable=all
79 | # --enable=similarities". If you want to run only the classes checker, but have
80 | # no Warning level messages displayed, use "--disable=all --enable=classes
81 | # --disable=W".
82 | disable=raw-checker-failed,
83 | bad-inline-option,
84 | locally-disabled,
85 | file-ignored,
86 | suppressed-message,
87 | useless-suppression,
88 | deprecated-pragma,
89 | use-symbolic-message-instead,
90 | exec-used,
91 | missing-module-docstring,
92 | missing-docstring,
93 | invalid-name
94 |
95 | # Enable the message, report, category or checker with the given id(s). You can
  96 | # either give multiple identifiers separated by comma (,) or put this option
97 | # multiple time (only on the command line, not in the configuration file where
98 | # it should appear only once). See also the "--disable" option for examples.
99 | enable=c-extension-no-member
100 |
101 |
102 | [REPORTS]
103 |
104 | # Python expression which should return a score less than or equal to 10. You
105 | # have access to the variables 'error', 'warning', 'refactor', and 'convention'
106 | # which contain the number of messages in each category, as well as 'statement'
107 | # which is the total number of statements analyzed. This score is used by the
108 | # global evaluation report (RP0004).
109 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
110 |
111 | # Template used to display messages. This is a python new-style format string
112 | # used to format the message information. See doc for all details.
113 | #msg-template=
114 |
115 | # Set the output format. Available formats are text, parseable, colorized, json
116 | # and msvs (visual studio). You can also give a reporter class, e.g.
117 | # mypackage.mymodule.MyReporterClass.
118 | output-format=text
119 |
120 | # Tells whether to display a full report or only the messages.
121 | reports=no
122 |
123 | # Activate the evaluation score.
124 | score=yes
125 |
126 |
127 | [REFACTORING]
128 |
129 | # Maximum number of nested blocks for function / method body
130 | max-nested-blocks=5
131 |
132 | # Complete name of functions that never returns. When checking for
133 | # inconsistent-return-statements if a never returning function is called then
134 | # it will be considered as an explicit return statement and no message will be
135 | # printed.
136 | never-returning-functions=sys.exit,argparse.parse_error
137 |
138 |
139 | [SPELLING]
140 |
141 | # Limits count of emitted suggestions for spelling mistakes.
142 | max-spelling-suggestions=4
143 |
144 | # Spelling dictionary name. Available dictionaries: none. To make it work,
145 | # install the 'python-enchant' package.
146 | spelling-dict=
147 |
148 | # List of comma separated words that should be considered directives if they
 149 | # appear at the beginning of a comment and should not be checked.
150 | spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
151 |
152 | # List of comma separated words that should not be checked.
153 | spelling-ignore-words=
154 |
155 | # A path to a file that contains the private dictionary; one word per line.
156 | spelling-private-dict-file=
157 |
158 | # Tells whether to store unknown words to the private dictionary (see the
159 | # --spelling-private-dict-file option) instead of raising a message.
160 | spelling-store-unknown-words=no
161 |
162 |
163 | [FORMAT]
164 |
165 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
166 | expected-line-ending-format=
167 |
168 | # Regexp for a line that is allowed to be longer than the limit.
 169 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
170 |
171 | # Number of spaces of indent required inside a hanging or continued line.
172 | indent-after-paren=4
173 |
174 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
175 | # tab).
176 | indent-string=' '
177 |
178 | # Maximum number of characters on a single line.
179 | max-line-length=120
180 |
181 | # Maximum number of lines in a module.
182 | max-module-lines=1000
183 |
184 | # Allow the body of a class to be on the same line as the declaration if body
185 | # contains single statement.
186 | single-line-class-stmt=no
187 |
188 | # Allow the body of an if to be on the same line as the test if there is no
189 | # else.
190 | single-line-if-stmt=no
191 |
192 |
193 | [VARIABLES]
194 |
195 | # List of additional names supposed to be defined in builtins. Remember that
196 | # you should avoid defining new builtins when possible.
197 | additional-builtins=
198 |
199 | # Tells whether unused global variables should be treated as a violation.
200 | allow-global-unused-variables=yes
201 |
202 | # List of names allowed to shadow builtins
203 | allowed-redefined-builtins=
204 |
205 | # List of strings which can identify a callback function by name. A callback
206 | # name must start or end with one of those strings.
207 | callbacks=cb_,
208 | _cb
209 |
210 | # A regular expression matching the name of dummy variables (i.e. expected to
211 | # not be used).
212 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
213 |
214 | # Argument names that match this expression will be ignored. Default to name
215 | # with leading underscore.
216 | ignored-argument-names=_.*|^ignored_|^unused_
217 |
218 | # Tells whether we should check for unused import in __init__ files.
219 | init-import=no
220 |
221 | # List of qualified module names which can have objects that can redefine
222 | # builtins.
223 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
224 |
225 |
226 | [SIMILARITIES]
227 |
228 | # Comments are removed from the similarity computation
229 | ignore-comments=yes
230 |
231 | # Docstrings are removed from the similarity computation
232 | ignore-docstrings=yes
233 |
234 | # Imports are removed from the similarity computation
235 | ignore-imports=no
236 |
237 | # Signatures are removed from the similarity computation
238 | ignore-signatures=no
239 |
240 | # Minimum lines number of a similarity.
241 | min-similarity-lines=4
242 |
243 |
244 | [LOGGING]
245 |
246 | # The type of string formatting that logging methods do. `old` means using %
247 | # formatting, `new` is for `{}` formatting.
248 | logging-format-style=old
249 |
250 | # Logging modules to check that the string format arguments are in logging
251 | # function parameter format.
252 | logging-modules=logging
253 |
254 |
255 | [BASIC]
256 |
257 | # Naming style matching correct argument names.
258 | argument-naming-style=snake_case
259 |
260 | # Regular expression matching correct argument names. Overrides argument-
261 | # naming-style.
262 | #argument-rgx=
263 |
264 | # Naming style matching correct attribute names.
265 | attr-naming-style=snake_case
266 |
267 | # Regular expression matching correct attribute names. Overrides attr-naming-
268 | # style.
269 | #attr-rgx=
270 |
271 | # Bad variable names which should always be refused, separated by a comma.
272 | bad-names=foo,
273 | bar,
274 | baz,
275 | toto,
276 | tutu,
277 | tata
278 |
279 | # Bad variable names regexes, separated by a comma. If names match any regex,
280 | # they will always be refused
281 | bad-names-rgxs=
282 |
283 | # Naming style matching correct class attribute names.
284 | class-attribute-naming-style=any
285 |
286 | # Regular expression matching correct class attribute names. Overrides class-
287 | # attribute-naming-style.
288 | #class-attribute-rgx=
289 |
290 | # Naming style matching correct class constant names.
291 | class-const-naming-style=UPPER_CASE
292 |
293 | # Regular expression matching correct class constant names. Overrides class-
294 | # const-naming-style.
295 | #class-const-rgx=
296 |
297 | # Naming style matching correct class names.
298 | class-naming-style=PascalCase
299 |
300 | # Regular expression matching correct class names. Overrides class-naming-
301 | # style.
302 | #class-rgx=
303 |
304 | # Naming style matching correct constant names.
305 | const-naming-style=UPPER_CASE
306 |
307 | # Regular expression matching correct constant names. Overrides const-naming-
308 | # style.
309 | #const-rgx=
310 |
311 | # Minimum line length for functions/classes that require docstrings, shorter
312 | # ones are exempt.
313 | docstring-min-length=-1
314 |
315 | # Naming style matching correct function names.
316 | function-naming-style=snake_case
317 |
318 | # Regular expression matching correct function names. Overrides function-
319 | # naming-style.
320 | #function-rgx=
321 |
322 | # Good variable names which should always be accepted, separated by a comma.
323 | good-names=i,
324 | j,
325 | k,
326 | ex,
327 | Run,
328 | _
329 |
330 | # Good variable names regexes, separated by a comma. If names match any regex,
331 | # they will always be accepted
332 | good-names-rgxs=
333 |
334 | # Include a hint for the correct naming format with invalid-name.
335 | include-naming-hint=no
336 |
337 | # Naming style matching correct inline iteration names.
338 | inlinevar-naming-style=any
339 |
340 | # Regular expression matching correct inline iteration names. Overrides
341 | # inlinevar-naming-style.
342 | #inlinevar-rgx=
343 |
344 | # Naming style matching correct method names.
345 | method-naming-style=snake_case
346 |
347 | # Regular expression matching correct method names. Overrides method-naming-
348 | # style.
349 | #method-rgx=
350 |
351 | # Naming style matching correct module names.
352 | module-naming-style=snake_case
353 |
354 | # Regular expression matching correct module names. Overrides module-naming-
355 | # style.
356 | #module-rgx=
357 |
358 | # Colon-delimited sets of names that determine each other's naming style when
359 | # the name regexes allow several styles.
360 | name-group=
361 |
362 | # Regular expression which should only match function or class names that do
363 | # not require a docstring.
364 | no-docstring-rgx=^_
365 |
366 | # List of decorators that produce properties, such as abc.abstractproperty. Add
367 | # to this list to register other decorators that produce valid properties.
368 | # These decorators are taken in consideration only for invalid-name.
369 | property-classes=abc.abstractproperty
370 |
371 | # Naming style matching correct variable names.
372 | variable-naming-style=snake_case
373 |
374 | # Regular expression matching correct variable names. Overrides variable-
375 | # naming-style.
376 | #variable-rgx=
377 |
378 |
379 | [TYPECHECK]
380 |
381 | # List of decorators that produce context managers, such as
382 | # contextlib.contextmanager. Add to this list to register other decorators that
383 | # produce valid context managers.
384 | contextmanager-decorators=contextlib.contextmanager
385 |
386 | # List of members which are set dynamically and missed by pylint inference
387 | # system, and so shouldn't trigger E1101 when accessed. Python regular
388 | # expressions are accepted.
389 | generated-members=
390 |
391 | # Tells whether missing members accessed in mixin class should be ignored. A
392 | # class is considered mixin if its name matches the mixin-class-rgx option.
393 | ignore-mixin-members=yes
394 |
395 | # Tells whether to warn about missing members when the owner of the attribute
396 | # is inferred to be None.
397 | ignore-none=yes
398 |
399 | # This flag controls whether pylint should warn about no-member and similar
400 | # checks whenever an opaque object is returned when inferring. The inference
401 | # can return multiple potential results while evaluating a Python object, but
402 | # some branches might not be evaluated, which results in partial inference. In
403 | # that case, it might be useful to still emit no-member and other checks for
404 | # the rest of the inferred objects.
405 | ignore-on-opaque-inference=yes
406 |
407 | # List of class names for which member attributes should not be checked (useful
408 | # for classes with dynamically set attributes). This supports the use of
409 | # qualified names.
410 | ignored-classes=optparse.Values,thread._local,_thread._local
411 |
412 | # List of module names for which member attributes should not be checked
413 | # (useful for modules/projects where namespaces are manipulated during runtime
414 | # and thus existing member attributes cannot be deduced by static analysis). It
415 | # supports qualified module names, as well as Unix pattern matching.
416 | ignored-modules=
417 |
418 | # Show a hint with possible names when a member name was not found. The aspect
419 | # of finding the hint is based on edit distance.
420 | missing-member-hint=yes
421 |
422 | # The minimum edit distance a name should have in order to be considered a
423 | # similar match for a missing member name.
424 | missing-member-hint-distance=1
425 |
426 | # The total number of similar names that should be taken in consideration when
427 | # showing a hint for a missing member.
428 | missing-member-max-choices=1
429 |
 430 | # Regex pattern to define which classes are considered mixins when
 431 | # ignore-mixin-members is set to 'yes'
432 | mixin-class-rgx=.*[Mm]ixin
433 |
434 | # List of decorators that change the signature of a decorated function.
435 | signature-mutators=
436 |
437 |
438 | [MISCELLANEOUS]
439 |
440 | # List of note tags to take in consideration, separated by a comma.
441 | notes=FIXME,
442 | XXX,
443 | TODO
444 |
445 | # Regular expression of note tags to take in consideration.
446 | #notes-rgx=
447 |
448 |
449 | [STRING]
450 |
451 | # This flag controls whether inconsistent-quotes generates a warning when the
452 | # character used as a quote delimiter is used inconsistently within a module.
453 | check-quote-consistency=no
454 |
455 | # This flag controls whether the implicit-str-concat should generate a warning
456 | # on implicit string concatenation in sequences defined over several lines.
457 | check-str-concat-over-line-jumps=no
458 |
459 |
460 | [IMPORTS]
461 |
462 | # List of modules that can be imported at any level, not just the top level
463 | # one.
464 | allow-any-import-level=
465 |
466 | # Allow wildcard imports from modules that define __all__.
467 | allow-wildcard-with-all=no
468 |
469 | # Analyse import fallback blocks. This can be used to support both Python 2 and
470 | # 3 compatible code, which means that the block might have code that exists
471 | # only in one or another interpreter, leading to false positives when analysed.
472 | analyse-fallback-blocks=no
473 |
474 | # Deprecated modules which should not be used, separated by a comma.
475 | deprecated-modules=
476 |
477 | # Output a graph (.gv or any supported image format) of external dependencies
478 | # to the given file (report RP0402 must not be disabled).
479 | ext-import-graph=
480 |
481 | # Output a graph (.gv or any supported image format) of all (i.e. internal and
482 | # external) dependencies to the given file (report RP0402 must not be
483 | # disabled).
484 | import-graph=
485 |
486 | # Output a graph (.gv or any supported image format) of internal dependencies
487 | # to the given file (report RP0402 must not be disabled).
488 | int-import-graph=
489 |
490 | # Force import order to recognize a module as part of the standard
491 | # compatibility libraries.
492 | known-standard-library=
493 |
494 | # Force import order to recognize a module as part of a third party library.
495 | known-third-party=enchant
496 |
497 | # Couples of modules and preferred modules, separated by a comma.
498 | preferred-modules=
499 |
500 |
501 | [DESIGN]
502 |
503 | # List of regular expressions of class ancestor names to ignore when counting
504 | # public methods (see R0903)
505 | exclude-too-few-public-methods=
506 |
507 | # List of qualified class names to ignore when counting class parents (see
508 | # R0901)
509 | ignored-parents=
510 |
511 | # Maximum number of arguments for function / method.
512 | max-args=5
513 |
514 | # Maximum number of attributes for a class (see R0902).
515 | max-attributes=7
516 |
517 | # Maximum number of boolean expressions in an if statement (see R0916).
518 | max-bool-expr=5
519 |
520 | # Maximum number of branch for function / method body.
521 | max-branches=12
522 |
523 | # Maximum number of locals for function / method body.
524 | max-locals=15
525 |
526 | # Maximum number of parents for a class (see R0901).
527 | max-parents=7
528 |
529 | # Maximum number of public methods for a class (see R0904).
530 | max-public-methods=20
531 |
532 | # Maximum number of return / yield for function / method body.
533 | max-returns=6
534 |
535 | # Maximum number of statements in function / method body.
536 | max-statements=50
537 |
538 | # Minimum number of public methods for a class (see R0903).
539 | min-public-methods=2
540 |
541 |
542 | [CLASSES]
543 |
544 | # Warn about protected attribute access inside special methods
545 | check-protected-access-in-special-methods=no
546 |
547 | # List of method names used to declare (i.e. assign) instance attributes.
548 | defining-attr-methods=__init__,
549 | __new__,
550 | setUp,
551 | __post_init__
552 |
553 | # List of member names, which should be excluded from the protected access
554 | # warning.
555 | exclude-protected=_asdict,
556 | _fields,
557 | _replace,
558 | _source,
559 | _make
560 |
561 | # List of valid names for the first argument in a class method.
562 | valid-classmethod-first-arg=cls
563 |
564 | # List of valid names for the first argument in a metaclass class method.
565 | valid-metaclass-classmethod-first-arg=cls
566 |
567 |
568 | [EXCEPTIONS]
569 |
570 | # Exceptions that will emit a warning when being caught. Defaults to
571 | # "BaseException, Exception".
572 | overgeneral-exceptions=BaseException,
573 | Exception
574 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.ONESHELL:
SHELL := /bin/bash

# Build and upload the distribution to PyPI (requires twine credentials).
pypi: dist
	twine upload --repository pypi dist/*

# Build sdist and wheel into dist/ after removing stale artifacts.
dist: clean
	python3 -m build

# Remove previous build artifacts.
clean:
	rm -rf dist
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # nbmetaclean
2 | Collections of python scripts for checking and cleaning Jupyter Notebooks metadata, execution_count and optionally output.
3 | Can be used as command line tool or pre-commit hook.
4 |
5 |
6 | Pure Python, no dependencies.
7 |
8 | Can be used as a pre-commit hook or as a command line tool.
9 |
10 |
11 | [](https://pypi.org/project/nbmetaclean/)
12 | [](https://badge.fury.io/py/nbmetaclean)
13 | [](https://github.com/ayasyrev/nbmetaclean/actions?workflow=Tests)
14 | [](https://codecov.io/gh/ayasyrev/nbmetaclean)
15 |
16 | ## nbmetaclean
17 |
18 | Clean Jupyter Notebooks metadata, execution_count and optionally output.
19 |
20 | ## nbcheck
21 | Check Jupyter Notebooks for errors and (or) warnings in outputs.
22 |
23 |
24 | ## Base usage
25 |
26 | ### Pre-commit hook
27 | Nbmetaclean can be used as a pre-commit hook, with [pre-commit](https://pre-commit.com/).
28 | You do not need to install nbmetaclean, it will be installed automatically.
29 | add to `.pre-commit-config.yaml`:
30 | ```yaml
31 | repos:
32 | - repo: https://github.com/ayasyrev/nbmetaclean
33 | rev: 0.1.1
34 | hooks:
35 | - id: nbmetaclean
36 | - id: nbcheck
37 | args: [ --ec, --err, --warn ]
38 | ```
39 |
40 |
41 |
42 | ### Command line tool
43 |
44 | #### Without install:
45 | If you use the [uv](https://github.com/astral-sh/uv) package manager, you can run nbmetaclean without installing it.
46 | To clean notebooks:
47 | ```bash
48 | uvx nbmetaclean
49 | ```
50 | To check notebooks:
51 | ```bash
52 | uvx --from nbmetaclean nbcheck --ec --err --warn
53 | ```
54 |
55 | #### Install:
56 | ```bash
57 | pip install nbmetaclean
58 | ```
59 |
60 | Usage:
61 | run `nbmetaclean` or `nbcheck` command with `path` to notebook or folder with notebooks.
62 | If no `path` is provided, current directory will be used as `path`.
63 |
64 | It is possible to use the `nbclean` command instead of `nbmetaclean`.
65 | `nbmetaclean` is the default command name so that it works well with `uvx`.
66 |
67 |
68 |
69 | ```bash
70 | nbmetaclean
71 | ```
72 |
73 | `nbcheck` should be run with flags:
74 | - `--ec` for execution_count check
75 | - `--err` for check errors in outputs
76 | - `--warn` for check warnings in outputs
77 | ```bash
78 | nbcheck --ec --err --warn
79 | ```
80 |
81 |
82 | ## Nbmetaclean
83 | ### Default settings
84 | By default, the following settings are used:
85 |
86 | - Clean notebook metadata, except `authors` and `language_info / name`.
87 | - Clean cells execution_count.
88 | - Preserve metadata at cells.
89 | - Preserve cells outputs.
90 | - After cleaning notebook, timestamp for file will be set to previous values.
91 |
92 |
93 |
94 |
95 |
96 |
97 | ### Arguments
98 | Check available arguments:
99 |
100 | ```bash
101 | nbmetaclean -h
102 |
103 | usage: nbmetaclean [-h] [-s] [--not_ec] [--not-pt] [--dont_clear_nb_metadata] [--clear_cell_metadata] [--clear_outputs]
104 | [--nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...]]
105 | [--cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...]] [--dont_merge_masks] [--clean_hidden_nbs] [-D] [-V]
106 | [path ...]
107 |
108 | Clean metadata and execution_count from Jupyter notebooks.
109 |
110 | positional arguments:
111 | path Path for nb or folder with notebooks.
112 |
113 | options:
114 | -h, --help show this help message and exit
115 | -s, --silent Silent mode.
116 | --not_ec Do not clear execution_count.
117 | --not-pt Do not preserve timestamp.
118 | --dont_clear_nb_metadata
119 | Do not clear notebook metadata.
120 | --clear_cell_metadata
121 | Clear cell metadata.
122 | --clear_outputs Clear outputs.
123 | --nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...]
124 | Preserve mask for notebook metadata.
125 | --cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...]
126 | Preserve mask for cell metadata.
127 | --dont_merge_masks Do not merge masks.
128 | --clean_hidden_nbs Clean hidden notebooks.
129 | -D, --dry_run perform a trial run, don't write results
130 | -V, --verbose Verbose mode. Print extra information.
131 | ```
132 |
133 | ### Execution_count
134 | If you want to leave execution_count add `--not_ec` flag at command line or `args: [--not_ec]` line to `.pre-commit-config.yaml`.
135 |
136 | ```yaml
137 | repos:
138 | - repo: https://github.com/ayasyrev/nbmetaclean
139 | rev: 0.1.1
140 | hooks:
141 | - id: nbmetaclean
142 | args: [ --not_ec ]
143 | ```
144 |
145 | ```bash
146 | nbmetaclean --not_ec
147 | ```
148 |
149 | ### Clear outputs
150 | If you want to clear outputs, add `--clear_outputs` at the command line or the `args: [ --clear_outputs ]` line to `.pre-commit-config.yaml`.
151 | ```yaml
152 | repos:
153 | - repo: https://github.com/ayasyrev/nbmetaclean
154 |   rev: 0.1.1
155 |   hooks:
156 |   - id: nbmetaclean
157 |     args: [ --clear_outputs ]
158 | ```
159 |
160 | ```bash
161 | nbmetaclean --clear_outputs
162 | ```
163 |
164 | ## Nbcheck
165 | Check Jupyter Notebooks for correct execution_count, errors and (or) warnings in outputs.
166 |
167 | ### Execution_count
168 | Check that all code cells executed one after another.
169 |
170 | #### Strict mode
171 | By default, execution_count check in `strict` mode.
172 | All cells must be executed, one after another.
173 |
174 | pre-commit config example:
175 | ```yaml
176 | repos:
177 | - repo: https://github.com/ayasyrev/nbmetaclean
178 | rev: 0.1.1
179 | hooks:
180 | - id: nbcheck
181 | args: [ --ec ]
182 | ```
183 |
184 | command line example:
185 | ```bash
186 | nbcheck --ec
187 | ```
188 |
189 | #### Not strict mode
190 | `--not_strict` flag can be used to check that next cell executed after previous one, but execution number can be more than `+1`.
191 |
192 | pre-commit config example:
193 | ```yaml
194 | repos:
195 | - repo: https://github.com/ayasyrev/nbmetaclean
196 | rev: 0.1.1
197 | hooks:
198 | - id: nbcheck
199 | args: [ --ec, --not_strict ]
200 | ```
201 |
202 | command line example:
203 | ```bash
204 | nbcheck --ec --not_strict
205 | ```
206 |
207 | #### Allow notebooks with no execution_count
208 |
209 | `--no_exec` flag allows notebooks with all cells without execution_count.
210 | If notebook has cells with execution_count and without execution_count, pre-commit will return error.
211 |
212 | pre-commit config example:
213 | ```yaml
214 | repos:
215 | - repo: https://github.com/ayasyrev/nbmetaclean
216 |   rev: 0.1.1
217 |   hooks:
218 |   - id: nbcheck
219 |     args: [ --ec, --no_exec ]
219 | ```
220 |
221 | command line example:
222 | ```bash
223 | nbcheck --ec --no_exec
224 | ```
225 |
226 |
227 |
228 | ### Errors and Warnings
229 |
230 | `--err` and `--warn` flags can be used to check for errors and warnings in outputs.
231 |
232 | pre-commit config example:
233 | ```yaml
234 | repos:
235 | - repo: https://github.com/ayasyrev/nbmetaclean
236 | rev: 0.1.1
237 | hooks:
238 | - id: nbcheck
239 | args: [ --err, --warn ]
240 | ```
241 |
242 | command line example:
243 | ```bash
244 | nbcheck --err --warn
245 | ```
246 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | hide:
3 | - navigation
4 | ---
5 |
6 | # nbmetaclean
7 | Collections of python scripts for checking and cleaning Jupyter Notebooks metadata, execution_count and optionally output.
8 | Can be used as command line tool or pre-commit hook.
9 |
10 |
11 | Pure Python, no dependencies.
12 |
13 | Can be used as a pre-commit hook or as a command line tool.
14 |
15 |
16 | [](https://pypi.org/project/nbmetaclean/)
17 | [](https://badge.fury.io/py/nbmetaclean)
18 | [](https://github.com/ayasyrev/nbmetaclean/actions?workflow=Tests) [](https://codecov.io/gh/ayasyrev/nbmetaclean)
19 |
20 | ## nbmetaclean
21 |
22 | Clean Jupyter Notebooks metadata, execution_count and optionally output.
23 |
24 | ## nbcheck
25 | Check Jupyter Notebooks for errors and (or) warnings in outputs.
26 |
27 |
28 | ## Base usage
29 |
30 | ### Pre-commit hook
31 | Nbmetaclean can be used as a pre-commit hook, with [pre-commit](https://pre-commit.com/).
32 | You do not need to install nbmetaclean, it will be installed automatically.
33 | add to `.pre-commit-config.yaml`:
34 | ```yaml
35 | repos:
36 | - repo: https://github.com/ayasyrev/nbmetaclean
37 | rev: 0.1.1
38 | hooks:
39 | - id: nbmetaclean
40 | - id: nbcheck
41 | args: [ --ec, --err, --warn ]
42 | ```
43 |
44 |
45 |
46 | ### Command line tool
47 |
48 | #### Without install:
49 | If you use the [uv](https://github.com/astral-sh/uv) package manager, you can run nbmetaclean without installing it.
50 | To clean notebooks:
51 | ```bash
52 | uvx nbmetaclean
53 | ```
54 | To check notebooks:
55 | ```bash
56 | uvx --from nbmetaclean nbcheck --ec --err --warn
57 | ```
58 |
59 |
60 | #### Install:
61 | ```bash
62 | pip install nbmetaclean
63 | ```
64 |
65 | Usage:
66 | run `nbmetaclean` or `nbcheck` command with `path` to notebook or folder with notebooks.
67 | If no `path` is provided, current directory will be used as `path`.
68 |
69 | It is possible to use the `nbclean` command instead of `nbmetaclean`.
70 | `nbmetaclean` is the default command name so that it works well with `uvx`.
71 |
72 |
73 |
74 | ```bash
75 | nbmetaclean
76 | ```
77 |
78 | `nbcheck` should be run with flags:
79 | - `--ec` for execution_count check
80 | - `--err` for check errors in outputs
81 | - `--warn` for check warnings in outputs
82 | ```bash
83 | nbcheck --ec --err --warn
84 | ```
85 |
86 |
87 | ## Nbmetaclean
88 | ### Default settings
89 | By default, the following settings are used:
90 |
91 | - Clean notebook metadata, except `authors` and `language_info / name`.
92 | - Clean cells execution_count.
93 | - Preserve metadata at cells.
94 | - Preserve cells outputs.
95 | - After cleaning notebook, timestamp for file will be set to previous values.
96 |
97 |
98 |
99 |
100 |
101 |
102 | ### Arguments
103 | Check available arguments:
104 |
105 | ```bash
106 | nbmetaclean -h
107 |
108 | usage: nbmetaclean [-h] [-s] [--not_ec] [--not-pt] [--dont_clear_nb_metadata] [--clear_cell_metadata] [--clear_outputs]
109 | [--nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...]]
110 | [--cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...]] [--dont_merge_masks] [--clean_hidden_nbs] [-D] [-V]
111 | [path ...]
112 |
113 | Clean metadata and execution_count from Jupyter notebooks.
114 |
115 | positional arguments:
116 | path Path for nb or folder with notebooks.
117 |
118 | options:
119 | -h, --help show this help message and exit
120 | -s, --silent Silent mode.
121 | --not_ec Do not clear execution_count.
122 | --not-pt Do not preserve timestamp.
123 | --dont_clear_nb_metadata
124 | Do not clear notebook metadata.
125 | --clear_cell_metadata
126 | Clear cell metadata.
127 | --clear_outputs Clear outputs.
128 | --nb_metadata_preserve_mask NB_METADATA_PRESERVE_MASK [NB_METADATA_PRESERVE_MASK ...]
129 | Preserve mask for notebook metadata.
130 | --cell_metadata_preserve_mask CELL_METADATA_PRESERVE_MASK [CELL_METADATA_PRESERVE_MASK ...]
131 | Preserve mask for cell metadata.
132 | --dont_merge_masks Do not merge masks.
133 | --clean_hidden_nbs Clean hidden notebooks.
134 | -D, --dry_run perform a trial run, don't write results
135 | -V, --verbose Verbose mode. Print extra information.
136 | ```
137 |
138 | ### Execution_count
139 | If you want to leave execution_count add `--not_ec` flag at command line or `args: [--not_ec]` line to `.pre-commit-config.yaml`.
140 |
141 | ```yaml
142 | repos:
143 | - repo: https://github.com/ayasyrev/nbmetaclean
144 | rev: 0.1.1
145 | hooks:
146 | - id: nbmetaclean
147 | args: [ --not_ec ]
148 | ```
149 |
150 | ```bash
151 | nbmetaclean --not_ec
152 | ```
153 |
154 | ### Clear outputs
155 | If you want to clear outputs, add `--clear_outputs` at the command line or the `args: [ --clear_outputs ]` line to `.pre-commit-config.yaml`.
156 | ```yaml
157 | repos:
158 | - repo: https://github.com/ayasyrev/nbmetaclean
159 |   rev: 0.1.1
160 |   hooks:
161 |   - id: nbmetaclean
162 |     args: [ --clear_outputs ]
163 | ```
164 |
165 | ```bash
166 | nbmetaclean --clear_outputs
167 | ```
168 |
169 | ## Nbcheck
170 | Check Jupyter Notebooks for correct execution_count, errors and (or) warnings in outputs.
171 |
172 | ### Execution_count
173 | Check that all code cells executed one after another.
174 |
175 | #### Strict mode
176 | By default, execution_count check in `strict` mode.
177 | All cells must be executed, one after another.
178 |
179 | pre-commit config example:
180 | ```yaml
181 | repos:
182 | - repo: https://github.com/ayasyrev/nbmetaclean
183 | rev: 0.1.1
184 | hooks:
185 | - id: nbcheck
186 | args: [ --ec ]
187 | ```
188 |
189 | command line example:
190 | ```bash
191 | nbcheck --ec
192 | ```
193 |
194 | #### Not strict mode
195 | `--not_strict` flag can be used to check that next cell executed after previous one, but execution number can be more than `+1`.
196 |
197 | pre-commit config example:
198 | ```yaml
199 | repos:
200 | - repo: https://github.com/ayasyrev/nbmetaclean
201 | rev: 0.1.1
202 | hooks:
203 | - id: nbcheck
204 | args: [ --ec, --not_strict ]
205 | ```
206 |
207 | command line example:
208 | ```bash
209 | nbcheck --ec --not_strict
210 | ```
211 |
212 | #### Allow notebooks with no execution_count
213 |
214 | `--no_exec` flag allows notebooks with all cells without execution_count.
215 | If notebook has cells with execution_count and without execution_count, pre-commit will return error.
216 |
217 | pre-commit config example:
218 | ```yaml
219 | repos:
220 | - repo: https://github.com/ayasyrev/nbmetaclean
221 |   rev: 0.1.1
222 |   hooks:
223 |   - id: nbcheck
224 |     args: [ --ec, --no_exec ]
224 | ```
225 |
226 | command line example:
227 | ```bash
228 | nbcheck --ec --no_exec
229 | ```
230 |
231 |
232 |
233 | ### Errors and Warnings
234 |
235 | `--err` and `--warn` flags can be used to check for errors and warnings in outputs.
236 |
237 | pre-commit config example:
238 | ```yaml
239 | repos:
240 | - repo: https://github.com/ayasyrev/nbmetaclean
241 | rev: 0.1.1
242 | hooks:
243 | - id: nbcheck
244 | args: [ --err, --warn ]
245 | ```
246 |
247 | command line example:
248 | ```bash
249 | nbcheck --err --warn
250 | ```
251 |
--------------------------------------------------------------------------------
/docs/overrides/partials/copyright.html:
--------------------------------------------------------------------------------
1 |
2 | {% if config.copyright %}
3 |
4 | {{ config.copyright }}
5 |
6 | {% endif %}
7 | {% if not config.extra.generator == false %}
8 | Made with
9 |
10 | NbDocs
11 |
12 | and
13 |
14 | Material for MkDocs
15 |
16 | {% endif %}
17 |
18 |
--------------------------------------------------------------------------------
/mkdocs.yaml:
--------------------------------------------------------------------------------
1 | site_name: Nbmetaclean
2 | repo_url: https://github.com/ayasyrev/nbmetaclean
3 | repo_name: nbmetaclean
4 | docs_dir: docs
5 |
6 | # copyright:
7 | theme:
8 | name: material
9 | custom_dir: docs/overrides
10 |
11 | palette:
12 | - scheme: default
13 | toggle:
14 | icon: material/toggle-switch-off-outline
15 | name: Switch to dark mode
16 | - scheme: slate
17 | toggle:
18 | icon: material/toggle-switch
19 | name: Switch to light mode
20 | markdown_extensions:
21 | - admonition
22 | - pymdownx.details
23 | - pymdownx.superfences
24 |
25 | extra:
26 | analytics:
27 | provider: google
28 | property: G-0F3FK713C2
29 | copyright: Copyright © 2023-2024 Andrei Yasyrev.
30 |
--------------------------------------------------------------------------------
/noxfile.py:
--------------------------------------------------------------------------------
1 | import nox
2 |
3 |
nox.options.default_venv_backend = "uv|virtualenv"
nox.options.reuse_existing_virtualenvs = True


@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"])
def tests(session: nox.Session) -> None:
    """Install the package with test extras and run pytest.

    Extra pytest arguments may be passed after ``--`` on the nox command
    line; coverage (``--cov``) is collected by default.
    """
    pytest_args = session.posargs if session.posargs else ["--cov"]
    session.install("-e", ".[test]")
    session.run("pytest", *pytest_args)
13 |
--------------------------------------------------------------------------------
/noxfile_conda.py:
--------------------------------------------------------------------------------
1 | import nox
2 |
nox.options.default_venv_backend = "mamba|conda"
nox.options.reuse_existing_virtualenvs = True


@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"])
def conda_tests(session: nox.Session) -> None:
    """Run the test suite inside a conda/mamba environment.

    Installs via ``uv pip`` for speed; pytest runs with coverage unless
    other arguments are supplied after ``--``.
    """
    pytest_args = session.posargs if session.posargs else ["--cov"]
    session.conda_install("uv")
    session.run("uv", "pip", "install", "-e", ".[test]")
    session.run("pytest", *pytest_args)
13 |
--------------------------------------------------------------------------------
/noxfile_conda_lint.py:
--------------------------------------------------------------------------------
1 | import nox
2 |
locations = "."
nox.options.default_venv_backend = "mamba|conda"
nox.options.reuse_existing_virtualenvs = True


@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"])
def conda_lint(session: nox.Session) -> None:
    """Run ruff over the repository inside a conda/mamba environment."""
    check_args = session.posargs if session.posargs else locations
    session.conda_install("ruff")
    session.run("ruff", "check", *check_args)
13 |
--------------------------------------------------------------------------------
/noxfile_lint.py:
--------------------------------------------------------------------------------
1 | import nox
2 |
locations = "."
nox.options.default_venv_backend = "uv|virtualenv"
nox.options.reuse_existing_virtualenvs = True


@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"])
def lint(session: nox.Session) -> None:
    """Run ruff over the repository in a fresh virtualenv."""
    check_args = session.posargs if session.posargs else locations
    session.install("ruff")
    session.run("ruff", "check", *check_args)
13 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.coverage.paths]
2 | source = ["src"]
3 |
4 | [tool.coverage.run]
5 | branch = true
6 | source = ["nbmetaclean"]
7 |
8 | [tool.coverage.report]
9 | show_missing = true
10 |
11 | [tool.ruff]
12 | extend-include = ["*.ipynb"]
13 | indent-width = 4
14 |
15 | [tool.ruff.lint]
16 | explicit-preview-rules = true
17 |
18 | [tool.ruff.format]
19 | quote-style = "double"
20 | indent-style = "space"
21 | skip-magic-trailing-comma = false
22 | line-ending = "auto"
23 |
--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | black
2 | black[jupyter]
3 | coverage[toml]
4 | flake8
5 | isort
6 | mypy
7 | nox
8 | pre-commit
9 | ruff
10 |
--------------------------------------------------------------------------------
/requirements_test.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
3 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = nbmetaclean
3 | version = attr: nbmetaclean.version.__version__
4 | author = Yasyrev Andrei
5 | author_email = a.yasyrev@gmail.com
6 | description = Clean jupyter notebooks. Remove metadata and execution counts.
7 | long_description = file: README.md
8 | long_description_content_type = text/markdown
9 | url = https://github.com/ayasyrev/nbmetaclean
10 | license = apache2
11 | classifiers =
12 | Programming Language :: Python :: 3.8
13 | Programming Language :: Python :: 3.9
14 | Programming Language :: Python :: 3.10
15 | Programming Language :: Python :: 3.11
16 | Programming Language :: Python :: 3.12
17 | Programming Language :: Python :: 3.13
18 | License :: OSI Approved :: Apache Software License
19 | Operating System :: OS Independent
20 |
21 | [options]
22 | package_dir =
23 | = src
24 | packages = find:
25 | python_requires = >=3.8
26 |
27 | [options.packages.find]
28 | where = src
29 |
30 | [options.entry_points]
31 | console_scripts =
32 | nbmetaclean=nbmetaclean.app_clean:app_clean
33 | nbclean=nbmetaclean.app_clean:app_clean
34 | nbcheck=nbmetaclean.app_check:app_check
35 | pipx.run =
36 | nbmetaclean=nbmetaclean.app_clean:app_clean
37 | nbclean=nbmetaclean.app_clean:app_clean
38 | nbcheck=nbmetaclean.app_check:app_check
39 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from __future__ import annotations

from setuptools import setup
3 |
4 | REQUIREMENTS_TEST_FILENAME = "requirements_test.txt"
5 | REQUIREMENTS_DEV_FILENAME = "requirements_dev.txt"
6 |
7 |
def load_requirements(filename: str) -> list[str]:
    """Read a pip requirements file and return its requirement lines.

    Blank lines and ``#`` comment lines are skipped so the result can be
    passed directly to setuptools' ``extras_require`` without invalid
    entries. Returns an empty list if the file does not exist.
    """
    try:
        with open(filename, encoding="utf-8") as fh:
            return [
                line.strip()
                for line in fh.read().splitlines()
                # pip treats blank lines and `#`-prefixed lines as non-requirements
                if line.strip() and not line.lstrip().startswith("#")
            ]
    except FileNotFoundError:
        return []
15 |
16 |
# Requirement lists loaded from the checked-in requirements files.
TEST_REQUIRED = load_requirements(REQUIREMENTS_TEST_FILENAME)
DEV_REQUIRED = load_requirements(REQUIREMENTS_DEV_FILENAME)


# What packages are optional?
# Installed via ``pip install nbmetaclean[test]`` / ``nbmetaclean[dev]``.
EXTRAS = {
    "test": TEST_REQUIRED,
    "dev": DEV_REQUIRED + TEST_REQUIRED,
}


# Static metadata lives in setup.cfg; only the dynamic extras are passed here.
setup(
    extras_require=EXTRAS,
)
31 |
--------------------------------------------------------------------------------
/src/nbmetaclean/__init__.py:
--------------------------------------------------------------------------------
1 | from .check import check_nb_ec, check_nb_errors
2 | from .clean import clean_nb_file, CleanConfig, clean_nb
3 | from .helpers import read_nb, write_nb, get_nb_names, get_nb_names_from_list
4 |
5 |
# Names re-exported as the package's public API.
__all__ = [
    "get_nb_names",
    "get_nb_names_from_list",
    "check_nb_ec",
    "check_nb_errors",
    "clean_nb",
    "clean_nb_file",
    "CleanConfig",
    "read_nb",
    "write_nb",
]
17 |
--------------------------------------------------------------------------------
/src/nbmetaclean/app_check.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import argparse
4 | from pathlib import Path
5 | import sys
6 |
7 | from nbmetaclean.check import check_nb_ec, check_nb_errors, check_nb_warnings
8 | from nbmetaclean.helpers import get_nb_names_from_list, read_nb
9 | from nbmetaclean.version import __version__
10 |
11 |
# CLI definition for the `nbcheck` entry point. Each flag maps onto one of the
# check_nb_* helpers invoked in app_check() below.
parser = argparse.ArgumentParser(
    prog="nbcheck",
    description="Check Jupyter notebooks for correct sequence of execution_count and (or) errors in outputs.",
)
parser.add_argument(
    "path",
    default=".",
    nargs="*",
    help="Path for nb or folder with notebooks.",
)
parser.add_argument(
    "--ec",
    action="store_true",
    help="Check execution_count.",
)
parser.add_argument(
    "--err",
    action="store_true",
    help="Check errors in outputs.",
)
parser.add_argument(
    "--warn",
    action="store_true",
    help="Check warnings in outputs.",
)
# Passed to check_nb_ec as `strict=not cfg.not_strict`: allows gaps in the
# execution_count sequence as long as it still increases.
parser.add_argument(
    "--not_strict",
    action="store_true",
    help="Not strict mode.",
)
parser.add_argument(
    "--no_exec",
    action="store_true",
    help="Ignore notebooks with all code cells without execution_count.",
)
# NOTE: uppercase -V is verbose; lowercase -v prints the version.
parser.add_argument(
    "-V",
    "--verbose",
    action="store_true",
    help="Verbose mode. Print extra information.",
)
parser.add_argument(
    "-v",
    "--version",
    action="store_true",
    help="Print version information.",
)
59 |
60 |
def print_error(
    nbs: list[Path],
    message: str,
) -> None:
    """Report the notebooks that failed a check, one per line."""
    lines = [f"{len(nbs)} notebooks with {message}:"]
    lines.extend(f"-  {nb}" for nb in nbs)
    print("\n".join(lines))
69 |
70 |
def print_results(
    wrong_ec: list[Path],
    nb_errors: list[Path],
    nb_warnings: list[Path],
    read_error: list[Path],
) -> None:
    """Print a report for every failing category that has entries."""
    reports = (
        (wrong_ec, "wrong execution_count"),
        (nb_errors, "errors in outputs"),
        (nb_warnings, "warnings in outputs"),
        (read_error, "read error"),
    )
    for nbs, message in reports:
        if nbs:
            print_error(nbs, message)
86 |
87 |
def app_check() -> None:
    """Check notebooks for correct sequence of execution_count and errors in outputs."""
    cfg = parser.parse_args()

    if cfg.version:
        print(f"nbcheck from nbmetaclean, version: {__version__}")
        sys.exit(0)

    # At least one check flag is required.
    if not (cfg.ec or cfg.err or cfg.warn):
        print(
            "No checks are selected. Please select at least one check: "
            "--ec (for execution_count) or "
            "--err (for errors in outputs) or "
            "--warn (for warnings in outputs)."
        )
        sys.exit(1)

    nb_files = get_nb_names_from_list(cfg.path)
    if cfg.verbose:
        print(f"Checking {len(nb_files)} notebooks.")

    read_error: list[Path] = []
    wrong_ec: list[Path] = []
    nb_errors: list[Path] = []
    nb_warnings: list[Path] = []
    for nb_path in nb_files:
        nb = read_nb(nb_path)
        if nb is None:
            read_error.append(nb_path)
            continue
        if cfg.ec and not check_nb_ec(nb, not cfg.not_strict, cfg.no_exec):
            wrong_ec.append(nb_path)
        if cfg.err and not check_nb_errors(nb):
            nb_errors.append(nb_path)
        if cfg.warn and not check_nb_warnings(nb):
            nb_warnings.append(nb_path)

    print_results(wrong_ec, nb_errors, nb_warnings, read_error)

    # Non-zero exit so CI / pre-commit fails when any notebook is bad.
    if wrong_ec or nb_errors or nb_warnings or read_error:
        sys.exit(1)


if __name__ == "__main__":  # pragma: no cover
    app_check()
136 |
--------------------------------------------------------------------------------
/src/nbmetaclean/app_clean.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import argparse
4 | import sys
5 | from pathlib import Path
6 | from typing import Union
7 |
8 | from nbmetaclean.clean import CleanConfig, TupleStr, clean_nb_file
9 | from nbmetaclean.helpers import get_nb_names_from_list
10 | from nbmetaclean.version import __version__
11 |
12 |
# CLI definition for the `nbmetaclean` entry point; flags are translated into a
# CleanConfig in app_clean() below.
parser = argparse.ArgumentParser(
    prog="nbmetaclean",
    description="Clean metadata and execution_count from Jupyter notebooks.",
)
parser.add_argument(
    "path",
    default=".",
    nargs="*",
    help="Path for nb or folder with notebooks.",
)
parser.add_argument(
    "-s",
    "--silent",
    action="store_true",
    help="Silent mode.",
)
# NOTE: store_false — cfg.not_ec defaults to True and becomes False when the
# flag is given; it is fed directly into CleanConfig.clear_execution_count.
parser.add_argument(
    "--not_ec",
    action="store_false",
    help="Do not clear execution_count.",
)
parser.add_argument(
    "--not-pt",
    action="store_true",
    help="Do not preserve timestamp.",
)
parser.add_argument(
    "--dont_clear_nb_metadata",
    action="store_true",
    help="Do not clear notebook metadata.",
)
parser.add_argument(
    "--clear_cell_metadata",
    action="store_true",
    help="Clear cell metadata.",
)
parser.add_argument(
    "--clear_outputs",
    action="store_true",
    help="Clear outputs.",
)
# Masks are dotted key paths, split into tuples by process_mask() below.
parser.add_argument(
    "--nb_metadata_preserve_mask",
    nargs="+",
    help="Preserve mask for notebook metadata.",
)
parser.add_argument(
    "--cell_metadata_preserve_mask",
    nargs="+",
    help="Preserve mask for cell metadata.",
)
parser.add_argument(
    "--dont_merge_masks",
    action="store_true",
    help="Do not merge masks.",
)
parser.add_argument(
    "--clean_hidden_nbs",
    action="store_true",
    help="Clean hidden notebooks.",
)
parser.add_argument(
    "-D",
    "--dry_run",
    action="store_true",
    help="perform a trial run, don't write results",
)
parser.add_argument(
    "-V",
    "--verbose",
    action="store_true",
    help="Verbose mode. Print extra information.",
)
parser.add_argument(
    "-v",
    "--version",
    action="store_true",
    help="Print version information.",
)
92 |
93 |
def process_mask(mask: Union[list[str], None]) -> Union[tuple[TupleStr, ...], None]:
    """Split dotted CLI mask strings into key-path tuples; pass None through."""
    if mask is None:
        return None
    paths = [tuple(item.split(".")) for item in mask]
    return tuple(paths)
98 |
99 |
def print_result(
    cleaned: list[Path],
    errors: list[Path],
    clean_config: CleanConfig,
    path: list[str],
    num_nbs: int,
) -> None:
    """Print a summary of the cleaning run: cleaned notebooks and read errors."""
    if clean_config.verbose:
        print(
            f"Path: {', '.join(path)}, preserve timestamp: {clean_config.preserve_timestamp}"
        )
        print(f"checked: {num_nbs} notebooks")
    if cleaned:
        header = (
            f"cleaned: {cleaned[0]}"
            if len(cleaned) == 1
            else f"cleaned: {len(cleaned)} notebooks"
        )
        print(header)
        # NOTE(review): the list below is printed even for a single notebook,
        # duplicating the header line — matches the original behavior; confirm
        # whether it was meant to run only in the multi-notebook branch.
        for name in cleaned:
            print("- ", name)
    if errors:
        print(f"with errors: {len(errors)}")
        for name in errors:
            print("- ", name)
123 |
124 |
def app_clean() -> None:
    """Clean metadata and execution_count from Jupyter notebook."""
    cfg = parser.parse_args()

    if cfg.version:
        print(f"nbmetaclean version: {__version__}")
        sys.exit(0)

    clean_config = CleanConfig(
        clear_nb_metadata=not cfg.dont_clear_nb_metadata,
        clear_cell_metadata=cfg.clear_cell_metadata,
        clear_execution_count=cfg.not_ec,  # store_false flag: True by default
        clear_outputs=cfg.clear_outputs,
        preserve_timestamp=not cfg.not_pt,
        silent=cfg.silent,
        nb_metadata_preserve_mask=process_mask(cfg.nb_metadata_preserve_mask),
        cell_metadata_preserve_mask=process_mask(cfg.cell_metadata_preserve_mask),
        mask_merge=not cfg.dont_merge_masks,
        dry_run=cfg.dry_run,
        verbose=cfg.verbose and not cfg.silent,
    )
    path_list: list[str] = cfg.path if isinstance(cfg.path, list) else [cfg.path]
    nb_files = get_nb_names_from_list(path_list, hidden=cfg.clean_hidden_nbs)

    cleaned, errors = clean_nb_file(nb_files, clean_config)

    # Running with the default path (no arguments): add some context for the user.
    if cfg.path == ".":
        if not nb_files:
            print("No notebooks found at current directory.")
            sys.exit(0)
        if not cfg.silent and not cleaned and not errors:
            print(f"Checked: {len(nb_files)} notebooks. All notebooks are clean.")

    if not cfg.silent:
        print_result(cleaned, errors, clean_config, path_list, len(nb_files))


if __name__ == "__main__":  # pragma: no cover
    app_clean()
167 |
--------------------------------------------------------------------------------
/src/nbmetaclean/check.py:
--------------------------------------------------------------------------------
1 | from typing import cast
2 | from .nb_types import CodeCell, Nb
3 |
4 |
# check_nb_warnings is defined below and used by the nbcheck CLI; it was
# missing from the public list.
__all__ = ["check_nb_ec", "check_nb_errors", "check_nb_warnings"]
6 |
7 |
def check_nb_ec(nb: Nb, strict: bool = True, no_exec: bool = False) -> bool:
    """Check nb for correct sequence of execution_count.

    Code cells are expected to have been executed one after another.
    If `strict` is False, only require each execution_count to be greater than
    the previous one (gaps allowed). If `no_exec` is True, a notebook whose
    code cells all lack an execution_count is accepted.

    Args:
        nb (Nb): Notebook to check.
        strict (bool, optional): Strict mode. Defaults to True.
        no_exec (bool): Ignore notebooks with all code cells without execution_count.

    Returns:
        bool: True if correct.
    """
    last_ec = 0
    unexecuted = 0
    for cell in nb["cells"]:
        if cell["cell_type"] != "code":
            continue
        if not cell["source"]:
            # a cell without code must not carry an execution_count
            if cell["execution_count"]:
                return False
            continue
        ec = cell["execution_count"]
        if not ec:
            if not no_exec:
                return False
            unexecuted += 1
            continue
        if strict and ec != last_ec + 1:
            return False
        if ec <= last_ec:
            return False
        last_ec = ec
    # a mix of executed and not-executed code cells is never valid
    if unexecuted and last_ec:
        return False
    return True
49 |
50 |
def check_nb_errors(nb: Nb) -> bool:
    """Check nb for cells with errors.

    Args:
        nb (Nb): Notebook to check.

    Returns:
        bool: True if no errors.
    """
    for cell in nb["cells"]:
        if cell["cell_type"] != "code" or "outputs" not in cell:
            continue
        if any(out["output_type"] == "error" for out in cell["outputs"]):
            return False
    return True
67 |
68 |
def check_nb_warnings(nb: Nb) -> bool:
    """Check nb for cells with warnings (stderr stream outputs).

    Args:
        nb (Nb): Notebook to check.

    Returns:
        bool: True if no warnings.
    """
    for cell in nb["cells"]:
        if cell["cell_type"] != "code" or "outputs" not in cell:
            continue
        for out in cell["outputs"]:
            # only stream outputs carry a "name"; short-circuit keeps access safe
            if out["output_type"] == "stream" and out["name"] == "stderr":
                return False
    return True
85 |
--------------------------------------------------------------------------------
/src/nbmetaclean/clean.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import copy
4 | from dataclasses import dataclass
5 | from pathlib import Path
6 | from typing import Optional, Tuple, Union
7 |
8 | from .helpers import read_nb, write_nb
9 |
10 | from .nb_types import Cell, CodeCell, Metadata, Nb, Output
11 |
12 |
__all__ = [
    "CleanConfig",
    "clean_cell",
    "clean_nb",
    "clean_nb_file",
    "clean_outputs",
    "filter_metadata",
    "filter_meta_mask",
    "NB_METADATA_PRESERVE_MASKS",
    "TupleStr",
]

# One preserve-mask entry: a path of metadata keys, e.g. ("language_info", "name").
TupleStr = Tuple[str, ...]

# Notebook metadata kept by default when cleaning (see clean_nb).
NB_METADATA_PRESERVE_MASKS = (
    ("language_info", "name"),
    ("authors",),
)
31 |
32 |
@dataclass
class CleanConfig:
    """Clean config.

    Args:
        clear_nb_metadata (bool, optional): Clear notebook metadata. Defaults to True.
        clear_cell_metadata (bool, optional): Clear cell metadata. Defaults to False.
        clear_execution_count (bool, optional): Clear cell execution count. Defaults to True.
        clear_outputs (bool, optional): Clear cell outputs. Defaults to False.
        preserve_timestamp (bool, optional): Preserve timestamp. Defaults to True.
        silent (bool, optional): Silent mode. Defaults to False.
        nb_metadata_preserve_mask (Optional[tuple[str, ...]], optional):
            Preserve mask for notebook metadata. Defaults to None.
        cell_metadata_preserve_mask (Optional[tuple[str, ...]], optional):
            Preserve mask for cell metadata. Defaults to None.
        mask_merge (bool): Merge masks. Add new mask to default.
            If False - use new mask. Defaults to True.
        dry_run (bool): perform a trial run, don't write results. Defaults to False.
        verbose (bool): Verbose mode. Print extra information. Defaults to False.
    """

    clear_nb_metadata: bool = True
    clear_cell_metadata: bool = False
    clear_execution_count: bool = True
    clear_outputs: bool = False
    preserve_timestamp: bool = True
    silent: bool = False
    # Masks are tuples of key paths, e.g. (("language_info", "name"),) —
    # the same shape as NB_METADATA_PRESERVE_MASKS above.
    nb_metadata_preserve_mask: Optional[tuple[TupleStr, ...]] = None
    cell_metadata_preserve_mask: Optional[tuple[TupleStr, ...]] = None
    mask_merge: bool = True
    dry_run: bool = False
    verbose: bool = False
65 |
66 |
def filter_meta_mask(
    nb_meta: Union[str, int, Metadata],
    mask: Optional[tuple[str, ...]] = None,
) -> Union[str, int, Metadata]:
    """Filter metadata by one key-path mask.

    A leaf value or an empty mask returns the value itself; a None mask
    filters everything out (empty dict).
    """
    if isinstance(nb_meta, (str, int)) or mask == ():
        return nb_meta
    if mask is None:
        return {}
    key, rest = mask[0], tuple(mask[1:])
    value = nb_meta.get(key)
    if value is None:
        return {}
    # keep the original subtree when the recursive filter comes back empty
    return {key: filter_meta_mask(value, rest) or value}
82 |
83 |
def filter_metadata(
    nb_meta: Metadata,
    masks: Optional[tuple[TupleStr, ...]] = None,
) -> Metadata:
    """Keep only the metadata entries matched by `masks`; None keeps nothing."""
    if masks is None:
        return {}
    kept: Metadata = {}
    for key_path in masks:
        kept.update(filter_meta_mask(nb_meta, key_path))  # type: ignore
    return kept
95 |
96 |
def clean_cell(
    cell: Cell | CodeCell,
    cfg: CleanConfig,
) -> bool:
    """Clean one cell (metadata / execution_count / outputs per cfg); return True if changed."""
    changed = False

    if cfg.clear_cell_metadata and cell.get("metadata", None):
        metadata = cell["metadata"]
        old_metadata = copy.deepcopy(metadata)
        cell["metadata"] = filter_metadata(metadata, cfg.cell_metadata_preserve_mask)
        if cell["metadata"] != old_metadata:
            changed = True

    if cell["cell_type"] != "code":
        return changed

    if cfg.clear_execution_count and cell.get("execution_count"):
        cell["execution_count"] = None  # type: ignore  # it's a code cell
        changed = True

    if cell.get("outputs"):
        if cfg.clear_outputs:
            cell["outputs"] = []  # type: ignore  # it's a code cell
            changed = True
        elif cfg.clear_cell_metadata or cfg.clear_execution_count:
            # keep outputs but scrub their execution_count / metadata
            if clean_outputs(cell["outputs"], cfg):  # type: ignore
                changed = True

    return changed
129 |
130 |
def clean_outputs(outputs: list[Output], cfg: CleanConfig) -> bool:
    """Scrub execution_count and/or metadata on each output; return True if changed."""
    changed = False
    for out in outputs:
        if cfg.clear_execution_count and out.get("execution_count", None):
            out["execution_count"] = None
            changed = True
        if not cfg.clear_cell_metadata:
            continue
        metadata = out.get("metadata", None)
        if metadata:
            before = copy.deepcopy(metadata)
            out["metadata"] = filter_metadata(metadata, cfg.cell_metadata_preserve_mask)
            if out["metadata"] != before:
                changed = True
    return changed
146 |
147 |
def clean_nb(
    nb: Nb,
    cfg: CleanConfig,
) -> bool:
    """Clean notebook in place - metadata, execution_count, outputs.

    Args:
        nb (Nb): Notebook to clean (modified in place).
        cfg (CleanConfig): Cleaning options.

    Returns:
        bool: True if changed.
    """
    changed = False
    metadata = nb.get("metadata")
    if cfg.clear_nb_metadata and metadata:
        old_metadata = copy.deepcopy(metadata)
        if not cfg.nb_metadata_preserve_mask:
            masks = NB_METADATA_PRESERVE_MASKS
        elif cfg.mask_merge:
            masks = cfg.nb_metadata_preserve_mask + NB_METADATA_PRESERVE_MASKS
        else:
            masks = cfg.nb_metadata_preserve_mask
        nb["metadata"] = filter_metadata(metadata, masks=masks)
        changed = nb["metadata"] != old_metadata

    if cfg.clear_cell_metadata or cfg.clear_execution_count or cfg.clear_outputs:
        for cell in nb["cells"]:
            if clean_cell(cell, cfg):
                changed = True

    return changed
185 |
186 |
def clean_nb_file(
    path: Union[Path, list[Path]],
    cfg: Optional[CleanConfig] = None,
) -> tuple[list[Path], list[Path]]:
    """Clean metadata and execution count from notebook file(s).

    Args:
        path (Union[Path, list[Path]]): Notebook filename or list of names.
        cfg (CleanConfig, optional): Config for job, if None, used default settings. Default is None.

    Returns:
        tuple[list[Path], list[Path]]: List of cleaned notebooks, list of notebooks with errors.
    """
    if cfg is None:
        cfg = CleanConfig()
    filenames = path if isinstance(path, list) else [path]
    cleaned: list[Path] = []
    errors: list[Path] = []
    for filename in filenames:
        nb = read_nb(filename)
        if nb is None:
            errors.append(filename)
            continue
        if not clean_nb(nb, cfg):
            continue
        cleaned.append(filename)
        if cfg.dry_run:
            continue  # trial run: report but don't write
        if cfg.preserve_timestamp:
            # keep the original access/modification times on the rewritten file
            stat = filename.stat()
            timestamp = (stat.st_atime, stat.st_mtime)
        else:
            timestamp = None
        write_nb(nb, filename, timestamp)

    return cleaned, errors
226 |
--------------------------------------------------------------------------------
/src/nbmetaclean/helpers.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import json
4 | import os
5 | from pathlib import Path
6 | from typing import Optional
7 |
8 | from .nb_types import Nb, PathOrStr
9 |
# Public helper API (read/write notebooks and locate notebook files).
__all__ = [
    "get_nb_names",
    "get_nb_names_from_list",
    "is_notebook",
    "read_nb",
    "write_nb",
]
17 |
18 |
def read_nb(path: PathOrStr) -> Nb | None:
    """Read notebook from filename.
    If file does not exist or is not a valid notebook, return None.

    Args:
        path (Union[str, PosixPath]): Notebook filename.

    Returns:
        Notebook Union[None, Notebook]: Jupyter Notebook as dict or None if not valid or does not exist.
    """
    nb_path = Path(path)
    if not nb_path.exists() or not nb_path.is_file():
        return None
    try:
        # `with` guarantees the file handle is closed (the original leaked it).
        with open(nb_path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses;
        # OSError covers read/permission failures. Invalid files yield None.
        return None
36 |
37 |
def write_nb(
    nb: Nb,
    path: PathOrStr,
    timestamp: Optional[tuple[float, float]] = None,
) -> Path:
    """Write notebook to file, optionally set timestamp.

    Args:
        nb (Nb): Notebook to write.
        path (Union[str, PosixPath]): Filename to write; ".ipynb" is appended if missing.
        timestamp (Optional[tuple[float, float]]): (st_atime, st_mtime) to set, defaults to None.

    Returns:
        Path: Filename of written notebook.
    """
    target = Path(path)
    if target.suffix != ".ipynb":
        target = target.with_suffix(".ipynb")
    payload = (
        json.dumps(
            nb,
            indent=1,
            separators=(",", ": "),
            ensure_ascii=False,
            sort_keys=True,
        )
        + "\n"
    )
    target.write_text(payload, encoding="utf-8")
    if timestamp is not None:
        os.utime(target, timestamp)
    return target
69 |
70 |
def is_notebook(path: Path, hidden: bool = False) -> bool:
    """Check if `path` is a notebook and not hidden. If `hidden` is True check also hidden files.

    Args:
        path (Path): Path to check.
        hidden (bool): If True also accept hidden files, defaults to False.

    Returns:
        bool: True if `path` is a notebook and not hidden.
    """
    if path.suffix != ".ipynb":
        return False
    return hidden or not path.name.startswith(".")
86 |
87 |
def get_nb_names(
    path: Optional[PathOrStr] = None,
    recursive: bool = True,
    hidden: bool = False,
) -> list[Path]:
    """Return list of notebooks from `path`. If no `path` return notebooks from current folder.

    Args:
        path (Union[Path, str, None]): Path for nb or folder with notebooks.
        recursive (bool): Recursive search.
        hidden (bool): Skip or not hidden paths, defaults to False.

    Raises:
        FileNotFoundError: If path does not exist.

    Returns:
        list[Path]: List of notebooks names.
    """
    root = Path(path or ".")

    if not root.exists():
        raise FileNotFoundError(f"{root} not exists!")

    if root.is_file():
        return [root] if is_notebook(root, hidden) else []

    if root.is_dir():
        found = []
        for child in root.iterdir():
            if child.is_file() and is_notebook(child, hidden):
                found.append(child)
            elif child.is_dir() and recursive:
                # skip hidden dirs (unless requested) and checkpoint dirs
                if child.name.startswith(".") and not hidden:
                    continue
                if "checkpoint" in child.name:
                    continue
                found.extend(get_nb_names(child, recursive, hidden))
        return found

    return []
130 |
131 |
def get_nb_names_from_list(
    path_list: list[PathOrStr] | PathOrStr,
    recursive: bool = True,
    hidden: bool = False,
) -> list[Path]:
    """Return list of notebooks from `path_list`.

    Args:
        path_list (Union[list, Path, str]): Paths (or single path) for nbs or folders with notebooks.
        recursive (bool): Recursive search.
        hidden (bool): Skip or not hidden paths, defaults to False.

    Returns:
        list[Path]: List of notebooks names.
    """
    if isinstance(path_list, (str, Path)):
        path_list = [path_list]
    nb_files: list[Path] = []
    for path in path_list:
        if not Path(path).exists():
            # missing paths are reported but don't abort the run
            print(f"{path} not exists!")
            continue
        nb_files.extend(get_nb_names(path, recursive, hidden))

    return nb_files
156 |
--------------------------------------------------------------------------------
/src/nbmetaclean/nb_types.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path, PosixPath
2 | from typing import Dict, List, Literal, Optional, TypedDict, TypeVar, Union
3 |
4 |
__all__ = [
    "Cell",
    "CodeCell",
    "DisplayData",
    "Error",
    "ExecuteResult",
    "Metadata",
    "MultilineText",
    "Nb",
    "NbMetadata",
    "NbNode",
    "PathOrStr",
    "Output",
    "Stream",
]

# Any path-like argument accepted by the helpers.
PathOrStr = TypeVar("PathOrStr", Path, PosixPath, str)

# Recursive JSON-like structures as stored in .ipynb files.
NbNode = Dict[str, Union[str, int, "NbNode"]]
Metadata = Dict[str, Union[str, int, "Metadata"]]
# Notebook text fields may be a single string or a list of line strings.
MultilineText = Union[str, List[str]]
26 |
27 |
class NbMetadata(TypedDict):
    """Top-level notebook metadata entries referenced by the cleaner."""

    language_info: Metadata
    kernelspec: Metadata
    authors: Metadata


class Output(TypedDict):
    """Base shape of a code-cell output; subclassed per output_type below."""

    output_type: Literal[
        "execute_result",
        "display_data",
        "stream",
        "error",
    ]
    execution_count: Optional[int]
    metadata: Metadata


class ExecuteResult(Output):  # output_type = "execute_result"
    """Result of executing a code cell."""

    data: Dict[str, MultilineText]


class DisplayData(Output):  # output_type = "display_data"
    """Rich display output."""

    data: Dict[str, MultilineText]  # fix it - mimebundle


class Stream(Output):  # output_type = "stream"
    """Text written to stdout/stderr."""

    name: Literal["stdout", "stderr"]  # "The name of the stream (stdout, stderr)."
    text: MultilineText


class Error(Output):  # output_type = "error"
    """Exception raised during execution."""

    ename: str  # "The name of the error."
    evalue: str  # "The value, or message, of the error."
    traceback: List[str]


class Cell(TypedDict):
    """Notebook cell base."""

    id: int  # from nbformat 4.5
    cell_type: Literal["code", "markdown", "raw"]
    metadata: Metadata
    source: MultilineText
    attachments: Optional[Dict[str, MultilineText]]


class CodeCell(Cell):  # cell_type = "code"
    """Code cell."""

    outputs: List[Output]
    execution_count: Optional[int]


class Nb(TypedDict):
    """Notebook."""

    nbformat: int
    nbformat_minor: int
    cells: List[Cell]
    metadata: Metadata
88 |
--------------------------------------------------------------------------------
/src/nbmetaclean/version.py:
--------------------------------------------------------------------------------
# Single source of truth for the package version (reported by both CLIs).
__version__ = "0.1.4"  # pragma: no cover

__all__ = ["__version__"]  # pragma: no cover
4 |
--------------------------------------------------------------------------------
/tests/test_app_check.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pathlib import Path
4 | import subprocess
5 |
6 | import pytest
7 |
8 | from nbmetaclean.helpers import read_nb, write_nb
9 | from nbmetaclean.version import __version__
10 |
11 |
def run_app(
    nb_path: Path,
    args: list[str] | None = None,
) -> tuple[str, str]:
    """Run the nbcheck app on `nb_path` with extra CLI `args`.

    Returns:
        tuple[str, str]: Decoded (stdout, stderr) of the subprocess.
    """
    # `None` default instead of a mutable `[]` default argument (shared
    # between calls); behavior for existing callers is unchanged.
    args = args or []
    run_result = subprocess.run(
        ["python", "-m", "nbmetaclean.app_check", str(nb_path), *args],
        capture_output=True,
        check=False,
    )
    return run_result.stdout.decode("utf-8"), run_result.stderr.decode("utf-8")
23 |
24 |
# Shared fixtures: folder with the sample notebooks and the default test notebook name.
example_nbs_path = Path("tests/test_nbs")
nb_name = "test_nb_3_ec.ipynb"
27 |
28 |
def test_run_script():
    """test run script"""
    app_path = Path("src/nbmetaclean/app_check.py")
    result = subprocess.run(
        ["python", app_path, "-h"], capture_output=True, check=False
    )
    assert result.returncode == 0
    # -h must print the argparse usage banner and nothing on stderr
    assert result.stdout.decode("utf-8").startswith(
        "usage: nbcheck [-h] [--ec] [--err] [--warn] [--not_strict] [--no_exec]"
    )
    assert not result.stderr.decode("utf-8")
42 |
43 |
def test_check_nb_ec(tmp_path: Path):
    """test check `--ec`.

    Walks the sample notebook through a sequence of execution_count states,
    rewriting the copy in tmp_path before each run of the CLI. Steps build on
    the notebook state left by the previous step.
    """
    # base notebook - no execution_count

    test_nb = read_nb(example_nbs_path / nb_name)
    test_nb_path = tmp_path / nb_name
    write_nb(test_nb, test_nb_path)

    # check if no args
    res_out, res_err = run_app(test_nb_path, [])
    assert res_out.startswith(
        "No checks are selected. Please select at least one check: "
        "--ec (for execution_count) or --err (for errors in outputs) or "
        "--warn (for warnings in outputs)."
    )
    assert not res_err

    # default execution_count
    res_out, res_err = run_app(test_nb_path, ["--ec"])
    assert res_out.startswith("1 notebooks with wrong execution_count:\n")
    assert res_out.endswith("test_nb_3_ec.ipynb\n")
    assert not res_err

    # `-V` option
    res_out, res_err = run_app(test_nb_path, ["--ec", "-V"])
    assert res_out.startswith("Checking 1 notebooks.\n")
    assert not res_err

    # check with `no_exec` option
    res_out, res_err = run_app(test_nb_path, ["--ec", "--no_exec"])
    assert not res_out
    assert not res_err

    # set correct execution_count
    test_nb["cells"][2]["execution_count"] = 1
    test_nb["cells"][3]["execution_count"] = 2
    test_nb["cells"][5]["execution_count"] = 3
    write_nb(test_nb, test_nb_path)

    res_out, res_err = run_app(test_nb_path, ["--ec"])
    assert not res_out
    assert not res_err

    # test strict: a gap (3 -> 4) fails strict but passes --not_strict
    test_nb["cells"][5]["execution_count"] = 4
    write_nb(test_nb, test_nb_path)
    res_out, res_err = run_app(test_nb_path, ["--ec"])
    assert res_out.startswith("1 notebooks with wrong execution_count:\n")
    assert res_out.endswith("test_nb_3_ec.ipynb\n")
    assert not res_err

    res_out, res_err = run_app(test_nb_path, ["--ec", "--not_strict"])
    assert not res_out
    assert not res_err

    # empty source, but with execution_count - fails both modes
    test_nb["cells"][5]["execution_count"] = 3
    test_nb["cells"][6]["execution_count"] = 4
    write_nb(test_nb, test_nb_path)

    res_out, res_err = run_app(test_nb_path, ["--ec"])
    assert res_out.startswith("1 notebooks with wrong execution_count:\n")
    assert res_out.endswith("test_nb_3_ec.ipynb\n")
    assert not res_err
    res_out, res_err = run_app(test_nb_path, ["--ec", "--not_strict"])
    assert res_out.startswith("1 notebooks with wrong execution_count:\n")
    assert res_out.endswith("test_nb_3_ec.ipynb\n")
    assert not res_err

    # start not from 1 - fails strict only
    test_nb = read_nb(example_nbs_path / nb_name)
    test_nb["cells"][2]["execution_count"] = 2
    test_nb["cells"][3]["execution_count"] = 3
    test_nb["cells"][5]["execution_count"] = 4
    write_nb(test_nb, test_nb_path)

    res_out, res_err = run_app(test_nb_path, ["--ec"])
    assert res_out.startswith("1 notebooks with wrong execution_count:\n")
    assert res_out.endswith("test_nb_3_ec.ipynb\n")
    assert not res_err
    res_out, res_err = run_app(test_nb_path, ["--ec", "--not_strict"])
    assert not res_out
    assert not res_err

    # next is less
    test_nb["cells"][3]["execution_count"] = 5
    write_nb(test_nb, test_nb_path)
    res_out, res_err = run_app(test_nb_path, ["--ec"])
    assert res_out.startswith("1 notebooks with wrong execution_count:\n")
    assert res_out.endswith("test_nb_3_ec.ipynb\n")
    assert not res_err

    # code cell without execution_count (mix of executed and not executed)
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    test_nb["cells"][2]["execution_count"] = 1
    write_nb(test_nb, test_nb_path)

    res_out, res_err = run_app(test_nb_path, ["--ec"])
    assert res_out.startswith("1 notebooks with wrong execution_count:\n")
    assert res_out.endswith("test_nb_3_ec.ipynb\n")
    assert not res_err

    # check with `no_exec` option should be False
    res_out, res_err = run_app(test_nb_path, ["--ec", "--no_exec"])
    assert res_out.startswith("1 notebooks with wrong execution_count:\n")
    assert res_out.endswith("test_nb_3_ec.ipynb\n")
    assert not res_err
151 |
152 |
def test_check_nb_errors(tmp_path: Path):
    """test check `--err` option."""
    nb_name = "test_nb_3_ec.ipynb"
    test_nb = read_nb(example_nbs_path / nb_name)
    assert test_nb is not None

    nb_copy_path = tmp_path / nb_name
    write_nb(test_nb, nb_copy_path)
    # clean notebook: no report expected
    out, err = run_app(nb_copy_path, ["--err"])
    assert not out
    assert not err

    # inject an error output and expect it to be reported
    test_nb["cells"][2]["outputs"][0]["output_type"] = "error"
    write_nb(test_nb, nb_copy_path)
    out, err = run_app(nb_copy_path, ["--err"])
    assert out.startswith("1 notebooks with errors in outputs:\n")
    assert out.endswith("test_nb_3_ec.ipynb\n")
    assert not err
171 |
172 |
def test_check_nb_warnings(tmp_path):
    """test check `--warn` option."""
    test_nb = read_nb(example_nbs_path / nb_name)
    nb_copy_path = tmp_path / nb_name
    write_nb(test_nb, nb_copy_path)
    # clean notebook: no report expected
    out, err = run_app(nb_copy_path, ["--warn"])
    assert not out
    assert not err

    # an "error" output is not a warning, so --warn stays silent
    test_nb["cells"][2]["outputs"][0]["output_type"] = "error"
    write_nb(test_nb, nb_copy_path)
    out, err = run_app(nb_copy_path, ["--warn"])
    assert not out
    assert not err

    # a stderr stream output is reported as a warning
    test_nb["cells"][2]["outputs"][0]["output_type"] = "stream"
    test_nb["cells"][2]["outputs"][0]["name"] = "stderr"
    write_nb(test_nb, nb_copy_path)
    out, err = run_app(nb_copy_path, ["--warn"])
    assert out.startswith("1 notebooks with warnings in outputs:\n")
    assert out.endswith("test_nb_3_ec.ipynb\n")
    assert not err
196 |
197 |
def test_check_app_version():
    """test check `--version` option."""
    expected = f"nbcheck from nbmetaclean, version: {__version__}\n"
    # long and short flag must behave identically
    for flag in ("--version", "-v"):
        out, err = run_app(flag)
        assert out == expected
        assert not err
207 |
208 |
@pytest.mark.parametrize("arg", ["--ec", "--err", "--warn"])
def test_check_app_read_error(tmp_path: Path, arg: str):
    """test check_app with wrong nb file."""
    # an empty file is not a valid notebook -> read error for every check mode
    broken_nb = tmp_path / "test_nb.ipynb"
    broken_nb.write_text("")

    out, err = run_app(broken_nb, [arg])
    assert out.startswith("1 notebooks with read error:\n")
    assert out.endswith("test_nb.ipynb\n")
    assert not err
220 |
--------------------------------------------------------------------------------
/tests/test_app_clean.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pathlib import Path
4 |
5 | import subprocess
6 |
7 | from nbmetaclean.helpers import read_nb, write_nb
8 |
9 |
def run_app(
    nb_path: Path | list[Path] | None = None,
    args: list[str] | None = None,
    cwd: Path | None = None,
) -> tuple[str, str]:
    """Run the `nbmetaclean.app_clean` CLI in a subprocess.

    Args:
        nb_path: Notebook path (or list of paths) passed as positional args.
        args: Extra CLI arguments. The caller's list is NOT modified.
        cwd: Working directory for the subprocess.

    Returns:
        Tuple of decoded (stdout, stderr).
    """
    # Build a fresh list: the original `args.insert(0, ...)` mutated the
    # caller's list in place, which leaks state between calls.
    cli_args = list(args) if args is not None else []
    if isinstance(nb_path, Path):
        cli_args = [str(nb_path), *cli_args]
    elif isinstance(nb_path, list):
        cli_args = [str(nb) for nb in nb_path] + cli_args

    run_result = subprocess.run(
        ["python", "-m", "nbmetaclean.app_clean", *cli_args],
        capture_output=True,
        check=False,  # the app's exit code is not under test; inspect output
        cwd=cwd,
    )
    return run_result.stdout.decode("utf-8"), run_result.stderr.decode("utf-8")
29 |
30 |
31 | example_nbs_path = Path("tests/test_nbs")
32 |
# this test conflicts with coverage - needs to be fixed
34 | # def test_app_clean_no_args(tmp_path: Path) -> None:
35 | # """test app_clean with no args"""
36 | # res_out, res_err = run_app(cwd=tmp_path)
37 | # assert res_out == "No notebooks found at current directory.\n"
38 | # assert not res_err
39 |
40 | # # prepare test clean notebook
41 | # nb_name_clean = "test_nb_2_clean.ipynb"
42 | # test_nb = read_nb(example_nbs_path / nb_name_clean)
43 | # test_nb_path = tmp_path / nb_name_clean
44 | # write_nb(test_nb, test_nb_path)
45 |
46 | # res_out, res_err = run_app(cwd=tmp_path)
47 | # assert res_out == "Checked: 1 notebooks. All notebooks are clean.\n"
48 | # assert not res_err
49 |
50 | # # add metadata
51 | # test_nb["metadata"]["some key"] = "some value"
52 | # write_nb(test_nb, test_nb_path)
53 |
54 | # res_out, res_err = run_app(cwd=tmp_path)
55 | # assert res_out == "cleaned: test_nb_2_clean.ipynb\n"
56 | # assert not res_err
57 |
58 |
def test_clean_nb_metadata(tmp_path: Path) -> None:
    """Test app_clean CLI: notebook metadata cleaning options.

    Runs `nbmetaclean.app_clean` as a subprocess over a copy of the clean
    fixture and checks `--nb_metadata_preserve_mask`, dry run (`-D`) and
    verbose (`-V`) behavior.
    """
    nb_name_clean = "test_nb_2_clean.ipynb"
    test_nb = read_nb(example_nbs_path / nb_name_clean)
    test_nb_path = tmp_path / nb_name_clean
    write_nb(test_nb, test_nb_path)

    # default run, no args: the fixture is already clean, so no output
    res_out, res_err = run_app(test_nb_path, [])
    assert not res_out
    assert not res_err

    # add metadata the default config would strip
    test_nb["metadata"]["some key"] = "some value"
    write_nb(test_nb, test_nb_path)

    # with a preserve mask covering that key: nothing to clean
    res_out, res_err = run_app(
        test_nb_path, ["--nb_metadata_preserve_mask", "some key"]
    )
    assert not res_out
    assert not res_err
    res_nb = read_nb(test_nb_path)
    assert res_nb["metadata"]["some key"] == "some value"

    # without preserve mask, dry run: reports but does not rewrite the file
    res_out, res_err = run_app(test_nb_path, ["-D"])
    assert res_out
    assert not res_err
    res_nb = read_nb(test_nb_path)
    assert res_nb["metadata"]["some key"] == "some value"

    # without preserve mask, real run: the extra key is removed
    res_out, res_err = run_app(test_nb_path, [])
    assert res_out
    assert not res_err
    res_nb = read_nb(test_nb_path)
    nb_metadata = res_nb.get("metadata")
    assert nb_metadata
    assert not nb_metadata.get("some key")

    # verbose flag on an already-clean nb: path + summary printed
    res_out, res_err = run_app(test_nb_path, ["-V"])
    assert res_out.startswith("Path: ")
    assert res_out.endswith(
        "test_nb_2_clean.ipynb, preserve timestamp: True\nchecked: 1 notebooks\n"
    )
    assert not res_err

    # restore the dirty notebook; verbose run reports the cleaning
    write_nb(test_nb, test_nb_path)
    res_out, res_err = run_app(test_nb_path, ["-V"])
    assert res_out.startswith("Path: ")
    assert "cleaned:" in res_out
    assert res_out.endswith("test_nb_2_clean.ipynb\n")
    assert not res_err
116 |
117 |
def test_clean_nb_ec_output(tmp_path: Path):
    """Test app_clean CLI: clearing execution_count and outputs.

    Covers the default run, dry run (`-D`), verbose (`-DV`), silent (`-s`),
    `--clear_outputs`, and a run over a directory with two notebooks.
    """
    nb_name_clean = "test_nb_2_clean.ipynb"
    test_nb = read_nb(example_nbs_path / nb_name_clean)
    test_nb_path = tmp_path / nb_name_clean

    # mark the code cell as executed so there is something to clean
    test_nb["cells"][1]["execution_count"] = 1
    test_nb["cells"][1]["outputs"][0]["execution_count"] = 1
    write_nb(test_nb, test_nb_path)

    # default settings: execution_count cleared, output data kept
    res_out, res_err = run_app(test_nb_path, [])
    assert res_out.startswith("cleaned:")
    assert res_out.endswith("test_nb_2_clean.ipynb\n")
    assert not res_err
    nb = read_nb(test_nb_path)
    assert nb["cells"][1]["execution_count"] is None
    assert nb["cells"][1]["outputs"][0]["data"] == {"text/plain": ["2"]}
    assert nb["cells"][1]["outputs"][0]["execution_count"] is None

    # dry run: reports "cleaned" but leaves the file untouched
    write_nb(test_nb, test_nb_path)
    res_out, res_err = run_app(test_nb_path, ["-D"])
    assert res_out.startswith("cleaned:")
    assert res_out.endswith("test_nb_2_clean.ipynb\n")
    assert not res_err
    nb = read_nb(test_nb_path)
    assert nb["cells"][1]["execution_count"] == 1
    assert nb["cells"][1]["outputs"][0]["execution_count"] == 1
    # dry + verbose
    res_out, res_err = run_app(test_nb_path, ["-DV"])
    assert res_out.startswith("Path: ")
    assert nb_name_clean in res_out
    assert res_out.endswith("test_nb_2_clean.ipynb\n")
    assert not res_err

    # silent: cleans the file but prints nothing
    write_nb(test_nb, test_nb_path)
    res_out, res_err = run_app(test_nb_path, ["-s"])
    assert not res_out
    assert not res_err
    nb = read_nb(test_nb_path)
    assert nb["cells"][1]["execution_count"] is None
    assert nb["cells"][1]["outputs"][0]["execution_count"] is None

    # --clear_outputs: the outputs list is emptied entirely
    write_nb(test_nb, test_nb_path)
    res_out, res_err = run_app(test_nb_path, ["--clear_outputs"])
    assert res_out.startswith("cleaned:")
    assert res_out.endswith("test_nb_2_clean.ipynb\n")
    assert not res_err
    nb = read_nb(test_nb_path)
    assert nb["cells"][1]["execution_count"] is None
    assert nb["cells"][1]["outputs"] == []

    # path as arg: authors metadata preserved, execution_count cleared
    write_nb(test_nb, test_nb_path)
    res_out, res_err = run_app(test_nb_path, [])
    assert res_out.startswith("cleaned:")
    assert res_out.endswith("test_nb_2_clean.ipynb\n")
    assert not res_err
    nb = read_nb(test_nb_path)
    assert nb["metadata"]["authors"][0]["name"] == "Andrei Yasyrev"
    assert nb["cells"][1]["execution_count"] is None
    assert nb["cells"][1]["outputs"][0]["execution_count"] is None

    # two nbs: run over the whole directory, both notebooks reported
    write_nb(test_nb, test_nb_path)
    # add second notebook
    nb_name_clean_2 = "test_nb_3_ec.ipynb"
    test_nb_2 = read_nb(example_nbs_path / nb_name_clean_2)
    test_nb_2["metadata"]["some key"] = "some value"
    write_nb(test_nb_2, tmp_path / nb_name_clean_2)

    res_out, res_err = run_app(tmp_path, [])
    assert res_out.startswith("cleaned: 2 notebooks\n")
    assert nb_name_clean in res_out
    assert nb_name_clean_2 in res_out
    assert not res_err
197 |
198 |
def test_clean_nb_wrong_file(tmp_path: Path):
    """test app_clean with wrong file"""
    # a text file with .ipynb suffix is not a valid notebook
    broken_path = tmp_path / "wrong.ipynb"
    broken_path.write_text("some text", encoding="utf-8")

    out, err = run_app(broken_path, [])
    assert out.startswith("with errors: 1")
    assert str(broken_path) in out
    assert not err
209 |
210 |
def test_app_clean_version():
    """test check `--version` option."""
    # long and short flag must behave identically
    for version_flag in ("--version", "-v"):
        out, err = run_app(args=[version_flag])
        assert out.startswith("nbmetaclean version: ")
        assert not err
220 |
--------------------------------------------------------------------------------
/tests/test_check.py:
--------------------------------------------------------------------------------
1 | from nbmetaclean.check import check_nb_ec, check_nb_errors, check_nb_warnings
2 | from nbmetaclean.helpers import read_nb
3 |
4 |
def test_check_nb_ec():
    """Test `check_nb_ec`: validation of code-cell execution_count values.

    Uses the `test_nb_3_ec.ipynb` fixture (all execution_count start as None)
    and covers strict/non-strict modes plus the `no_exec` option.
    """
    # base notebook - no execution_count at all -> fails the default check
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    result = check_nb_ec(test_nb)
    assert not result

    # with `no_exec` option an unexecuted notebook is accepted
    result = check_nb_ec(test_nb, strict=False, no_exec=True)
    assert result

    # consecutive counts 1, 2, 3 on the non-empty code cells -> valid
    test_nb["cells"][2]["execution_count"] = 1
    test_nb["cells"][3]["execution_count"] = 2
    test_nb["cells"][5]["execution_count"] = 3

    result = check_nb_ec(test_nb)
    assert result

    # test strict: a gap (1, 2, 4) fails strict, passes non-strict
    test_nb["cells"][5]["execution_count"] = 4
    result = check_nb_ec(test_nb)
    assert not result
    result = check_nb_ec(test_nb, strict=False)
    assert result

    # empty source, but with execution_count -> invalid in both modes
    test_nb["cells"][5]["execution_count"] = 3
    test_nb["cells"][6]["execution_count"] = 4

    result = check_nb_ec(test_nb)
    assert not result
    result = check_nb_ec(test_nb, strict=False)
    assert not result

    # counts start not from 1 -> fails strict, passes non-strict
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    test_nb["cells"][2]["execution_count"] = 2
    test_nb["cells"][3]["execution_count"] = 3
    test_nb["cells"][5]["execution_count"] = 4

    result = check_nb_ec(test_nb)
    assert not result
    result = check_nb_ec(test_nb, strict=False)
    assert result

    # a later cell has a smaller count than an earlier one -> invalid
    test_nb["cells"][3]["execution_count"] = 5

    result = check_nb_ec(test_nb, strict=False)
    assert not result

    # code cell without execution_count after an executed one -> invalid
    test_nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    test_nb["cells"][2]["execution_count"] = 1

    result = check_nb_ec(test_nb, strict=False)
    assert not result

    # `no_exec` does not rescue a partially executed notebook
    result = check_nb_ec(test_nb, strict=False, no_exec=True)
    assert not result
66 |
67 |
def test_check_nb_errors():
    """test check_nb_errors"""
    nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    # no error outputs in the fixture -> check passes
    assert check_nb_errors(nb)

    # inject an error output -> check fails
    nb["cells"][2]["outputs"][0]["output_type"] = "error"
    assert not check_nb_errors(nb)
77 |
78 |
def test_check_nb_warnings():
    """test check_nb_warnings"""
    nb = read_nb("tests/test_nbs/test_nb_3_ec.ipynb")
    # clean fixture -> no warnings
    assert check_nb_warnings(nb)

    # an error output is not a warning -> still passes
    nb["cells"][2]["outputs"][0]["output_type"] = "error"
    assert check_nb_warnings(nb)

    # stream output on stderr counts as a warning -> fails
    nb["cells"][2]["outputs"][0]["output_type"] = "stream"
    nb["cells"][2]["outputs"][0]["name"] = "stderr"
    assert not check_nb_warnings(nb)
93 |
--------------------------------------------------------------------------------
/tests/test_clean.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import os
3 | from pathlib import Path
4 |
5 | from pytest import CaptureFixture
6 |
7 | from nbmetaclean.clean import (
8 | NB_METADATA_PRESERVE_MASKS,
9 | CleanConfig,
10 | clean_cell,
11 | clean_nb,
12 | clean_nb_file,
13 | filter_meta_mask,
14 | filter_metadata,
15 | )
16 | from nbmetaclean.helpers import read_nb, write_nb
17 |
18 |
def test_get_meta_by_mask():
    """test get_meta_by_mask"""
    meta = read_nb(Path("tests/test_nbs/.test_nb_2_meta.ipynb")).get("metadata")

    # non-dict input is returned unchanged
    assert filter_meta_mask("some string") == "some string"

    # without a mask everything is filtered out
    assert filter_meta_mask(meta) == {}

    # a mask keeps only the matching keys
    meta["some key"] = "some value"
    assert filter_meta_mask(meta, ("some key",)) == {"some key": "some value"}
    assert filter_meta_mask(meta, NB_METADATA_PRESERVE_MASKS[0]) == {
        "language_info": {"name": "python"}
    }

    # a mask that matches nothing yields an empty dict
    assert filter_meta_mask(meta, ("some other key",)) == {}
42 |
43 |
def test_new_metadata():
    """test new_metadata"""
    meta = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb").get("metadata")
    # no mask: result is an empty dict
    filtered = filter_metadata(meta)
    assert isinstance(filtered, dict)
    assert not filtered
    # nested mask keeps only the selected leaf
    assert filter_metadata(meta, [("language_info", "name")]) == {
        "language_info": {"name": "python"}
    }
52 |
53 |
def test_clean_nb_metadata():
    """test clean_nb_metadata"""
    nb = read_nb("tests/test_nbs/test_nb_2_clean.ipynb")
    cfg = CleanConfig()
    # fixture is already clean -> nothing changes
    assert not clean_nb(nb, cfg)

    # extra metadata + new mask, mask NOT merged:
    # only the masked key survives
    nb["metadata"]["some key"] = "some value"
    cfg.nb_metadata_preserve_mask = (("some key",),)
    cfg.mask_merge = False
    assert clean_nb(nb, cfg)
    assert nb["metadata"] == {"some key": "some value"}

    # extra metadata + new mask, mask merged with defaults:
    # default-preserved keys survive alongside the masked leaf
    nb = read_nb("tests/test_nbs/test_nb_2_clean.ipynb")
    nb["metadata"]["some_key"] = {"key_1": 1, "key_2": 2}
    cfg.nb_metadata_preserve_mask = (("some_key", "key_1"),)
    cfg.mask_merge = True
    assert clean_nb(nb, cfg)
    assert nb["metadata"]["authors"][0]["name"] == "Andrei Yasyrev"
    assert nb["metadata"]["some_key"] == {"key_1": 1}
78 |
79 |
def test_clean_cell_metadata():
    """Test `clean_cell`: metadata, outputs and execution_count on a code cell."""
    test_nb = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb")

    # clear outputs and metadata together
    cell = copy.deepcopy(test_nb.get("cells")[1])
    assert cell["cell_type"] == "code"
    assert cell.get("outputs")
    assert not cell.get("metadata")
    assert cell.get("execution_count") == 1
    cell["metadata"] = {"some key": "some value"}
    changed = clean_cell(
        cell,
        cfg=CleanConfig(
            clear_outputs=True,
            clear_cell_metadata=True,
        ),
    )
    assert changed
    assert not cell.get("outputs")
    assert not cell.get("metadata")
    assert not cell.get("execution_count")
    # run again - cell is already clean, so no change is reported
    changed = clean_cell(
        cell,
        cfg=CleanConfig(
            clear_outputs=True,
            clear_cell_metadata=True,
        ),
    )
    assert not changed

    # don't clear outputs/execution_count; preserve mask keeps "some key"
    # in both the cell metadata and the output metadata
    cell = copy.deepcopy(test_nb.get("cells")[1])
    cell["metadata"] = {"some key": "some value"}
    cell["outputs"][0]["metadata"] = {
        "some key": "some value",
        "some other key": "some value",
    }
    changed = clean_cell(
        cell,
        CleanConfig(
            clear_execution_count=False,
            clear_cell_metadata=True,
            cell_metadata_preserve_mask=(("some key",),),
        ),
    )
    assert changed
    assert cell["outputs"][0]["metadata"] == {"some key": "some value"}
    assert cell["metadata"] == {"some key": "some value"}
    assert cell["execution_count"] == 1

    # default config: metadata untouched, but execution_count is cleared,
    # so a change is still reported
    changed = clean_cell(
        cell,
        cfg=CleanConfig(),
    )
    assert changed
    assert cell["execution_count"] is None
    assert cell["metadata"] == {"some key": "some value"}

    # clear metadata without a preserve mask: everything goes
    changed = clean_cell(
        cell,
        cfg=CleanConfig(
            clear_cell_metadata=True,
        ),
    )
    assert changed
    assert not cell["outputs"][0]["metadata"]
    assert not cell["execution_count"]
    assert not cell["metadata"]
    assert not cell["outputs"][0]["metadata"]
153 |
154 |
def test_clean_cell():
    """test clean_cell"""
    nb = read_nb("tests/test_nbs/.test_nb_2_meta.ipynb")
    cell = copy.deepcopy(nb.get("cells")[1])

    # nothing to clean: outputs kept, execution_count preserved
    assert cell.get("outputs")
    assert not cell.get("metadata")
    assert cell.get("execution_count") == 1
    assert not clean_cell(cell, CleanConfig(clear_execution_count=False))

    # cell with an (empty) metadata dict, default execution_count clearing
    cell["metadata"] = {}
    assert clean_cell(cell, CleanConfig(clear_cell_metadata=True))
    assert not cell.get("metadata")
    assert cell.get("outputs")

    # output metadata fully covered by the preserve mask -> no change
    cell["outputs"][0]["metadata"] = {"some key": "some value"}
    assert not clean_cell(
        cell,
        CleanConfig(
            clear_cell_metadata=True,
            cell_metadata_preserve_mask=(("some key",),),
        ),
    )
    assert cell["outputs"][0].get("metadata") == {"some key": "some value"}
185 |
186 |
def test_clean_cell_metadata_markdown():
    """test clean_cell_metadata with markdown cell"""
    markdown_cell = copy.deepcopy(
        read_nb("tests/test_nbs/.test_nb_2_meta.ipynb")["cells"][0]
    )
    markdown_cell["metadata"] = {"some key": "some value"}
    # markdown cell: only metadata is subject to cleaning
    assert clean_cell(markdown_cell, cfg=CleanConfig(clear_cell_metadata=True))
    assert not markdown_cell["metadata"]
200 |
201 |
def test_clean_nb():
    """Test `clean_nb`: full-notebook cleaning against the clean fixture."""
    path = Path("tests/test_nbs")
    nb_path = path / ".test_nb_2_meta.ipynb"
    nb_clean = path / "test_nb_2_clean.ipynb"  # path here; rebound to a dict below
    nb = read_nb(nb_path)
    assert nb["cells"][1]["execution_count"] == 1
    assert nb["cells"][1]["outputs"][0]["execution_count"] == 1
    assert nb["metadata"]
    result = clean_nb(nb, cfg=CleanConfig())
    assert result is True
    assert nb["cells"][1]["execution_count"] is None
    assert nb["cells"][1]["outputs"][0]["execution_count"] is None
    # rebind: from here on `nb_clean` is the loaded notebook dict
    nb_clean = read_nb(nb_clean)
    assert nb == nb_clean

    # try clean an already-clean notebook: no change reported
    result = clean_nb(nb_clean, cfg=CleanConfig())
    assert not result

    # clean metadata, leave execution_count
    nb = read_nb(nb_path)
    result = clean_nb(
        nb,
        cfg=CleanConfig(clear_execution_count=False),
    )
    assert result
    assert nb["cells"][1]["execution_count"] == 1
    assert nb["cells"][1]["outputs"][0]["execution_count"] == 1
    assert nb["metadata"] == nb_clean["metadata"]

    # clean nb metadata, leave cells metadata
    nb = read_nb(nb_path)
    nb["cells"][1]["metadata"] = {"some key": "some value"}
    result = clean_nb(nb, CleanConfig(clear_execution_count=False))
    assert result
    assert nb["metadata"] == nb_clean["metadata"]
    assert nb["cells"][1]["metadata"] == {"some key": "some value"}
    assert nb["cells"][1]["execution_count"] == 1

    # clean cells metadata, leave nb metadata
    nb = read_nb(nb_path)
    nb_meta = copy.deepcopy(nb["metadata"])
    result = clean_nb(nb, CleanConfig(clear_nb_metadata=False))
    assert result
    assert nb["metadata"] == nb_meta
    assert nb["cells"][1]["execution_count"] is None
249 |
250 |
def test_clean_nb_file(tmp_path: Path, capsys: CaptureFixture[str]):
    """Test `clean_nb_file` on a temp copy of the meta fixture.

    NOTE(review): `capsys` is not used in this test body - confirm whether
    output capture was intended here.
    """
    path = Path("tests/test_nbs")
    nb_name = ".test_nb_2_meta.ipynb"
    nb_clean = read_nb(path / "test_nb_2_clean.ipynb")

    # prepare temp test notebook
    nb_source = read_nb(path / nb_name)
    test_nb_path = write_nb(nb_source, tmp_path / nb_name)

    # clean meta, leave execution_count
    # first a dry run: the file on disk must stay unchanged
    cleaned, errors = clean_nb_file(
        test_nb_path,
        cfg=CleanConfig(
            clear_execution_count=False,
            dry_run=True,
        ),
    )
    assert len(cleaned) == 1
    assert len(errors) == 0
    nb = read_nb(cleaned[0])
    assert nb["metadata"] == nb_source["metadata"]
    assert nb["cells"][1]["execution_count"] == 1
    assert nb["cells"][1]["outputs"][0]["execution_count"] == 1

    # clean meta, leave execution_count (real run)
    cleaned, errors = clean_nb_file(
        test_nb_path,
        cfg=CleanConfig(clear_execution_count=False),
    )
    assert len(cleaned) == 1
    assert len(errors) == 0
    nb = read_nb(cleaned[0])
    assert nb["metadata"] == nb_clean["metadata"]
    assert nb["cells"][1]["execution_count"] == 1
    assert nb["cells"][1]["outputs"][0]["execution_count"] == 1

    # clean meta and execution_count;
    # path given as a list
    cleaned, errors = clean_nb_file([test_nb_path], CleanConfig())
    assert len(cleaned) == 1
    nb = read_nb(cleaned[0])
    assert nb == nb_clean

    # try clean an already-clean file: nothing reported
    cleaned, errors = clean_nb_file(test_nb_path, CleanConfig())
    assert len(cleaned) == 0
    assert len(errors) == 0
300 |
301 |
def test_clean_nb_file_errors(capsys: CaptureFixture[str], tmp_path: Path):
    """test clean_nb_file, errors"""
    # missing file: reported via the errors list, nothing printed
    bad_path = tmp_path / "wrong_name"
    cleaned, errors = clean_nb_file(bad_path)
    assert len(cleaned) == 0
    assert errors == [bad_path]
    captured = capsys.readouterr()
    assert not captured.out
    assert not captured.err

    # file exists but is not a valid notebook
    bad_path.write_text("wrong nb", encoding="utf-8")
    cleaned, errors = clean_nb_file(bad_path)
    assert len(cleaned) == 0
    assert len(errors) == 1
    assert errors[0].name == "wrong_name"
    captured = capsys.readouterr()
    assert not captured.out
    assert not captured.err
325 |
326 |
def test_clean_nb_file_timestamp(tmp_path: Path):
    """Test `clean_nb_file` timestamp handling.

    Default config keeps the file's mtime; `preserve_timestamp=False`
    lets the rewrite update it.
    (Removed two no-op `assert True` statements - dead assertions that
    verified nothing.)
    """
    path = Path("tests/test_nbs")
    nb_name = ".test_nb_2_meta.ipynb"
    nb_stat = (path / nb_name).stat()

    # prepare temp test notebook, copy the source file's timestamps onto it
    test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name)
    os.utime(test_nb_path, (nb_stat.st_atime, nb_stat.st_mtime))
    test_nb_stat = test_nb_path.stat()
    assert test_nb_stat.st_atime == nb_stat.st_atime
    assert test_nb_stat.st_mtime == nb_stat.st_mtime

    # default config: mtime preserved after cleaning
    cleaned, errors = clean_nb_file(test_nb_path)
    assert len(cleaned) == 1
    assert len(errors) == 0
    cleaned_stat = cleaned[0].stat()
    assert cleaned_stat.st_mtime == test_nb_stat.st_mtime

    # don't preserve timestamp: mtime must change
    test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name)
    os.utime(test_nb_path, (nb_stat.st_atime, nb_stat.st_mtime))
    cleaned, errors = clean_nb_file(test_nb_path, CleanConfig(preserve_timestamp=False))
    assert len(cleaned) == 1
    assert len(errors) == 0
    cleaned_stat = cleaned[0].stat()
    assert cleaned_stat.st_mtime != nb_stat.st_mtime
356 |
--------------------------------------------------------------------------------
/tests/test_get_nbnames.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from nbmetaclean.helpers import get_nb_names, get_nb_names_from_list, is_notebook
4 |
5 |
def test_is_notebook():
    """test is_notebook"""
    nbs_dir = Path("tests/test_nbs")
    # regular notebook file
    assert is_notebook(nbs_dir / "test_nb_1.ipynb")
    # wrong suffix is never a notebook
    assert not is_notebook(nbs_dir / "test_nb_1.py")
    # hidden notebooks only count when hidden=True
    assert not is_notebook(nbs_dir / ".test_nb_2_meta.ipynb")
    assert is_notebook(nbs_dir / ".test_nb_2_meta.ipynb", hidden=True)
12 |
13 |
def test_get_nb_names():
    """test get_nb_names"""
    path = Path("tests/test_nbs")
    # filename as argument
    file = path / "test_nb_1.ipynb"
    names = get_nb_names(file)
    assert len(names) == 1
    names.sort(key=lambda x: x.name)
    assert names[0] == file
    # filename but not nb
    names = get_nb_names("tests/test_clean.py")
    assert len(names) == 0

    # path as argument
    names = get_nb_names(path)
    assert len(names) == 3
    names.sort(key=lambda x: x.name)
    assert names[0] == file
    # path as argument. add hidden files
    names = get_nb_names(path, hidden=True)
    assert len(names) == 4

    # missing path must raise FileNotFoundError.
    # Fix: the original used `assert False` / `assert True` inside try/except;
    # `assert False` is stripped under `python -O`, silently disabling the check.
    try:
        get_nb_names("wrong_name")
    except FileNotFoundError as ex:
        assert str(ex) == "wrong_name not exists!"
    else:
        raise AssertionError("get_nb_names did not raise FileNotFoundError")
41 |
42 |
def test_get_nb_names_recursive_hidden(tmp_path: Path):
    """Test `get_nb_names`: recursion, hidden files/dirs, checkpoint skipping."""
    suffix = ".ipynb"
    # add one nb
    with open((tmp_path / "tst").with_suffix(suffix), "w", encoding="utf-8") as _:
        pass
    files = get_nb_names(tmp_path)
    assert len(files) == 1

    # add hidden nb: only found with hidden=True
    with open((tmp_path / ".tst").with_suffix(suffix), "w", encoding="utf-8") as _:
        pass
    files = get_nb_names(tmp_path)
    assert len(files) == 1
    files = get_nb_names(tmp_path, hidden=True)
    assert len(files) == 2
    # add simple file (no .ipynb suffix) - never picked up
    with open((tmp_path / "simple"), "w", encoding="utf-8") as _:
        pass
    files = get_nb_names(tmp_path)
    assert len(files) == 1

    # add dir with one nb, hidden nb
    new_dir = tmp_path / "new_dir"
    new_dir.mkdir()
    with open((new_dir / "tst").with_suffix(suffix), "w", encoding="utf-8") as _:
        pass
    with open((new_dir / ".tst").with_suffix(suffix), "w", encoding="utf-8") as _:
        pass
    files = get_nb_names(tmp_path)
    assert len(files) == 2
    files = get_nb_names(tmp_path, hidden=True)
    assert len(files) == 4

    # recursive=False: subdirectories are not searched
    files = get_nb_names(tmp_path, recursive=False)
    assert len(files) == 1

    # add hidden dir: its content is only visible with hidden=True
    hid_dir = tmp_path / ".hid_dir"
    hid_dir.mkdir()
    with open((hid_dir / "tst").with_suffix(suffix), "w", encoding="utf-8") as _:
        pass
    with open((hid_dir / ".tst").with_suffix(suffix), "w", encoding="utf-8") as _:
        pass
    files = get_nb_names(tmp_path, hidden=True)
    assert len(files) == 6
    files = get_nb_names(tmp_path)
    assert len(files) == 2

    # add checkpoint dir and file
    # files at this dir will be skipped (even with hidden=True)
    checkpoint_dir = tmp_path / ".ipynb_checkpoints"
    checkpoint_dir.mkdir()
    with open(
        (checkpoint_dir / "nb-checkpoint").with_suffix(suffix), "w", encoding="utf-8"
    ) as _:
        pass
    with open(
        (checkpoint_dir / "some_nb").with_suffix(suffix), "w", encoding="utf-8"
    ) as _:
        pass
    files = get_nb_names(tmp_path)
    assert len(files) == 2
    files = get_nb_names(tmp_path, hidden=True)
    assert len(files) == 6
108 |
109 |
def test_get_nb_names_from_list():
    """Test `get_nb_names_from_list` with single files, lists and folders."""
    path = Path("tests/test_nbs")
    # filename as argument
    file = path / "test_nb_1.ipynb"
    names = get_nb_names_from_list(file)
    assert len(names) == 1
    assert names[0] == file

    # filename as list
    names = get_nb_names_from_list([file])
    assert len(names) == 1
    assert names[0] == file

    # filename but not nb
    names = get_nb_names_from_list("tests/test_clean.py")
    assert len(names) == 0

    # path as list, not all notebooks: missing and non-nb entries are dropped
    names = get_nb_names_from_list([file, "wrong_name", "tests/test_clean.py"])
    assert len(names) == 1
    assert names[0] == file

    # folder as argument
    names = get_nb_names_from_list(path)
    assert len(names) == 3
    names.sort(key=lambda x: x.name)
    assert names[0] == file
    # path as argument. add hidden files
    # NOTE(review): this calls `get_nb_names`, not `get_nb_names_from_list` -
    # looks like a copy-paste from test_get_nb_names; confirm intent.
    names = get_nb_names(path, hidden=True)
    assert len(names) == 4
    names = get_nb_names_from_list("wrong_name")
    assert len(names) == 0
143 |
--------------------------------------------------------------------------------
/tests/test_nbs/.test_nb_2_meta.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "markdown cell source"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "data": {
17 | "text/plain": [
18 | "2"
19 | ]
20 | },
21 | "execution_count": 1,
22 | "metadata": {},
23 | "output_type": "execute_result"
24 | }
25 | ],
26 | "source": [
27 | "1 + 1"
28 | ]
29 | }
30 | ],
31 | "metadata": {
32 | "authors": [
33 | {
34 | "github": "https://github.com/ayasyrev",
35 | "name": "Andrei Yasyrev"
36 | }
37 | ],
38 | "kernelspec": {
39 | "display_name": "nbmetaclean",
40 | "language": "python",
41 | "name": "python3"
42 | },
43 | "language_info": {
44 | "codemirror_mode": {
45 | "name": "ipython",
46 | "version": 3
47 | },
48 | "file_extension": ".py",
49 | "mimetype": "text/x-python",
50 | "name": "python",
51 | "nbconvert_exporter": "python",
52 | "pygments_lexer": "ipython3",
53 | "version": "3.11.6"
54 | }
55 | },
56 | "nbformat": 4,
57 | "nbformat_minor": 2
58 | }
59 |
--------------------------------------------------------------------------------
/tests/test_nbs/test_nb_1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": []
7 | },
8 | {
9 | "cell_type": "code",
10 | "execution_count": null,
11 | "metadata": {},
12 | "outputs": [],
13 | "source": []
14 | }
15 | ],
16 | "metadata": {
17 | "authors": [
18 | {
19 | "github": "https://github.com/ayasyrev",
20 | "name": "Andrei Yasyrev"
21 | }
22 | ],
23 | "language_info": {
24 | "name": "python"
25 | }
26 | },
27 | "nbformat": 4,
28 | "nbformat_minor": 2
29 | }
30 |
--------------------------------------------------------------------------------
/tests/test_nbs/test_nb_2_clean.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "markdown cell source"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "data": {
17 | "text/plain": [
18 | "2"
19 | ]
20 | },
21 | "execution_count": null,
22 | "metadata": {},
23 | "output_type": "execute_result"
24 | }
25 | ],
26 | "source": [
27 | "1 + 1"
28 | ]
29 | }
30 | ],
31 | "metadata": {
32 | "authors": [
33 | {
34 | "github": "https://github.com/ayasyrev",
35 | "name": "Andrei Yasyrev"
36 | }
37 | ],
38 | "language_info": {
39 | "name": "python"
40 | }
41 | },
42 | "nbformat": 4,
43 | "nbformat_minor": 2
44 | }
45 |
--------------------------------------------------------------------------------
/tests/test_nbs/test_nb_3_ec.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "nb for check execution count"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [
22 | {
23 | "data": {
24 | "text/plain": [
25 | "2"
26 | ]
27 | },
28 | "execution_count": null,
29 | "metadata": {},
30 | "output_type": "execute_result"
31 | }
32 | ],
33 | "source": [
34 | "1 + 1"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {},
41 | "outputs": [
42 | {
43 | "data": {
44 | "text/plain": [
45 | "4"
46 | ]
47 | },
48 | "execution_count": null,
49 | "metadata": {},
50 | "output_type": "execute_result"
51 | }
52 | ],
53 | "source": [
54 | "2 + 2"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": []
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "data": {
71 | "text/plain": [
72 | "6"
73 | ]
74 | },
75 | "execution_count": null,
76 | "metadata": {},
77 | "output_type": "execute_result"
78 | }
79 | ],
80 | "source": [
81 | "3 + 3"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": []
90 | }
91 | ],
92 | "metadata": {
93 | "authors": [
94 | {
95 | "github": "https://github.com/ayasyrev",
96 | "name": "Andrei Yasyrev"
97 | }
98 | ],
99 | "language_info": {
100 | "name": "python"
101 | }
102 | },
103 | "nbformat": 4,
104 | "nbformat_minor": 2
105 | }
106 |
--------------------------------------------------------------------------------
/tests/test_read_write.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from nbmetaclean.helpers import read_nb, write_nb
4 |
5 |
def test_read_nb():
    """Read a fixture notebook and verify its parsed structure."""
    nb = read_nb(Path("tests/test_nbs/test_nb_1.ipynb"))
    assert isinstance(nb, dict)
    metadata = nb["metadata"]
    assert metadata["language_info"] == {"name": "python"}
    assert metadata["authors"][0]["name"] == "Andrei Yasyrev"
    assert nb["nbformat"] == 4
    assert nb["nbformat_minor"] == 2
    cells = nb["cells"]
    assert isinstance(cells, list)
    assert len(cells) == 2
    markdown_cell, code_cell = cells
    # markdown cell: empty source and metadata
    assert markdown_cell["cell_type"] == "markdown"
    assert markdown_cell["source"] == []
    assert markdown_cell["metadata"] == {}
    # code cell: empty source, never executed, no outputs
    assert code_cell["cell_type"] == "code"
    assert code_cell["source"] == []
    assert code_cell["execution_count"] is None
    assert code_cell["metadata"] == {}
    assert code_cell["outputs"] == []
28 |
29 |
def test_write_nb(tmp_path: Path):
    """Write a notebook back to disk and verify round trip, suffix handling and timestamps."""
    source = Path("tests/test_nbs/test_nb_1.ipynb")
    nb = read_nb(source)

    # round trip: the written file must be byte-identical to the original
    dest = tmp_path / source.name
    write_nb(nb, dest)
    assert dest.read_text(encoding="utf-8") == source.read_text(encoding="utf-8")

    # an ".ipynb" suffix is appended when the target name has none
    result = write_nb(nb, tmp_path / "test_nb_1")
    assert result == tmp_path / "test_nb_1.ipynb"

    # a given (atime, mtime) timestamp is applied to the written file
    source_stat = source.stat()
    timestamp = (source_stat.st_atime, source_stat.st_mtime)
    result = write_nb(nb, tmp_path / "test_nb_1", timestamp=timestamp)
    result_stat = result.stat()
    assert (result_stat.st_atime, result_stat.st_mtime) == timestamp
51 |
52 |
def test_read_nb_errors(tmp_path: Path):
    """read_nb returns None for invalid, missing, or non-file paths."""
    # invalid notebook content -> None
    broken = tmp_path / "test.ipynb"
    broken.write_text("invalid", encoding="utf-8")
    assert read_nb(broken) is None

    # file does not exist -> None
    assert read_nb(tmp_path / "test_nb_1.ipynb") is None

    # path is a directory, not a file -> None
    assert read_nb(tmp_path) is None
65 |
--------------------------------------------------------------------------------