├── tests ├── test_output │ └── .gitkeep ├── test_set_one │ ├── data.csv │ ├── plot.png │ ├── lalala.csv │ ├── output.xlsx │ ├── raw │ │ └── raw_data_in.dta │ ├── processing.py │ ├── report_gen.py │ ├── analysis.py │ └── ins_and_outs_file.py ├── test_set_two │ ├── data │ │ └── input.csv │ ├── visualisation.py │ ├── model_solver.py │ ├── data_processing.py │ ├── slides.qmd │ └── nb_example.ipynb ├── test_set_three │ └── db_analysis.py ├── test_all.py ├── test_reporter_integration.py ├── test_qmd_parser.py ├── test_qmd_integration.py ├── test_jupyter_integration.py └── test_analyser.py ├── docs ├── styles.css ├── favicon.png ├── _quarto.yml ├── logo.svg ├── objects.json ├── output_options.ipynb ├── contributing.qmd ├── index.ipynb └── output.svg ├── codecov.yml ├── .github ├── ISSUE_TEMPLATE.md ├── release-drafter.yml ├── workflows │ ├── labeler.yml │ ├── release.yml │ └── tests.yml └── labels.yml ├── Makefile ├── LICENSE ├── src └── smartrappy │ ├── __init__.py │ ├── notebook_parser.py │ ├── __main__.py │ ├── qmd_parser.py │ ├── models.py │ └── reporters.py ├── version_bumper.py ├── pyproject.toml ├── .pre-commit-config.yaml ├── create_readme.py ├── README.md ├── .gitignore └── noxfile.py /tests/test_output/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_set_one/data.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_set_one/plot.png: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_set_one/lalala.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_set_one/output.xlsx: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/styles.css: -------------------------------------------------------------------------------- 1 | /* css styles */ 2 | -------------------------------------------------------------------------------- /tests/test_set_one/raw/raw_data_in.dta: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_set_two/data/input.csv: -------------------------------------------------------------------------------- 1 | ,value 2 | 1,1 3 | 2,2 4 | 3,3 5 | -------------------------------------------------------------------------------- /docs/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aeturrell/smartrappy/HEAD/docs/favicon.png -------------------------------------------------------------------------------- /tests/test_set_one/processing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.read_csv("data.csv") 4 | df.to_excel("output.xlsx") 5 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | status: 4 | project: 5 | 
default:
 6 |         target: "96"
 7 |     patch:
 8 |       default:
 9 |         target: "96"
10 | 
--------------------------------------------------------------------------------
/tests/test_set_one/report_gen.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import pandas as pd
3 | 
4 | df = pd.read_excel("output.xlsx")
5 | plt.plot(df["x"], df["y"])
6 | plt.savefig("plot.png")
--------------------------------------------------------------------------------
/tests/test_set_one/analysis.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pandas as pd
 4 | 
 5 | df = pd.read_csv("data.csv")
 6 | 
 7 | 
 8 | def an_example_that_is_imported():
 9 |     print("hello")
10 | 
11 | 
12 | df_raw_in_data = pd.read_stata(Path("raw/raw_data_in.dta"))
--------------------------------------------------------------------------------
/tests/test_set_two/visualisation.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import pandas as pd
 3 | from data_processing import process_data
 4 | 
 5 | 
 6 | def create_plots():
 7 |     process_data()
 8 |     df = pd.read_csv("data/processed.csv")
 9 |     plt.plot(df["processed"])
10 |     plt.savefig("output.png")
--------------------------------------------------------------------------------
/tests/test_set_one/ins_and_outs_file.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import pandas as pd
 3 | 
 4 | df = pd.read_csv("lalala.csv")
 5 | 
 6 | with open("text.txt", "w") as f:
 7 |     f.write("blah")
 8 | 
 9 | df.to_csv("out.csv")
10 | 
11 | fig, ax = plt.subplots()
12 | ax.plot([1, 2, 4], [3, 4, 5])
13 | plt.savefig("out_figure.svg")
--------------------------------------------------------------------------------
/tests/test_set_two/model_solver.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | 
3 | with open(Path("equation.tex"), "w") as f:
4 |     f.write(
5 |         r"$${\displaystyle {\frac {\partial f_{\alpha }}{\partial t}}+\mathbf {v} _{\alpha }\cdot {\frac {\partial f_{\alpha }}{\partial \mathbf {x} }}+{\frac {q_{\alpha }\mathbf {E} }{m_{\alpha }}}\cdot {\frac {\partial f_{\alpha }}{\partial \mathbf {v} }}=0,}$$"
6 |     )
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ## Issue
 2 | 
 3 | - smartrappy version:
 4 | - Python version:
 5 | - Operating System:
 6 | 
 7 | ### Description
 8 | 
 9 | Describe what you were trying to get done.
10 | Tell us what happened, what went wrong, and what you expected to happen.
11 | 
12 | ### What I Did
13 | 
14 | ```python
15 | Paste the command(s) you ran and the output.
16 | If there was a crash, please include the traceback here.
17 | ``` 18 | -------------------------------------------------------------------------------- /tests/test_set_two/data_processing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pyodbc 4 | 5 | 6 | def process_data(): 7 | df = pd.read_csv("data/input.csv") 8 | df["processed"] = df["value"].apply(np.sqrt) 9 | df.to_csv("data/processed.csv") 10 | 11 | 12 | mssql_conn = pyodbc.connect( 13 | "DRIVER={SQL Server};SERVER=myserver;DATABASE=mydatabase;UID=user;PWD=password" 14 | ) 15 | df_db = pd.read_sql("SELECT TOP 10 * FROM customers", mssql_conn) 16 | -------------------------------------------------------------------------------- /tests/test_set_two/slides.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Example slides" 3 | subtitle: "" 4 | format: 5 | clean-revealjs: 6 | output-file: "paper_slides.html" 7 | --- 8 | 9 | # Motivation {background-color="#770077" transition="fade-in fade-out"} 10 | 11 | ## A slide with a figure 12 | 13 | ![](/output.png) 14 | 15 | ## A slide with a latex include 16 | 17 | {{< include /equation.tex >}} 18 | 19 | ## A slide with an alternative equation 20 | 21 | {{< include /alternative_equation.tex >}} 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # This makes the documentation for smartrappy 2 | # In practice, though, done in the GitHub Action (release) 3 | .PHONY: all site publish 4 | 5 | all: site 6 | 7 | # Build the github pages site 8 | site: 9 | uv pip install -e . 10 | uv run quartodoc build --config docs/_quarto.yml 11 | cd docs; uv run quarto render --execute 12 | rm docs/.gitignore 13 | uv run python create_readme.py 14 | uv run nbstripout docs/*.ipynb 15 | uv run pre-commit run --all-files 16 | 17 | 18 | publish: 19 | uv pip install -e . 
20 | 	uv run quartodoc build --config docs/_quarto.yml
21 | 	cd docs; uv run quarto render --execute
22 | 	cd docs; uv run quarto publish gh-pages --no-render
23 | 	rm docs/.gitignore
24 | 	uv run python create_readme.py
25 | 	uv run nbstripout docs/*.ipynb
26 | 	uv run pre-commit run --all-files
--------------------------------------------------------------------------------
/.github/release-drafter.yml:
--------------------------------------------------------------------------------
 1 | categories:
 2 |   - title: ":boom: Breaking Changes"
 3 |     label: "breaking"
 4 |   - title: ":rocket: Features"
 5 |     label: "enhancement"
 6 |   - title: ":fire: Removals and Deprecations"
 7 |     label: "removal"
 8 |   - title: ":beetle: Fixes"
 9 |     label: "bug"
10 |   - title: ":racehorse: Performance"
11 |     label: "performance"
12 |   - title: ":rotating_light: Testing"
13 |     label: "testing"
14 |   - title: ":construction_worker: Continuous Integration"
15 |     label: "ci"
16 |   - title: ":books: Documentation"
17 |     label: "documentation"
18 |   - title: ":hammer: Refactoring"
19 |     label: "refactoring"
20 |   - title: ":lipstick: Style"
21 |     label: "style"
22 |   - title: ":package: Dependencies"
23 |     labels:
24 |       - "dependencies"
25 |       - "build"
26 | template: |
27 |   ## Changes
28 | 
29 |   $CHANGES
--------------------------------------------------------------------------------
/.github/workflows/labeler.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | name: labeler
 3 | 
 4 | on:
 5 |   push:
 6 |     branches:
 7 |       - 'main'
 8 |     paths:
 9 |       - '.github/labels.yml'
10 |       - '.github/workflows/labeler.yml'
11 |   pull_request:
12 |     paths:
13 |       - '.github/labels.yml'
14 |       - '.github/workflows/labeler.yml'
15 | 
16 | jobs:
17 |   labeler:
18 |     runs-on: ubuntu-latest
19 |     permissions:
20 |       contents: read
21 |       issues: write
22 |     steps:
23 |       - name: Checkout
24 |         uses: actions/checkout@v4
25 |       - name: Run Labeler
26 |         uses: crazy-max/ghaction-github-labeler@v5
27 |         with:
28 |           skip-delete: true
29 |           github-token: ${{ secrets.GITHUB_TOKEN }}
30 |           yaml-file: .github/labels.yml
31 |           dry-run: ${{ github.event_name == 'pull_request' }}
32 |           exclude: |
33 |             help*
34 |             *issue
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Arthur Turrell
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /docs/_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | execute-dir: project 4 | 5 | website: 6 | title: "smartrappy" 7 | favicon: favicon.png 8 | twitter-card: true 9 | navbar: 10 | left: 11 | - href: index.ipynb 12 | text: Home 13 | - href: output_options.ipynb 14 | text: Other output options 15 | - text: "Reference" 16 | file: reference/index.qmd 17 | - contributing.qmd 18 | 19 | format: 20 | html: 21 | theme: flatly 22 | css: styles.css 23 | toc: true 24 | 25 | # tell quarto to read the generated sidebar 26 | metadata-files: 27 | - _sidebar.yml 28 | 29 | 30 | quartodoc: 31 | # the name used to import the package you want to create reference docs for 32 | package: smartrappy 33 | parser: google 34 | 35 | # write sidebar data to this file 36 | sidebar: _sidebar.yml 37 | 38 | sections: 39 | - title: "Function reference" 40 | desc: "What smartrappy's functions do" 41 | contents: 42 | # the functions being documented in the package. 43 | # you can refer to anything: class methods, modules, etc.. 44 | - analyse_project 45 | - ConsoleReporter 46 | - JsonReporter 47 | - MermaidReporter 48 | - GraphvizReporter 49 | -------------------------------------------------------------------------------- /tests/test_set_three/db_analysis.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | import pandas as pd 4 | import pyodbc 5 | from sqlalchemy import create_engine 6 | 7 | # SQLite connection (simplest to test with since it doesn't require a server) 8 | sqlite_conn = sqlite3.connect("example.db") 9 | df1 = pd.read_sql("SELECT * FROM users", sqlite_conn) 10 | 11 | # Write to SQLite using pandas 12 | df2 = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) 13 | df2.to_sql("new_table", sqlite_conn, if_exists="replace") 14 | 15 | # SQLAlchemy with SQLite 16 | engine = create_engine("sqlite:///another_example.db") 17 | df3 = pd.read_sql_table("some_table", engine) 18 | 19 | # MS SQL Server via pyodbc 20 | mssql_conn = pyodbc.connect( 21 | "DRIVER={SQL Server};SERVER=myserver;DATABASE=mydatabase;UID=user;PWD=password" 22 | ) 23 | df4 = pd.read_sql("SELECT TOP 10 * FROM customers", mssql_conn) 24 | 25 | # MS SQL Server via SQLAlchemy 26 | mssql_engine = create_engine( 27 | "mssql+pyodbc://user:password@myserver/mydatabase?driver=SQL+Server" 28 | ) 29 | df5 = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) 30 | df5.to_sql("new_mssql_table", mssql_engine, if_exists="replace") 31 | 32 | # PostgreSQL via SQLAlchemy 33 | pg_engine = create_engine("postgresql://user:password@localhost:5432/pgdb") 34 | df6 = pd.read_sql_query("SELECT * FROM pg_tables", pg_engine) 35 | 36 | df6.to_csv("out.csv") 37 | -------------------------------------------------------------------------------- /src/smartrappy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | smartrappy 3 | ------------------------------------ 4 | Smart reproducible analytical pipeline execution 5 | """ 6 | 7 | from importlib.metadata import PackageNotFoundError, version 8 | 9 | try: 10 | __version__ = version("smartrappy") 11 | except PackageNotFoundError: 12 | __version__ = "unknown" 13 | 14 | # Import core components first 15 | # Import CLI functions last to avoid circular imports 16 | from smartrappy.analyser import analyse_project 17 | from smartrappy.models import ( 18 | DatabaseInfo, 19 | 
Edge, 20 | FileInfo, 21 | FileStatus, 22 | ModuleImport, 23 | Node, 24 | NodeType, 25 | ProjectModel, 26 | ) 27 | from smartrappy.qmd_parser import ( 28 | analyse_qmd_file, 29 | extract_markdown_resources, 30 | extract_python_chunks, 31 | ) 32 | from smartrappy.reporters import ( 33 | ConsoleReporter, 34 | GraphvizReporter, 35 | JsonReporter, 36 | MermaidReporter, 37 | Reporter, 38 | get_reporter, 39 | ) 40 | 41 | __all__ = [ 42 | # Main functions 43 | "analyse_project", 44 | "analyse_qmd_file", 45 | "extract_python_chunks", 46 | "extract_markdown_resources", # New export 47 | # Models 48 | "DatabaseInfo", 49 | "Edge", 50 | "FileInfo", 51 | "FileStatus", 52 | "ModuleImport", 53 | "Node", 54 | "NodeType", 55 | "ProjectModel", 56 | # Reporters 57 | "Reporter", 58 | "ConsoleReporter", 59 | "GraphvizReporter", 60 | "MermaidReporter", 61 | "JsonReporter", 62 | "get_reporter", 63 | ] 64 | -------------------------------------------------------------------------------- /version_bumper.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # dependencies = [ 3 | # "toml>=0.10.2" 4 | # ] 5 | # /// 6 | import subprocess 7 | from typing import Literal 8 | 9 | import toml 10 | 11 | 12 | def bump_version(part: Literal["major", "minor", "patch"] = "patch") -> None: 13 | """Bump version in pyproject.toml file. 14 | 15 | Args: 16 | part (Literal["major", "minor", "patch"], optional): Version part to increment. Defaults to "patch". 17 | 18 | Raises: 19 | ValueError: If part is not 'major', 'minor', or 'patch'. 20 | """ 21 | file_path = "pyproject.toml" 22 | 23 | with open(file_path, "r") as f: 24 | pyproject = toml.load(f) 25 | 26 | version = pyproject["project"]["version"] 27 | major, minor, patch = map(int, version.split(".")) 28 | 29 | if part == "major": 30 | major += 1 31 | minor = 0 32 | patch = 0 33 | elif part == "minor": 34 | minor += 1 35 | patch = 0 36 | elif part == "patch": 37 | patch += 1 38 | else: 39 | raise ValueError("Invalid part value. Choose 'major', 'minor', or 'patch'.") 40 | 41 | new_version = f"{major}.{minor}.{patch}" 42 | subprocess.run( 43 | [ 44 | "uvx", 45 | "--from=toml-cli", 46 | "toml", 47 | "set", 48 | "--toml-path=pyproject.toml", 49 | "project.version", 50 | new_version, 51 | ] 52 | ) 53 | 54 | print(f"Version bumped to {major}.{minor}.{patch}") 55 | 56 | 57 | if __name__ == "__main__": 58 | bump_version() 59 | -------------------------------------------------------------------------------- /tests/test_set_two/nb_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "3fb05507", 6 | "metadata": {}, 7 | "source": [ 8 | "This is a markdown cell." 
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": null,
14 |    "id": "af374a24",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "from pathlib import Path\n",
19 |     "\n",
20 |     "import pandas as pd\n",
21 |     "\n",
22 |     "df = pd.read_csv(Path(\"data/input.csv\"))"
23 |    ]
24 |   },
25 |   {
26 |    "cell_type": "code",
27 |    "execution_count": null,
28 |    "id": "dc383f23",
29 |    "metadata": {},
30 |    "outputs": [],
31 |    "source": [
32 |     "with open(Path(\"alternative_equation.tex\"), \"w\") as f:\n",
33 |     "    f.write(\n",
34 |     "        r\"$${\displaystyle {\frac {\partial f_{\alpha }}{\partial t}}+\mathbf {v} _{\alpha }\cdot {\frac {\partial f_{\alpha }}{\partial \mathbf {x} }}+{\frac {q_{\alpha }\mathbf {E} }{m_{\alpha }}}\cdot {\frac {\partial f_{\alpha }}{\partial \mathbf {v} }}=0,}$$\"\n",
35 |     "    )"
36 |    ]
37 |   }
38 |  ],
39 |  "metadata": {
40 |   "kernelspec": {
41 |    "display_name": ".venv",
42 |    "language": "python",
43 |    "name": "python3"
44 |   },
45 |   "language_info": {
46 |    "codemirror_mode": {
47 |     "name": "ipython",
48 |     "version": 3
49 |    },
50 |    "file_extension": ".py",
51 |    "mimetype": "text/x-python",
52 |    "name": "python",
53 |    "nbconvert_exporter": "python",
54 |    "pygments_lexer": "ipython3",
55 |    "version": "3.12.0"
56 |   }
57 |  },
58 |  "nbformat": 4,
59 |  "nbformat_minor": 5
60 | }
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "smartrappy"
 3 | version = "0.0.8"
 4 | description = "Smart reproducible analytical pipeline visualisation."
 5 | authors = [{name="Arthur Turrell", email="anon@anon.gmail.com"}]
 6 | readme = "README.md"
 7 | license = "MIT"
 8 | classifiers = [
 9 |     "Development Status :: 2 - Pre-Alpha",
10 |     "Programming Language :: Python :: 3.10",
11 |     "Programming Language :: Python :: 3.11",
12 |     "Programming Language :: Python :: 3.12",
13 | ]
14 | requires-python = ">=3.10"
15 | dependencies = [
16 |     "click>=8.1.8",
17 |     "graphviz>=0.20.3",
18 |     "matplotlib>=3.10.0",
19 |     "pandas>=2.2.3",
20 |     "rich>=13.9.4",
21 | ]
22 | 
23 | [dependency-groups]
24 | dev = [
25 |     "autopep8>=2.3.1",
26 |     "coverage[toml]>=7.6.9",
27 |     "jupyter>=1.1.1",
28 |     "nbstripout>=0.8.1",
29 |     "nox>=2024.10.9",
30 |     "pre-commit>=4.0.1",
31 |     "pre-commit-hooks>=5.0.0",
32 |     "pygments>=2.18.0",
33 |     "pytest>=8.3.4",
34 |     "quartodoc>=0.9.1",
35 |     "ruff>=0.8.3",
36 |     "toml>=0.10.2",
37 |     "typing-extensions>=4.12.2",
38 |     "xdoctest[colors]>=1.2.0",
39 |     "ipykernel>=6.29.5",
40 |     "pydoclint>=0.6.0",
41 |     "typeguard>=4.4.2",
42 |     "pyodbc>=5.2.0",
43 |     "sqlalchemy>=2.0.40",
44 | ]
45 | 
46 | [project.scripts]
47 | smartrappy = "smartrappy.__main__:main"
48 | 
49 | [tool.uv]
50 | package = true
51 | 
52 | [tool.mypy]
53 | strict = false
54 | pretty = true
55 | show_column_numbers = true
56 | show_error_codes = true
57 | show_error_context = true
58 | ignore_missing_imports = true
59 | disallow_untyped_calls = false
60 | 
61 | [tool.pydoclint]
62 | style = 'google'
63 | exclude = ["noxfile.py", "tests/", "docs/"]
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/astral-sh/ruff-pre-commit
3 |     # Ruff version.
4 |     rev: v0.11.6
5 |     hooks:
6 |       # Run the linter.
7 | - id: ruff 8 | types_or: [python, pyi, jupyter] 9 | args: [ --fix ] 10 | - id: ruff 11 | types_or: [python, pyi, jupyter] 12 | name: sort imports with ruff 13 | args: [--select, I, --fix] 14 | # Run the formatter. 15 | - id: ruff-format 16 | types_or: [python, pyi, jupyter] 17 | - repo: local 18 | hooks: 19 | - id: check-added-large-files 20 | name: Check for added large files 21 | entry: check-added-large-files 22 | language: system 23 | - id: check-toml 24 | name: Check Toml 25 | entry: check-toml 26 | language: system 27 | types: [toml] 28 | - id: check-yaml 29 | exclude: docs/ 30 | name: Check Yaml 31 | entry: check-yaml 32 | language: system 33 | types: [yaml] 34 | - id: end-of-file-fixer 35 | exclude: docs/ 36 | name: Fix End of Files 37 | entry: end-of-file-fixer 38 | language: system 39 | types: [text] 40 | stages: [pre-commit, pre-push, manual] 41 | - id: trailing-whitespace 42 | exclude: docs/ 43 | name: Trim Trailing Whitespace 44 | entry: trailing-whitespace-fixer 45 | language: system 46 | types: [text] 47 | stages: [pre-commit, pre-push, manual] 48 | - repo: https://github.com/kynan/nbstripout 49 | rev: 0.4.0 50 | hooks: 51 | - id: nbstripout 52 | name: nbstripout 53 | description: "nbstripout: strip output from Jupyter and IPython notebooks" 54 | entry: nbstripout 55 | language: python 56 | types: [jupyter] 57 | - repo: https://github.com/jsh9/pydoclint 58 | rev: 0.6.0 59 | hooks: 60 | - id: pydoclint 61 | args: [--style=google, --config=pyproject.toml, src/] 62 | -------------------------------------------------------------------------------- /.github/labels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Labels names are important as they are used by Release Drafter to decide 3 | # regarding where to record them in changelog or if to skip them. 4 | # 5 | # The repository labels will be automatically configured using this file and 6 | # the GitHub Action https://github.com/marketplace/actions/github-labeler. 
 7 | - name: breaking
 8 |   description: Breaking Changes
 9 |   color: bfd4f2
10 | - name: bug
11 |   description: Something isn't working
12 |   color: d73a4a
13 | - name: build
14 |   description: Build System and Dependencies
15 |   color: bfdadc
16 | - name: ci
17 |   description: Continuous Integration
18 |   color: 4a97d6
19 | - name: dependencies
20 |   description: Pull requests that update a dependency file
21 |   color: 0366d6
22 | - name: documentation
23 |   description: Improvements or additions to documentation
24 |   color: 0075ca
25 | - name: duplicate
26 |   description: This issue or pull request already exists
27 |   color: cfd3d7
28 | - name: enhancement
29 |   description: New feature or request
30 |   color: a2eeef
31 | - name: github_actions
32 |   description: Pull requests that update Github_actions code
33 |   color: "000000"
34 | - name: good first issue
35 |   description: Good for newcomers
36 |   color: 7057ff
37 | - name: help wanted
38 |   description: Extra attention is needed
39 |   color: 008672
40 | - name: invalid
41 |   description: This doesn't seem right
42 |   color: e4e669
43 | - name: performance
44 |   description: Performance
45 |   color: "016175"
46 | - name: python
47 |   description: Pull requests that update Python code
48 |   color: 2b67c6
49 | - name: question
50 |   description: Further information is requested
51 |   color: d876e3
52 | - name: refactoring
53 |   description: Refactoring
54 |   color: ef67c4
55 | - name: removal
56 |   description: Removals and Deprecations
57 |   color: 9ae7ea
58 | - name: style
59 |   description: Style
60 |   color: c120e5
61 | - name: testing
62 |   description: Testing
63 |   color: b1fc6f
64 | - name: wontfix
65 |   description: This will not be worked on
66 |   color: ffffff
--------------------------------------------------------------------------------
/docs/logo.svg:
--------------------------------------------------------------------------------
[SVG image: smartrappy logo (vector markup not captured in this dump)]
--------------------------------------------------------------------------------
/tests/test_all.py:
--------------------------------------------------------------------------------
 1 | """Tests for the refactored smartrappy architecture."""
 2 | 
 3 | import tempfile
 4 | from pathlib import Path
 5 | 
 6 | from smartrappy import analyse_project
 7 | from smartrappy.reporters import (
 8 |     ConsoleReporter,
 9 |     GraphvizReporter,
10 |     JsonReporter,
11 |     MermaidReporter,
12 | )
13 | 
14 | 
15 | def test_analyse_project():
16 |     """Test that analyse_project works with the test directories."""
17 |     # Analyse the test set
18 |     model = analyse_project("tests/test_set_one")
19 | 
20 |     # Check that the model contains expected data
21 |     assert len(model.nodes) > 0
22 |     assert len(model.edges) > 0
23 |     assert "data.csv" in model.file_operations
24 | 
25 |     # Test with a different directory
26 |     model2 = analyse_project("tests/test_set_two")
27 |     assert len(model2.nodes) > 0
28 |     assert "data/input.csv" in model2.file_operations
29 | 
30 | 
31 | def test_reporters():
32 |     """Test that all reporters can generate output."""
33 |     # Analyse a test set
34 |     model = analyse_project("tests/test_set_one")
35 | 
36 |     with tempfile.TemporaryDirectory() as tmpdir:
37 |         # Test console reporter
38 |         console_reporter = ConsoleReporter()
39 |         console_reporter.generate_report(model)  # No output file needed
40 | 
41 |         # Test graphviz reporter
42 |         graphviz_output = Path(tmpdir) / "graphviz_test"
"graphviz_test" 43 | graphviz_reporter = GraphvizReporter() 44 | graphviz_reporter.generate_report(model, str(graphviz_output)) 45 | assert (graphviz_output.with_suffix(".pdf")).exists() 46 | 47 | # Test mermaid reporter 48 | mermaid_output = Path(tmpdir) / "mermaid_test.md" 49 | mermaid_reporter = MermaidReporter() 50 | mermaid_reporter.generate_report(model, str(mermaid_output)) 51 | assert mermaid_output.exists() 52 | 53 | # Test JSON reporter with console output 54 | json_reporter = JsonReporter() 55 | json_reporter.generate_report(model) # Should print to console 56 | 57 | # Test JSON reporter with file output 58 | json_output = Path(tmpdir) / "json_test.json" 59 | json_reporter.generate_report(model, str(json_output)) 60 | assert json_output.exists() 61 | 62 | 63 | if __name__ == "__main__": 64 | # Simple manual test 65 | test_analyse_project() 66 | test_reporters() 67 | print("All tests passed!") 68 | -------------------------------------------------------------------------------- /create_readme.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | 4 | import nbformat 5 | 6 | 7 | def convert_notebook_to_markdown( 8 | notebook_path: Path, output_path: Path, num_cells: int = 5 9 | ) -> None: 10 | """Converts a Jupyter notebook to a markdown file, including only the first N cells. 11 | 12 | This function reads a Jupyter notebook, extracts a specified number of cells (default 5), 13 | and converts them to markdown format. Markdown cells are preserved as-is, while code cells 14 | are wrapped in Python code blocks. 15 | 16 | notebook_path (Path): Path to the input Jupyter notebook file (.ipynb) 17 | output_path (Path): Path where the output markdown file will be saved 18 | num_cells (int, optional): Number of cells to include from the start of the notebook. Defaults to 5. 19 | 20 | Returns: 21 | None: The function prints a confirmation message but does not return any value 22 | 23 | Notes: 24 | - The function creates the output directory if it doesn't exist 25 | - Code cells are wrapped in ```python blocks 26 | - Non-markdown and non-code cells are replaced with HTML comments 27 | """ 28 | # Load the notebook 29 | with open(notebook_path, "r", encoding="utf-8") as f: 30 | notebook = nbformat.read(f, as_version=4) 31 | 32 | # Get the first `num_cells` cells 33 | cells = notebook.cells[:num_cells] 34 | 35 | # Convert cells to markdown text 36 | md_lines = [] 37 | for cell in cells: 38 | if cell.cell_type == "markdown": 39 | md_lines.append(cell.source) 40 | elif cell.cell_type == "code": 41 | md_lines.append("```python\n" + cell.source + "\n```") 42 | else: 43 | md_lines.append(f"") 44 | 45 | # Join the lines 46 | markdown_text = "\n\n".join(md_lines) 47 | 48 | # Strip extraneous. 
49 | # Remove special frontmatter 50 | markdown_text = markdown_text.replace("---\nexecute:\n echo: false\n---\n", "") 51 | 52 | # Remove width specifications 53 | markdown_text = re.sub(r"{width=\d+%}", "", markdown_text) 54 | 55 | # Remove leading whitespace and newlines before first hash 56 | markdown_text = re.sub(r"^\s*(?=#)", "", markdown_text) 57 | 58 | # Replace logo.svg with docs/logo.svg 59 | markdown_text = markdown_text.replace("logo.svg", "docs/logo.svg") 60 | 61 | # Write to output markdown file 62 | with open(output_path, "w", encoding="utf-8") as f: 63 | f.write(markdown_text) 64 | 65 | print(f"Markdown saved to: {output_path}") 66 | 67 | 68 | if __name__ == "__main__": 69 | # Example usage 70 | convert_notebook_to_markdown( 71 | Path("docs/index.ipynb"), Path("README.md"), num_cells=3 72 | ) 73 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Release 3 | 4 | permissions: 5 | contents: write 6 | pages: write 7 | id-token: write 8 | 9 | on: 10 | push: 11 | branches: 12 | - main 13 | - master 14 | 15 | jobs: 16 | release: 17 | name: Release 18 | runs-on: ubuntu-latest 19 | environment: pypi 20 | steps: 21 | - name: Check out the repository 22 | uses: actions/checkout@v4 23 | with: 24 | fetch-depth: 2 25 | 26 | - name: Set up Python 27 | uses: actions/setup-python@v5.4.0 28 | with: 29 | python-version: "3.10" 30 | 31 | - name: Install uv 32 | uses: astral-sh/setup-uv@v5 33 | with: 34 | # Install a specific version of uv. 35 | version: "0.5.2" 36 | 37 | - name: Check if there is a parent commit 38 | id: check-parent-commit 39 | run: | 40 | echo "::set-output name=sha::$(git rev-parse --verify --quiet HEAD^)" 41 | 42 | - name: Detect and tag new version 43 | id: check-version 44 | if: steps.check-parent-commit.outputs.sha 45 | uses: salsify/action-detect-and-tag-new-version@v2.0.3 46 | with: 47 | version-command: | 48 | uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version 49 | 50 | - name: Bump version for developmental release 51 | if: "! steps.check-version.outputs.tag" 52 | run: | 53 | uv run version_bumper.py && 54 | version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version) && 55 | uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version $version.dev.$(date +%s) 56 | 57 | - name: Build package 58 | run: | 59 | uv build 60 | 61 | - name: Publish package on PyPI 62 | if: steps.check-version.outputs.tag 63 | uses: pypa/gh-action-pypi-publish@release/v1 64 | 65 | - name: Publish the release notes 66 | uses: release-drafter/release-drafter@v6.1.0 67 | with: 68 | publish: ${{ steps.check-version.outputs.tag != '' }} 69 | tag: ${{ steps.check-version.outputs.tag }} 70 | env: 71 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 72 | 73 | - name: Install dependencies 74 | run: | 75 | uv sync --extra dev 76 | uv pip install -e . 
77 | 78 | - name: Install Quarto 79 | uses: quarto-dev/quarto-actions/setup@v2 80 | with: 81 | version: 1.6.39 82 | 83 | - name: install graphviz 84 | uses: ts-graphviz/setup-graphviz@v2 85 | 86 | - name: Build autodocs 87 | run: uv run quartodoc build --config docs/_quarto.yml 88 | 89 | - name: Build docs 90 | run: cd docs;uv run quarto render --execute 91 | 92 | - name: git config 93 | run: | 94 | git config user.name "$(git log -n 1 --pretty=format:%an)" && 95 | git config user.email "$(git log -n 1 --pretty=format:%ae)" 96 | 97 | - name: Publish 98 | if: steps.check-version.outputs.tag 99 | run: cd docs;uv run quarto publish gh-pages --no-render --no-browser 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # smartrappy 2 | 3 | Smart reproducible analytical pipeline inspection. 4 | 5 | ![SVG logo of smartrappy](docs/logo.svg) 6 | 7 | [![PyPI](https://img.shields.io/pypi/v/smartrappy.svg)](https://pypi.org/project/smartrappy/) 8 | [![Status](https://img.shields.io/pypi/status/smartrappy.svg)](https://pypi.org/project/smartrappy/) 9 | [![Python Version](https://img.shields.io/pypi/pyversions/smartrappy)](https://pypi.org/project/smartrappy) 10 | [![License](https://img.shields.io/pypi/l/smartrappy)](https://opensource.org/licenses/MIT) 11 | [![Read the documentation at https://aeturrell.github.io/smartrappy/](https://img.shields.io/badge/docs-passing-brightgreen)](https://aeturrell.github.io/smartrappy/) 12 | [![Tests](https://github.com/aeturrell/smartrappy/workflows/Tests/badge.svg)](https://github.com/aeturrell/smartrappy/actions?workflow=Tests) 13 | [![Codecov](https://codecov.io/gh/aeturrell/smartrappy/branch/main/graph/badge.svg)](https://codecov.io/gh/aeturrell/smartrappy) 14 | [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) 15 | [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) 16 | [![Downloads](https://static.pepy.tech/badge/smartrappy)](https://pepy.tech/project/smartrappy) 17 | [![Source](https://img.shields.io/badge/source%20code-github-lightgrey?style=for-the-badge)](https://github.com/aeturrell/smartrappy) 18 | 19 | ![Linux](https://img.shields.io/badge/Linux-FCC624?style=for-the-badge&logo=linux&logoColor=black) 20 | ![macOS](https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=macos&logoColor=F0F0F0) 21 | ![Windows](https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white) 22 | 23 | 24 | 25 | ## Introduction 26 | 27 | ### What does this package do? 28 | 29 | **smartrappy** analyses a Python project and infers the directed acyclic graph (DAG) of the code and data dependencies, including the last time any data were refreshed and whether the data exist at all on disk. It is not perfect, and will miss a lot in complex projects: but for simple projects using, say, `pd.read_csv()`, it does a good job of inferring the steps. It can also infer writing to and from most databases. The inferred DAG is then visualised, and there are several options for doing that—the default being to produce a visualisation in the terminal. 30 | 31 | ### What is **smartrappy** for? 
32 | 33 | **smartrappy** is designed to help you understand the dependencies in a project, especially in a context where there may be a lot of legacy code that resembles tangled spaghetti. 34 | 35 | ### Quickstart 36 | 37 | To use **smartrappy** as a command-line tool: 38 | 39 | ```bash 40 | smartrappy /path/to/your/project 41 | ``` 42 | 43 | Or to use it within a Python script: 44 | 45 | ```python 46 | from smartrappy import analyse_project 47 | from smartrappy.reporters import ConsoleReporter 48 | 49 | 50 | model = analyse_project("/path/to/your/project") 51 | reporter = ConsoleReporter() 52 | reporter.generate_report(model) 53 | ``` 54 | 55 | ### Installation 56 | 57 | To install **smartrappy**, you can use `pip install smartrappy` or `uv add smartrappy` if you are using [Astral's uv](https://docs.astral.sh/uv/). You can also use it as a standalone command-line tool with uv and the `uvx` command: 58 | 59 | ```bash 60 | uvx smartrappy path/to/your/project 61 | ``` 62 | 63 | ### Documentation 64 | 65 | You can find the full documentation for **smartrappy** at [https://aeturrell.github.io/smartrappy/](https://aeturrell.github.io/smartrappy/). 66 | -------------------------------------------------------------------------------- /tests/test_reporter_integration.py: -------------------------------------------------------------------------------- 1 | """Integration tests for reporters to improve coverage.""" 2 | 3 | import json 4 | import os 5 | import tempfile 6 | 7 | from smartrappy.analyser import analyse_project 8 | from smartrappy.reporters import ( 9 | ConsoleReporter, 10 | JsonReporter, 11 | MermaidReporter, 12 | get_reporter, 13 | ) 14 | 15 | 16 | def test_console_reporter_with_real_project(): 17 | """Test console reporter with actual project analysis.""" 18 | # Use test_set_one for a simple project 19 | test_dir = "tests/test_set_one" 20 | if not os.path.exists(test_dir): 21 | # Skip if test directory doesn't exist 22 | return 23 | 24 | model = analyse_project(test_dir, internal_only=False) 25 | reporter = ConsoleReporter() 26 | 27 | # This should not raise an exception 28 | reporter.generate_report(model) 29 | 30 | 31 | def test_mermaid_reporter_with_real_project(): 32 | """Test mermaid reporter with actual project analysis.""" 33 | test_dir = "tests/test_set_one" 34 | if not os.path.exists(test_dir): 35 | return 36 | 37 | model = analyse_project(test_dir, internal_only=False) 38 | 39 | with tempfile.TemporaryDirectory() as tmpdir: 40 | output_path = os.path.join(tmpdir, "diagram.md") 41 | reporter = MermaidReporter() 42 | reporter.generate_report(model, output_path) 43 | 44 | # Verify the file was created and has content 45 | assert os.path.exists(output_path) 46 | with open(output_path, "r") as f: 47 | content = f.read() 48 | assert "```mermaid" in content 49 | assert "graph TD" in content 50 | 51 | 52 | def test_json_reporter_with_real_project(): 53 | """Test JSON reporter with actual project analysis.""" 54 | test_dir = "tests/test_set_one" 55 | if not os.path.exists(test_dir): 56 | return 57 | 58 | model = analyse_project(test_dir, internal_only=False) 59 | 60 | with tempfile.TemporaryDirectory() as tmpdir: 61 | output_path = os.path.join(tmpdir, "output.json") 62 | reporter = JsonReporter() 63 | reporter.generate_report(model, output_path) 64 | 65 | # Verify the file was created and is valid JSON 66 | assert os.path.exists(output_path) 67 | with open(output_path, "r") as f: 68 | data = json.load(f) 69 | assert "nodes" in data 70 | assert "edges" in data 71 | 72 | 73 | def 
test_json_reporter_internal_only(): 74 | """Test JSON reporter with internal_only flag.""" 75 | test_dir = "tests/test_set_one" 76 | if not os.path.exists(test_dir): 77 | return 78 | 79 | model = analyse_project(test_dir, internal_only=True) 80 | 81 | with tempfile.TemporaryDirectory() as tmpdir: 82 | output_path = os.path.join(tmpdir, "output.json") 83 | reporter = JsonReporter() 84 | reporter.generate_report(model, output_path) 85 | 86 | # Verify the file was created 87 | assert os.path.exists(output_path) 88 | with open(output_path, "r") as f: 89 | data = json.load(f) 90 | # Should have filtered nodes in internal-only mode 91 | assert "nodes" in data 92 | 93 | 94 | def test_get_reporter_factory(): 95 | """Test the reporter factory function.""" 96 | console = get_reporter("console") 97 | assert isinstance(console, ConsoleReporter) 98 | 99 | mermaid = get_reporter("mermaid") 100 | assert isinstance(mermaid, MermaidReporter) 101 | 102 | json_rep = get_reporter("json") 103 | assert isinstance(json_rep, JsonReporter) 104 | -------------------------------------------------------------------------------- /docs/objects.json: -------------------------------------------------------------------------------- 1 | {"project": "smartrappy", "version": "0.0.9999", "count": 18, "items": [{"name": "smartrappy.analyse_project", "domain": "py", "role": "function", "priority": "1", "uri": "reference/analyse_project.html#smartrappy.analyse_project", "dispname": "-"}, {"name": "smartrappy.analyser.analyse_project", "domain": "py", "role": "function", "priority": "1", "uri": "reference/analyse_project.html#smartrappy.analyse_project", "dispname": "smartrappy.analyse_project"}, {"name": "smartrappy.ConsoleReporter.generate_report", "domain": "py", "role": "function", "priority": "1", "uri": "reference/ConsoleReporter.html#smartrappy.ConsoleReporter.generate_report", "dispname": "-"}, {"name": "smartrappy.reporters.ConsoleReporter.generate_report", "domain": "py", "role": "function", "priority": "1", "uri": "reference/ConsoleReporter.html#smartrappy.ConsoleReporter.generate_report", "dispname": "smartrappy.ConsoleReporter.generate_report"}, {"name": "smartrappy.ConsoleReporter", "domain": "py", "role": "class", "priority": "1", "uri": "reference/ConsoleReporter.html#smartrappy.ConsoleReporter", "dispname": "-"}, {"name": "smartrappy.reporters.ConsoleReporter", "domain": "py", "role": "class", "priority": "1", "uri": "reference/ConsoleReporter.html#smartrappy.ConsoleReporter", "dispname": "smartrappy.ConsoleReporter"}, {"name": "smartrappy.JsonReporter.generate_report", "domain": "py", "role": "function", "priority": "1", "uri": "reference/JsonReporter.html#smartrappy.JsonReporter.generate_report", "dispname": "-"}, {"name": "smartrappy.reporters.JsonReporter.generate_report", "domain": "py", "role": "function", "priority": "1", "uri": "reference/JsonReporter.html#smartrappy.JsonReporter.generate_report", "dispname": "smartrappy.JsonReporter.generate_report"}, {"name": "smartrappy.JsonReporter", "domain": "py", "role": "class", "priority": "1", "uri": "reference/JsonReporter.html#smartrappy.JsonReporter", "dispname": "-"}, {"name": "smartrappy.reporters.JsonReporter", "domain": "py", "role": "class", "priority": "1", "uri": "reference/JsonReporter.html#smartrappy.JsonReporter", "dispname": "smartrappy.JsonReporter"}, {"name": "smartrappy.MermaidReporter.generate_report", "domain": "py", "role": "function", "priority": "1", "uri": "reference/MermaidReporter.html#smartrappy.MermaidReporter.generate_report", 
"dispname": "-"}, {"name": "smartrappy.reporters.MermaidReporter.generate_report", "domain": "py", "role": "function", "priority": "1", "uri": "reference/MermaidReporter.html#smartrappy.MermaidReporter.generate_report", "dispname": "smartrappy.MermaidReporter.generate_report"}, {"name": "smartrappy.MermaidReporter", "domain": "py", "role": "class", "priority": "1", "uri": "reference/MermaidReporter.html#smartrappy.MermaidReporter", "dispname": "-"}, {"name": "smartrappy.reporters.MermaidReporter", "domain": "py", "role": "class", "priority": "1", "uri": "reference/MermaidReporter.html#smartrappy.MermaidReporter", "dispname": "smartrappy.MermaidReporter"}, {"name": "smartrappy.GraphvizReporter.generate_report", "domain": "py", "role": "function", "priority": "1", "uri": "reference/GraphvizReporter.html#smartrappy.GraphvizReporter.generate_report", "dispname": "-"}, {"name": "smartrappy.reporters.GraphvizReporter.generate_report", "domain": "py", "role": "function", "priority": "1", "uri": "reference/GraphvizReporter.html#smartrappy.GraphvizReporter.generate_report", "dispname": "smartrappy.GraphvizReporter.generate_report"}, {"name": "smartrappy.GraphvizReporter", "domain": "py", "role": "class", "priority": "1", "uri": "reference/GraphvizReporter.html#smartrappy.GraphvizReporter", "dispname": "-"}, {"name": "smartrappy.reporters.GraphvizReporter", "domain": "py", "role": "class", "priority": "1", "uri": "reference/GraphvizReporter.html#smartrappy.GraphvizReporter", "dispname": "smartrappy.GraphvizReporter"}]} -------------------------------------------------------------------------------- /src/smartrappy/notebook_parser.py: -------------------------------------------------------------------------------- 1 | """Parser for Jupyter notebook files (.ipynb).""" 2 | 3 | import ast 4 | import json 5 | from typing import List, Set, Tuple 6 | 7 | from smartrappy.models import DatabaseInfo, FileInfo, ModuleImport 8 | 9 | 10 | def extract_notebook_cells(notebook_content: str) -> List[str]: 11 | """ 12 | Extract Python code cells from a Jupyter notebook file. 13 | 14 | Args: 15 | notebook_content: The content of the .ipynb file as a string 16 | 17 | Returns: 18 | A list of Python code cell contents found in the notebook 19 | """ 20 | try: 21 | # Parse the notebook JSON 22 | notebook = json.loads(notebook_content) 23 | 24 | # Extract code cells 25 | code_cells = [] 26 | 27 | # Jupyter notebooks have a 'cells' key containing a list of cell objects 28 | cells = notebook.get("cells", []) 29 | 30 | for cell in cells: 31 | # Only process cells with type 'code' 32 | if cell.get("cell_type") == "code": 33 | # The source can be a string or a list of strings 34 | source = cell.get("source", []) 35 | 36 | # Convert to a single string 37 | if isinstance(source, list): 38 | cell_code = "".join(source) 39 | else: 40 | cell_code = source 41 | 42 | # Only add non-empty cells 43 | if cell_code.strip(): 44 | code_cells.append(cell_code) 45 | 46 | return code_cells 47 | 48 | except (json.JSONDecodeError, KeyError) as e: 49 | print(f"Error parsing notebook JSON: {str(e)}") 50 | return [] 51 | 52 | 53 | def analyse_notebook_file( 54 | file_path: str, 55 | project_modules: Set[str], 56 | FileOperationFinder, 57 | ModuleImportFinder, 58 | DatabaseOperationFinder, 59 | ) -> Tuple[List[FileInfo], List[ModuleImport], List[DatabaseInfo]]: 60 | """ 61 | Analyse a Jupyter notebook file for Python code cells. 
62 | 63 | Args: 64 | file_path: Path to the .ipynb file 65 | project_modules: Set of known project module names 66 | FileOperationFinder: Class to find file operations 67 | ModuleImportFinder: Class to find module imports 68 | DatabaseOperationFinder: Class to find database operations 69 | 70 | Returns: 71 | A tuple of (file_operations, imports, database_operations) 72 | """ 73 | try: 74 | # Read the notebook file content 75 | with open(file_path, "r", encoding="utf-8") as f: 76 | notebook_content = f.read() 77 | 78 | # Extract Python code cells 79 | code_cells = extract_notebook_cells(notebook_content) 80 | 81 | # Initialize result lists 82 | all_file_ops = [] 83 | all_imports = [] 84 | all_db_ops = [] 85 | 86 | # Process each code cell separately 87 | for i, cell_code in enumerate(code_cells): 88 | try: 89 | # Parse the cell as Python code 90 | tree = ast.parse(cell_code) 91 | 92 | # Find file operations 93 | file_finder = FileOperationFinder(file_path) 94 | file_finder.visit(tree) 95 | all_file_ops.extend(file_finder.file_operations) 96 | 97 | # Find imports 98 | import_finder = ModuleImportFinder(file_path, project_modules) 99 | import_finder.visit(tree) 100 | all_imports.extend(import_finder.imports) 101 | 102 | # Find database operations 103 | db_finder = DatabaseOperationFinder(file_path) 104 | db_finder.visit(tree) 105 | all_db_ops.extend(db_finder.database_operations) 106 | 107 | except SyntaxError as e: 108 | print(f"Syntax error in code cell {i + 1} of {file_path}: {str(e)}") 109 | 110 | return all_file_ops, all_imports, all_db_ops 111 | 112 | except (UnicodeDecodeError, IOError) as e: 113 | print(f"Error processing notebook file {file_path}: {str(e)}") 114 | return [], [], [] 115 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Tests 3 | 4 | on: 5 | - push 6 | - pull_request 7 | 8 | jobs: 9 | tests: 10 | name: ${{ matrix.session }} ${{ matrix.python-version }} / ${{ matrix.os }} 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | include: 16 | - { python-version: "3.10", os: ubuntu-latest, session: "pre-commit" } 17 | - { python-version: "3.10", os: ubuntu-latest, session: "tests" } 18 | - { python-version: "3.11", os: ubuntu-latest, session: "tests" } 19 | - { python-version: "3.12", os: ubuntu-latest, session: "tests" } 20 | - { python-version: "3.10", os: windows-latest, session: "tests" } 21 | - { python-version: "3.10", os: macos-latest, session: "tests" } 22 | - { python-version: "3.10", os: ubuntu-latest, session: "typeguard" } 23 | - { python-version: "3.10", os: ubuntu-latest, session: "xdoctest" } 24 | 25 | env: 26 | NOXSESSION: ${{ matrix.session }} 27 | 28 | steps: 29 | - name: Check out the repository 30 | uses: actions/checkout@v4 31 | 32 | - name: Set up Python ${{ matrix.python-version }} 33 | uses: actions/setup-python@v5.4.0 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | 37 | - name: Install uv 38 | uses: astral-sh/setup-uv@v5 39 | with: 40 | # Install a specific version of uv. 
41 | version: "0.5.2" 42 | 43 | - name: install graphviz 44 | uses: ts-graphviz/setup-graphviz@v2 45 | 46 | - name: Compute pre-commit cache key 47 | if: matrix.session == 'pre-commit' 48 | id: pre-commit-cache 49 | shell: python 50 | run: | 51 | import hashlib 52 | import sys 53 | 54 | python = "py{}.{}".format(*sys.version_info[:2]) 55 | payload = sys.version.encode() + sys.executable.encode() 56 | digest = hashlib.sha256(payload).hexdigest() 57 | result = "${{ runner.os }}-{}-{}-pre-commit".format(python, digest[:8]) 58 | 59 | print("::set-output name=result::{}".format(result)) 60 | 61 | - name: Restore pre-commit cache 62 | uses: actions/cache@v4.2.0 63 | if: matrix.session == 'pre-commit' 64 | with: 65 | path: ~/.cache/pre-commit 66 | key: ${{ steps.pre-commit-cache.outputs.result }}-${{ hashFiles('.pre-commit-config.yaml') }} 67 | restore-keys: | 68 | ${{ steps.pre-commit-cache.outputs.result }}- 69 | 70 | - name: Run Nox 71 | run: | 72 | uv run nox --force-color --python=${{ matrix.python-version }} 73 | 74 | - name: Upload coverage data 75 | if: always() && matrix.session == 'tests' 76 | uses: "actions/upload-artifact@v4" 77 | with: 78 | name: coverage-data-${{ matrix.session }}-${{ matrix.python-version }}-${{ matrix.os }} 79 | path: ".coverage.*" 80 | if-no-files-found: ignore 81 | include-hidden-files: true 82 | 83 | coverage: 84 | runs-on: ubuntu-latest 85 | needs: tests 86 | steps: 87 | - name: Check out the repository 88 | uses: actions/checkout@v4 89 | 90 | - name: Set up Python 3.11 91 | uses: actions/setup-python@v5.4.0 92 | with: 93 | python-version: 3.11 94 | 95 | - name: Install uv 96 | uses: astral-sh/setup-uv@v5 97 | with: 98 | # Install a specific version of uv. 99 | version: "0.5.2" 100 | 101 | - name: Install dependencies 102 | run: | 103 | uv sync --extra dev 104 | 105 | - name: Download coverage data 106 | uses: actions/download-artifact@v4 107 | with: 108 | pattern: coverage-data-* 109 | merge-multiple: true 110 | 111 | - name: Combine coverage data and display human readable report 112 | run: | 113 | uv run nox --force-color --session=coverage 114 | 115 | - name: Create coverage report 116 | run: | 117 | uv run nox --force-color --session=coverage -- xml 118 | 119 | - name: Upload coverage report 120 | uses: codecov/codecov-action@v5.3.1 121 | with: 122 | token: ${{ secrets.CODECOV_TOKEN }} 123 | slug: aeturrell/smartrappy 124 | -------------------------------------------------------------------------------- /docs/output_options.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "title: Other output options\n", 9 | "execute:\n", 10 | " echo: false\n", 11 | "---\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Graphviz\n", 19 | "\n", 20 | "```bash\n", 21 | "uv run smartrappy . --internal --format=graphviz\n", 22 | "```\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "![](output.svg)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Mermaid\n", 37 | "\n", 38 | "```bash\n", 39 | "uv run smartrappy . 
--internal --format=mermaid\n",
 40 |     "```"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "```{mermaid}\n",
 48 |     "graph TD\n",
 49 |     "    %% Style definitions\n",
 50 |     "    classDef scriptNode fill:#90EE90,stroke:#333,stroke-width:2px;\n",
 51 |     "    classDef fileNode fill:#FFB6C1,stroke:#333,stroke-width:2px;\n",
 52 |     "    classDef quartoNode fill:#00CED1,stroke:#333,stroke-width:2px;\n",
 53 |     "    classDef notebookNode fill:#FFD700,stroke:#333,stroke-width:2px;\n",
 54 |     "    classDef missingFile fill:#FFB6C1,stroke:#FF0000,stroke-width:3px,stroke-dasharray: 5 5;\n",
 55 |     "    classDef internalModule fill:#ADD8E6,stroke:#333,stroke-width:2px;\n",
 56 |     "    classDef externalModule fill:#FFA07A,stroke:#333,stroke-width:2px;\n",
 57 |     "    classDef importedItem fill:#ADD8E6,stroke:#333,stroke-width:2px,shape:circle;\n",
 58 |     "    classDef externalImportedItem fill:#FFA07A,stroke:#333,stroke-width:2px,shape:circle;\n",
 59 |     "    classDef databaseNode fill:#B19CD9,stroke:#333,stroke-width:2px,shape:cylinder;\n",
 60 |     "\n",
 61 |     "    %% Nodes\n",
 62 |     "    quarto_document_13558783[\"slides.qmd\"]:::quartoNode\n",
 63 |     "    data_file_14384327[\"output.png<br>File does not exist\"]:::missingFile\n",
 64 |     "    data_file_7682433[\"equation.tex<br>File does not exist\"]:::missingFile\n",
 65 |     "    data_file_5741772[\"alternative_equation.tex<br>File does not exist\"]:::missingFile\n",
 66 |     "    jupyter_notebook_2961208[\"nb_example.ipynb\"]:::notebookNode\n",
 67 |     "    data_file_1692456[\"data/input.csv<br>Modified: 2025-04-20 17:31:30\"]:::fileNode\n",
 68 |     "    script_5034441[\"model_solver.py\"]:::scriptNode\n",
 69 |     "    script_10331269[\"visualisation.py\"]:::scriptNode\n",
 70 |     "    data_file_9084974[\"data/processed.csv<br>File does not exist\"]:::missingFile\n",
 71 |     "    script_3375286[\"data_processing.py\"]:::scriptNode\n",
 72 |     "    database_3520401[\"mydatabase<br>Type: mssql\"]:::databaseNode\n",
 73 |     "    internal_module_3617581((\"data_processing:process_data\")):::importedItem\n",
 74 |     "\n",
 75 |     "    %% Relationships\n",
 76 |     "    data_file_14384327 --> quarto_document_13558783\n",
 77 |     "    data_file_7682433 --> quarto_document_13558783\n",
 78 |     "    data_file_5741772 --> quarto_document_13558783\n",
 79 |     "    data_file_1692456 --> jupyter_notebook_2961208\n",
 80 |     "    jupyter_notebook_2961208 --> data_file_5741772\n",
 81 |     "    script_5034441 --> data_file_7682433\n",
 82 |     "    script_10331269 --> data_file_14384327\n",
 83 |     "    data_file_9084974 --> script_10331269\n",
 84 |     "    script_3375286 --> data_file_9084974\n",
 85 |     "    data_file_1692456 --> script_3375286\n",
 86 |     "    database_3520401 --> script_3375286\n",
 87 |     "    script_3375286 --> database_3520401\n",
 88 |     "    internal_module_3617581 --> script_10331269\n",
 89 |     "```\n"
 90 |    ]
 91 |   }
 92 |  ],
 93 |  "metadata": {
 94 |   "kernelspec": {
 95 |    "display_name": ".venv",
 96 |    "language": "python",
 97 |    "name": "python3"
 98 |   },
 99 |   "language_info": {
100 |    "codemirror_mode": {
101 |     "name": "ipython",
102 |     "version": 3
103 |    },
104 |    "file_extension": ".py",
105 |    "mimetype": "text/x-python",
106 |    "name": "python",
107 |    "nbconvert_exporter": "python",
108 |    "pygments_lexer": "ipython3",
109 |    "version": "3.12.0"
110 |   }
111 |  },
112 |  "nbformat": 4,
113 |  "nbformat_minor": 4
114 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | tests/test_output/**
 2 | !tests/test_output/.gitkeep
 3 | 
 4 | # Package related files
 5 | /build/
 6 | .mypy_cache/
 7 | /.coverage
 8 | /.coverage.*
 9 | /.nox/
10 | /.python-version
11 | /.pytype/
12 | /dist/
13 | /docs/_build/
14 | /src/*.egg-info/
15 | __pycache__/
16 | settings.json
17 | .DS_store
18 | /docs/.quarto/
19 | /.quarto/
20 | .venv/
21 | /docs_old/
22 | docs/reference/*
23 | docs/_sidebar.yml
24 | /.luarc.json
25 | docs/_site/
26 | 
27 | # Byte-compiled / optimized / DLL files
28 | __pycache__/
29 | *.py[cod]
30 | *$py.class
31 | 
32 | # C extensions
33 | *.so
34 | 
35 | # Distribution / packaging
36 | .Python
37 | build/
38 | develop-eggs/
39 | dist/
40 | downloads/
41 | eggs/
42 | .eggs/
43 | lib/
44 | lib64/
45 | parts/
46 | sdist/
47 | var/
48 | wheels/
49 | share/python-wheels/
50 | *.egg-info/
51 | .installed.cfg
52 | *.egg
53 | MANIFEST
54 | 
55 | # PyInstaller
56 | # Usually these files are written by a python script from a template
57 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
58 | *.manifest 59 | *.spec 60 | 61 | # Installer logs 62 | pip-log.txt 63 | pip-delete-this-directory.txt 64 | 65 | # Unit test / coverage reports 66 | htmlcov/ 67 | .tox/ 68 | .nox/ 69 | .coverage 70 | .coverage.* 71 | .cache 72 | nosetests.xml 73 | coverage.xml 74 | *.cover 75 | *.py,cover 76 | .hypothesis/ 77 | .pytest_cache/ 78 | cover/ 79 | 80 | # Translations 81 | *.mo 82 | *.pot 83 | 84 | # Django stuff: 85 | *.log 86 | local_settings.py 87 | db.sqlite3 88 | db.sqlite3-journal 89 | 90 | # Flask stuff: 91 | instance/ 92 | .webassets-cache 93 | 94 | # Scrapy stuff: 95 | .scrapy 96 | 97 | # Sphinx documentation 98 | docs/_build/ 99 | 100 | # PyBuilder 101 | .pybuilder/ 102 | target/ 103 | 104 | # Jupyter Notebook 105 | .ipynb_checkpoints 106 | 107 | # IPython 108 | profile_default/ 109 | ipython_config.py 110 | 111 | # pyenv 112 | # For a library or package, you might want to ignore these files since the code is 113 | # intended to run in multiple environments; otherwise, check them in: 114 | # .python-version 115 | 116 | # pipenv 117 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 118 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 119 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 120 | # install all needed dependencies. 121 | #Pipfile.lock 122 | 123 | # UV 124 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 125 | # This is especially recommended for binary packages to ensure reproducibility, and is more 126 | # commonly ignored for libraries. 127 | #uv.lock 128 | 129 | # poetry 130 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 131 | # This is especially recommended for binary packages to ensure reproducibility, and is more 132 | # commonly ignored for libraries. 133 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 134 | #poetry.lock 135 | 136 | # pdm 137 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 138 | #pdm.lock 139 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 140 | # in version control. 141 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 142 | .pdm.toml 143 | .pdm-python 144 | .pdm-build/ 145 | 146 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 147 | __pypackages__/ 148 | 149 | # Celery stuff 150 | celerybeat-schedule 151 | celerybeat.pid 152 | 153 | # SageMath parsed files 154 | *.sage.py 155 | 156 | # Environments 157 | .env 158 | .venv 159 | env/ 160 | venv/ 161 | ENV/ 162 | env.bak/ 163 | venv.bak/ 164 | 165 | # Spyder project settings 166 | .spyderproject 167 | .spyproject 168 | 169 | # Rope project settings 170 | .ropeproject 171 | 172 | # mkdocs documentation 173 | /site 174 | 175 | # mypy 176 | .mypy_cache/ 177 | .dmypy.json 178 | dmypy.json 179 | 180 | # Pyre type checker 181 | .pyre/ 182 | 183 | # pytype static type analyser 184 | .pytype/ 185 | 186 | # Cython debug symbols 187 | cython_debug/ 188 | 189 | # PyCharm 190 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 191 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 192 | # and can be added to the global gitignore or merged into this file. 
For a more nuclear 193 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 194 | #.idea/ 195 | 196 | # PyPI configuration file 197 | .pypirc 198 | -------------------------------------------------------------------------------- /docs/contributing.qmd: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Thank you for your interest in improving this project. This project is 4 | open-source under the [MIT license](https://opensource.org/licenses/MIT) 5 | and welcomes contributions in the form of bug reports, feature requests, 6 | and pull requests. 7 | 8 | Here is a list of important resources for contributors: 9 | 10 | - [Source Code](https://github.com/aeturrell/smartrappy) 11 | - [Documentation](https://aeturrell.github.io/smartrappy/) 12 | - [Issue Tracker](https://github.com/aeturrell/smartrappy/issues) 13 | 14 | ## How to report a bug 15 | 16 | Report bugs on the [Issue Tracker](https://github.com/aeturrell/smartrappy/issues). 17 | 18 | When filing an issue, make sure to answer these questions: 19 | 20 | - Which operating system and Python version are you using? 21 | - Which version of this project are you using? 22 | - What did you do? 23 | - What did you expect to see? 24 | - What did you see instead? 25 | 26 | The best way to get your bug fixed is to provide a test case, and/or 27 | steps to reproduce the issue. 28 | 29 | ## How to request a feature 30 | 31 | Request features on the [Issue Tracker](https://github.com/aeturrell/smartrappy/issues). 32 | 33 | ## How to set up your development environment 34 | 35 | You need Python and the following tools: 36 | 37 | - [uv](https://docs.astral.sh/uv/) 38 | - [Nox](https://nox.thea.codes/) 39 | - [Make](https://www.gnu.org/software/make/) 40 | - [Quarto](https://quarto.org/) 41 | 42 | Install the package with the existing development requirements: 43 | 44 | ```bash 45 | $ uv sync --frozen 46 | ``` 47 | 48 | To also update packages, do not use the `--frozen` flag. 49 | 50 | To build the documentation locally, you will also need [Make](https://www.gnu.org/software/make/) and [Quarto](https://quarto.org/) (these are non-Python dependencies). 51 | 52 | You can build the docs locally to look at them with `make`, which runs one command to build the README and then another to build the website; the site can then be found in `docs/_site/`. Run `make clean` to remove the existing README. 53 | 54 | To publish new docs to GitHub Pages (where the documentation is displayed as web pages), it's `make publish`—but only devs with admin rights will be able to execute this. 55 | 56 | ## How to test the project 57 | 58 | Run the full test suite: 59 | 60 | ```bash 61 | $ uv run nox 62 | ``` 63 | 64 | List the available Nox sessions: 65 | 66 | ```bash 67 | $ uv run nox --list-sessions 68 | ``` 69 | 70 | You can also run a specific Nox session. For example, invoke the unit 71 | test suite like this: 72 | 73 | ```bash 74 | $ uv run nox --session=tests 75 | ``` 76 | 77 | Unit tests are located in the `tests` directory, and are written using 78 | the [pytest](https://pytest.readthedocs.io/) testing framework. 79 | 80 | Prefixing commands with `uv run`, as above, ensures that the 81 | tests are run in the right environment. You can also pass arguments through Nox to pytest, as in the sketch below.
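For example, to run only a subset of the tests, you can pass extra arguments through to pytest after the `--` separator. This is a sketch rather than a documented workflow—the `-k qmd` filter is just an illustrative pytest expression—and it relies on the `tests` session in `noxfile.py` forwarding its positional arguments to pytest via `*session.posargs`:

```bash
# Run only the tests whose names match "qmd" (illustrative filter)
$ uv run nox --session=tests -- -k qmd
```

Any other pytest option should work in the same position, because everything after `--` is handed to the test runner unchanged.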
82 | 83 | For the pre-commit checks, use 84 | 85 | ```bash 86 | $ uv run pre-commit run --all-files 87 | ``` 88 | 89 | ## How to submit changes 90 | 91 | Open a [pull request](https://github.com/aeturrell/smartrappy/pulls) to 92 | submit changes to this project. 93 | 94 | Your pull request needs to meet the following guidelines for acceptance: 95 | 96 | - The Nox test suite must pass without errors or warnings. 97 | - Include unit tests. This project aims to maintain 96% code 98 | coverage. 99 | - If your changes add functionality, update the documentation 100 | accordingly. 101 | - Run `make` to generate the new documentation. 102 | - Run the pre-commit suite before committing. 103 | 104 | Feel free to submit early, though---we can always iterate on this. 105 | 106 | To install the pre-commit hooks, so that linting and code formatting checks 107 | run automatically before each commit, run the following 108 | command: 109 | 110 | ```bash 111 | $ uv run nox --session=pre-commit -- install 112 | ``` 113 | 114 | It is recommended to open an issue before starting work on anything. 115 | This will allow a chance to talk it over with the owners and validate 116 | your approach. 117 | 118 | ## How to create a package release 119 | 120 | - Open a new branch with the version name 121 | 122 | - Change the version in pyproject.toml (you can run `uv run version_bumper.py`, which has script-level dependencies) 123 | 124 | - Commit the change with a new version label as the commit message (checking the tests pass) 125 | 126 | - Head to GitHub and merge into main (again, only if the CI passes) 127 | 128 | - Confirm the release draft on GitHub 129 | 130 | - The automatic release GitHub Action will push to PyPI. 131 | 132 | If you ever need distributable files, you can use the `uv build` command locally. 133 | 134 | ## How to build the documentation manually and locally 135 | 136 | You shouldn't need to publish the documentation because there's a GitHub action that covers it automatically whenever there's a new release. But to upload the documentation manually, it's: 137 | 138 | - Run `make` to build the documentation 139 | - Run `make publish` to publish the documentation 140 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | """Nox sessions.""" 2 | 3 | from pathlib import Path 4 | from textwrap import dedent 5 | 6 | import nox 7 | 8 | package = "smartrappy" 9 | python_versions = ["3.10", "3.11", "3.12"] 10 | nox.needs_version = ">= 2021.6.6" 11 | nox.options.default_venv_backend = "uv" 12 | nox.options.sessions = ( 13 | "pre-commit", 14 | "tests", 15 | "typeguard", 16 | "xdoctest", 17 | ) 18 | 19 | 20 | def activate_virtualenv_in_precommit_hooks(session: nox.Session) -> None: 21 | """Activate virtualenv in hooks installed by pre-commit. 22 | 23 | This function patches git hooks installed by pre-commit to activate the 24 | session's virtual environment. This allows pre-commit to locate hooks in 25 | that environment when invoked from git. 26 | 27 | Args: 28 | session: The Session object.
29 | """ 30 | if session.bin is None: 31 | return 32 | 33 | virtualenv = session.env.get("VIRTUAL_ENV") 34 | if virtualenv is None: 35 | return 36 | 37 | hookdir = Path(".git") / "hooks" 38 | if not hookdir.is_dir(): 39 | return 40 | 41 | for hook in hookdir.iterdir(): 42 | if hook.name.endswith(".sample") or not hook.is_file(): 43 | continue 44 | 45 | text = hook.read_text() 46 | bindir = repr(session.bin)[1:-1] # strip quotes 47 | if not ( 48 | Path("A") == Path("a") and bindir.lower() in text.lower() or bindir in text 49 | ): 50 | continue 51 | 52 | lines = text.splitlines() 53 | if not (lines[0].startswith("#!") and "python" in lines[0].lower()): 54 | continue 55 | 56 | header = dedent( 57 | f"""\ 58 | import os 59 | os.environ["VIRTUAL_ENV"] = {virtualenv!r} 60 | os.environ["PATH"] = os.pathsep.join(( 61 | {session.bin!r}, 62 | os.environ.get("PATH", ""), 63 | )) 64 | """ 65 | ) 66 | 67 | lines.insert(1, header) 68 | hook.write_text("\n".join(lines)) 69 | 70 | 71 | @nox.session(python=python_versions) 72 | def tests(session: nox.Session) -> None: 73 | """Run the test suite.""" 74 | session.run_install( 75 | "uv", 76 | "sync", 77 | "--group", 78 | "dev", 79 | env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, 80 | ) 81 | session.env["PYTHONPATH"] = "src" 82 | 83 | try: 84 | session.run( 85 | "coverage", 86 | "run", 87 | "--parallel", 88 | "-m", 89 | "pytest", 90 | "--cache-clear", 91 | external=True, 92 | env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, 93 | *session.posargs, 94 | ) 95 | finally: 96 | if session.interactive: 97 | session.notify("coverage", posargs=[]) 98 | 99 | 100 | @nox.session(python=python_versions[0]) 101 | def coverage(session: nox.Session) -> None: 102 | """Produce the coverage report.""" 103 | args = session.posargs or ["report"] 104 | session.run( 105 | "uv", 106 | "pip", 107 | "install", 108 | "coverage[toml]", 109 | env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, 110 | external=True, 111 | ) 112 | if not session.posargs and any(Path().glob(".coverage.*")): 113 | session.run("coverage", "erase", "--data-file=.coverage") 114 | session.run("coverage", "combine") 115 | 116 | session.run("coverage", *args, "-i") 117 | 118 | 119 | @nox.session(name="pre-commit", python=python_versions[0], venv_backend="uv") 120 | def precommit(session: nox.Session) -> None: 121 | """Lint using pre-commit.""" 122 | args = session.posargs or ["run", "--all-files", "--show-diff-on-failure"] 123 | session.run_install( 124 | "uv", 125 | "sync", 126 | "--extra=dev", 127 | env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, 128 | ) 129 | session.run("pre-commit", *args) 130 | if args and args[0] == "install": 131 | activate_virtualenv_in_precommit_hooks(session) 132 | 133 | 134 | @nox.session(venv_backend="uv", python=python_versions) 135 | def typeguard(session: nox.Session) -> None: 136 | """Runtime type checking using Typeguard.""" 137 | # Install project and dependencies using uv 138 | session.run_install( 139 | "uv", 140 | "sync", 141 | "--extra=dev", 142 | env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, 143 | ) 144 | session.run_install("uv", "pip", "install", "-e", ".") 145 | session.run("pytest", f"--typeguard-packages={package}", *session.posargs) 146 | 147 | 148 | @nox.session(venv_backend="uv", python=python_versions) 149 | def xdoctest(session: nox.Session) -> None: 150 | """Run examples with xdoctest.""" 151 | args = session.posargs or ["all"] 152 | 153 | # Install project and dependencies using uv 154 | 
session.run_install( 155 | "uv", 156 | "sync", 157 | "--extra=dev", 158 | env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, 159 | ) 160 | session.run_install("uv", "pip", "install", "-e", ".") 161 | session.run("python", "-m", "xdoctest", package, *args) 162 | -------------------------------------------------------------------------------- /src/smartrappy/__main__.py: -------------------------------------------------------------------------------- 1 | """Command-line interface for smartrappy.""" 2 | 3 | import os 4 | import sys 5 | from datetime import datetime 6 | 7 | import click 8 | 9 | from smartrappy import __version__ 10 | from smartrappy.analyser import analyse_project 11 | from smartrappy.reporters import get_reporter 12 | 13 | 14 | def validate_repo_path(ctx, param, value): 15 | """Validate that the input path exists and is a directory.""" 16 | if not os.path.exists(value): 17 | raise click.BadParameter(f"Path does not exist: {value}") 18 | if not os.path.isdir(value): 19 | raise click.BadParameter(f"Path is not a directory: {value}") 20 | return value 21 | 22 | 23 | def validate_output_path(ctx, param, value): 24 | """Validate that the output path is writable.""" 25 | if value is None: 26 | return None 27 | 28 | try: 29 | directory = os.path.dirname(value) or "." 30 | if not os.path.exists(directory): 31 | os.makedirs(directory) 32 | # Check if we can write to this location 33 | test_file = f"{value}_test" 34 | with open(test_file, "w") as f: 35 | f.write("") 36 | os.remove(test_file) 37 | return value 38 | except (OSError, IOError) as e: 39 | raise click.BadParameter(f"Cannot write to output location: {value}\n{str(e)}") 40 | 41 | 42 | @click.command(context_settings=dict(help_option_names=["-h", "--help"])) 43 | @click.argument( 44 | "repo_path", 45 | callback=validate_repo_path, 46 | type=click.Path(exists=True, file_okay=False, dir_okay=True), 47 | ) 48 | @click.option( 49 | "-o", 50 | "--output", 51 | callback=validate_output_path, 52 | help="Output path for the analysis files (without extension)", 53 | type=click.Path(dir_okay=False), 54 | ) 55 | @click.option( 56 | "-f", 57 | "--format", 58 | "format_type", 59 | type=click.Choice(["console", "graphviz", "mermaid", "json"], case_sensitive=False), 60 | default="console", 61 | help="Output format for the analysis (default: console)", 62 | ) 63 | @click.option( 64 | "--all-formats", 65 | is_flag=True, 66 | help="Generate all output formats", 67 | ) 68 | @click.option( 69 | "--internal", 70 | is_flag=True, 71 | help="Only include internal modules in the visualisation (exclude external packages)", 72 | ) 73 | @click.version_option(version=__version__, prog_name="smartrappy") 74 | def main(repo_path, output, format_type, all_formats, internal): 75 | """Smart reproducible analytical pipeline execution analyser. 76 | 77 | Analyses Python projects to create a visual representation of file operations 78 | and module dependencies. 79 | 80 | Examples: 81 | 82 | \b 83 | # Analyse current directory with default console output 84 | smartrappy . 
85 | 86 | \b 87 | # Analyse specific project with graphviz output 88 | smartrappy /path/to/project --format graphviz --output /path/to/output/analysis 89 | 90 | \b 91 | # Generate all output formats 92 | smartrappy /path/to/project --all-formats --output /path/to/output/analysis 93 | 94 | \b 95 | # Show only internal module dependencies 96 | smartrappy /path/to/project --internal 97 | """ 98 | try: 99 | # Analyse the project 100 | click.echo(f"Analysing project at: {repo_path}") 101 | model = analyse_project(repo_path, internal_only=internal) 102 | 103 | # Generate reports 104 | formats_to_generate = ( 105 | ["console", "graphviz", "mermaid", "json"] if all_formats else [format_type] 106 | ) 107 | 108 | for fmt in formats_to_generate: 109 | try: 110 | reporter = get_reporter(fmt) 111 | 112 | # Handle output paths based on format 113 | fmt_output = None 114 | 115 | # Only use output path for formats that need files 116 | if fmt in ["graphviz", "mermaid"]: 117 | # Generate default output path if none provided 118 | if output is None: 119 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 120 | base_output = f"smartrappy_analysis_{timestamp}" 121 | else: 122 | base_output = output 123 | 124 | # Append format type to output path when generating multiple formats 125 | fmt_output = ( 126 | f"{base_output}_{fmt}" 127 | if len(formats_to_generate) > 1 128 | else base_output 129 | ) 130 | 131 | # For JSON, only use output path if explicitly provided by the user 132 | elif fmt == "json" and output is not None: 133 | fmt_output = ( 134 | f"{output}_{fmt}" if len(formats_to_generate) > 1 else output 135 | ) 136 | 137 | reporter.generate_report(model, fmt_output) 138 | except Exception as e: 139 | click.secho( 140 | f"Error generating {fmt} report: {str(e)}", fg="yellow", err=True 141 | ) 142 | 143 | except Exception as e: 144 | click.secho(f"Error during analysis: {str(e)}", fg="red", err=True) 145 | sys.exit(1) 146 | 147 | 148 | if __name__ == "__main__": 149 | main(prog_name="smartrappy") # pragma: no cover 150 | -------------------------------------------------------------------------------- /tests/test_qmd_parser.py: -------------------------------------------------------------------------------- 1 | """Tests for QMD parsing functionality.""" 2 | 3 | from smartrappy.qmd_parser import extract_markdown_resources, extract_python_chunks 4 | 5 | 6 | def test_extract_python_chunks(): 7 | """Test that Python chunks are extracted correctly from QMD files.""" 8 | # Sample QMD content with Python chunks 9 | qmd_content = """# Test QMD File 10 | 11 | This is a test QMD file with Python chunks. 12 | 13 | ```{python} 14 | import pandas as pd 15 | df = pd.read_csv("data.csv") 16 | ``` 17 | 18 | Some markdown text between chunks.
19 | 20 | ```{python} 21 | df.to_excel("output.xlsx") 22 | ``` 23 | 24 | ```{r} 25 | # This is an R chunk that should be ignored 26 | print("Hello from R") 27 | ``` 28 | 29 | ```{python} 30 | import matplotlib.pyplot as plt 31 | plt.plot(df["x"], df["y"]) 32 | plt.savefig("plot.png") 33 | ``` 34 | """ 35 | 36 | # Extract Python chunks 37 | chunks = extract_python_chunks(qmd_content) 38 | 39 | # Check that we found the right number of chunks 40 | assert len(chunks) == 3 41 | 42 | # Check that the chunks have the right content 43 | assert "import pandas as pd" in chunks[0] 44 | assert "df.to_excel(" in chunks[1] 45 | assert "import matplotlib.pyplot" in chunks[2] 46 | 47 | # Check that the R chunk was ignored 48 | for chunk in chunks: 49 | assert "Hello from R" not in chunk 50 | 51 | 52 | def test_empty_qmd_file(): 53 | """Test handling of QMD files with no Python chunks.""" 54 | qmd_content = """# Empty QMD File 55 | 56 | This QMD file has no Python chunks. 57 | 58 | ```{r} 59 | print("Hello from R") 60 | ``` 61 | """ 62 | chunks = extract_python_chunks(qmd_content) 63 | assert len(chunks) == 0 64 | 65 | 66 | def test_malformed_chunks(): 67 | """Test handling of malformed Python chunks.""" 68 | qmd_content = """# Malformed QMD File 69 | 70 | ```{python 71 | # Missing closing brace 72 | x = 1 73 | ``` 74 | 75 | ```{python} 76 | # This one is fine 77 | y = 2 78 | ``` 79 | """ 80 | # The regex should still handle the malformed chunk 81 | chunks = extract_python_chunks(qmd_content) 82 | assert len(chunks) == 1 83 | assert "y = 2" in chunks[0] 84 | 85 | 86 | def test_with_metadata(): 87 | """Test handling of Python chunks with metadata.""" 88 | qmd_content = """# QMD with metadata 89 | 90 | ```{python echo=false, eval=true} 91 | import pandas as pd 92 | df = pd.read_csv("data.csv") 93 | ``` 94 | """ 95 | chunks = extract_python_chunks(qmd_content) 96 | assert len(chunks) == 1 97 | assert "import pandas as pd" in chunks[0] 98 | 99 | 100 | def test_with_actual_file(tmp_path): 101 | """Test extraction from an actual file.""" 102 | # Create a temporary QMD file 103 | qmd_file = tmp_path / "test.qmd" 104 | qmd_content = """# Test QMD File 105 | 106 | ```{python} 107 | import pandas as pd 108 | df = pd.read_csv("data.csv") 109 | df.to_excel("output.xlsx") 110 | ``` 111 | 112 | ```{python} 113 | import matplotlib.pyplot as plt 114 | plt.savefig("plot.png") 115 | ``` 116 | """ 117 | qmd_file.write_text(qmd_content) 118 | 119 | # Extract chunks from the file 120 | with open(qmd_file, "r") as f: 121 | chunks = extract_python_chunks(f.read()) 122 | 123 | assert len(chunks) == 2 124 | assert "import pandas as pd" in chunks[0] 125 | assert "import matplotlib.pyplot as plt" in chunks[1] 126 | 127 | 128 | def test_extract_markdown_resources(): 129 | """Test that markdown resources are extracted correctly from QMD files.""" 130 | # Sample QMD content with both image references and include directives 131 | qmd_content = """# Test QMD File 132 | 133 | This is a test QMD file with markdown image references and includes. 134 | 135 | ![A simple image](/path/to/image.png) 136 | 137 | Some text between resources. 
138 | 139 | {{< include /outputs/equation.tex >}} 140 | 141 | ![Image with spaces in path](/outputs/my diagram.svg) 142 | 143 | {{< include "/outputs/table.html" >}} 144 | 145 | ![External image](https://example.com/image.jpg) 146 | 147 | {{< include 'outputs/data.csv' >}} 148 | 149 | ![Relative path without leading slash](outputs/chart.png) 150 | """ 151 | 152 | # Extract markdown resources 153 | resources = extract_markdown_resources(qmd_content) 154 | 155 | # Check that we found the right resources (excluding external URLs) 156 | assert len(resources) == 6 # 3 images (excluding external URL) + 3 includes 157 | 158 | # Check image resources 159 | image_resources = [path for path, type_ in resources if type_ == "image"] 160 | assert len(image_resources) == 3 161 | assert "path/to/image.png" in image_resources 162 | assert "outputs/my diagram.svg" in image_resources 163 | assert "outputs/chart.png" in image_resources 164 | 165 | # Check include resources 166 | include_resources = [path for path, type_ in resources if type_ == "include"] 167 | assert len(include_resources) == 3 168 | assert "outputs/equation.tex" in include_resources 169 | assert "outputs/table.html" in include_resources 170 | assert "outputs/data.csv" in include_resources 171 | 172 | 173 | def test_complex_quarto_includes(): 174 | """Test handling of complex Quarto include directives.""" 175 | qmd_content = """# Complex cases 176 | 177 | Standard include: 178 | {{< include /outputs/equation.tex >}} 179 | 180 | Include with options: 181 | {{< include /outputs/report.md echo=true >}} 182 | 183 | Include with multiple options: 184 | {{< include /outputs/data.R echo=true eval=false >}} 185 | 186 | Include with whitespace: 187 | {{< include /outputs/whitespace.txt >}} 188 | """ 189 | 190 | resources = extract_markdown_resources(qmd_content) 191 | 192 | # Extract just the include paths 193 | include_paths = [path for path, type_ in resources if type_ == "include"] 194 | 195 | # Check that we found all includes 196 | assert len(include_paths) == 4 197 | assert "outputs/equation.tex" in include_paths 198 | assert "outputs/report.md" in include_paths # Should strip options 199 | assert "outputs/data.R" in include_paths 200 | assert "outputs/whitespace.txt" in include_paths 201 | -------------------------------------------------------------------------------- /src/smartrappy/qmd_parser.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import re 3 | from typing import List, Set, Tuple 4 | 5 | from smartrappy.models import DatabaseInfo, FileInfo, ModuleImport 6 | 7 | 8 | def extract_python_chunks(qmd_content: str) -> List[str]: 9 | """ 10 | Extract Python code chunks from a Quarto markdown file. 11 | 12 | Args: 13 | qmd_content: The content of the QMD file as a string 14 | 15 | Returns: 16 | A list of Python code chunks found in the file 17 | """ 18 | # Pattern to match Python code chunks in QMD files 19 | # Matches ```{python} ... ``` blocks, including those with parameters 20 | pattern = r"```\{python[^}]*\}(.*?)```" 21 | 22 | # Find all matches using re.DOTALL to match across multiple lines 23 | matches = re.findall(pattern, qmd_content, re.DOTALL) 24 | 25 | # Clean up the chunks (remove leading/trailing whitespace) 26 | cleaned_chunks = [chunk.strip() for chunk in matches] 27 | 28 | return cleaned_chunks 29 | 30 | 31 | def extract_markdown_resources(qmd_content: str) -> List[Tuple[str, str]]: 32 | """ 33 | Extract markdown resource references from a Quarto markdown file. 34 | 35 | Extracts: 36 | 1.
Image references: ![alt](/path/to/image.ext) 37 | 2. Include directives: {{< include /path/to/file.ext >}} 38 | 39 | Args: 40 | qmd_content: The content of the QMD file as a string 41 | 42 | Returns: 43 | A list of tuples containing (file_path, resource_type) 44 | """ 45 | resources = [] 46 | 47 | # Pattern to match markdown image syntax ![alt](path) 48 | image_pattern = r"!\[.*?\]\(([^)]+)\)" 49 | image_matches = re.findall(image_pattern, qmd_content) 50 | 51 | # Pattern to match Quarto include directives {{< include /path/to/file >}} or {{< include /path/to/file param=value >}} 52 | include_pattern = r"\{\{<\s*include\s+([^\s>]+)(?:\s+[^>]+?)?\s*>\}\}" 53 | include_matches = re.findall(include_pattern, qmd_content) 54 | 55 | # Process image paths 56 | for path in image_matches: 57 | # Remove query parameters if present 58 | clean_path = path.split("?")[0].strip() 59 | # Remove any fragment identifiers 60 | clean_path = clean_path.split("#")[0].strip() 61 | # Remove any surrounding quotation marks 62 | if (clean_path.startswith('"') and clean_path.endswith('"')) or ( 63 | clean_path.startswith("'") and clean_path.endswith("'") 64 | ): 65 | clean_path = clean_path[1:-1] 66 | 67 | # Ignore external URLs 68 | if not clean_path.startswith(("http://", "https://", "ftp://")): 69 | # Remove leading slash if present 70 | if clean_path.startswith("/"): 71 | clean_path = clean_path[1:] 72 | resources.append((clean_path, "image")) 73 | 74 | # Process include directives 75 | for path in include_matches: 76 | clean_path = path.strip() 77 | 78 | # The regex might capture additional parameters after the path, 79 | # so ensure we just get the file path by splitting on whitespace 80 | # and taking the first part (the file path) 81 | clean_path = clean_path.split()[0] if " " in clean_path else clean_path 82 | 83 | # Remove any surrounding quotation marks 84 | if (clean_path.startswith('"') and clean_path.endswith('"')) or ( 85 | clean_path.startswith("'") and clean_path.endswith("'") 86 | ): 87 | clean_path = clean_path[1:-1] 88 | 89 | # Remove leading slash if present 90 | if clean_path.startswith("/"): 91 | clean_path = clean_path[1:] 92 | 93 | resources.append((clean_path, "include")) 94 | 95 | return resources 96 | 97 | 98 | def analyse_qmd_file( 99 | file_path: str, 100 | project_modules: Set[str], 101 | FileOperationFinder, 102 | ModuleImportFinder, 103 | DatabaseOperationFinder, 104 | ) -> Tuple[List[FileInfo], List[ModuleImport], List[DatabaseInfo]]: 105 | """ 106 | Analyse a Quarto markdown file for Python code chunks and external resources. 
107 | 108 | Detects: 109 | - Python code chunks 110 | - Markdown image references (![alt](/path/to/image.ext)) 111 | - Quarto include directives ({{< include /path/to/file.ext >}}) 112 | 113 | Args: 114 | file_path: Path to the QMD file 115 | project_modules: Set of known project module names 116 | FileOperationFinder: Class to find file operations 117 | ModuleImportFinder: Class to find module imports 118 | DatabaseOperationFinder: Class to find database operations 119 | 120 | Returns: 121 | A tuple of (file_operations, imports, database_operations) 122 | """ 123 | try: 124 | # Read the QMD file content 125 | with open(file_path, "r", encoding="utf-8") as f: 126 | qmd_content = f.read() 127 | 128 | # Extract Python code chunks 129 | python_chunks = extract_python_chunks(qmd_content) 130 | 131 | # Extract markdown resources (images and includes) 132 | resources = extract_markdown_resources(qmd_content) 133 | 134 | # Create FileInfo objects for resource inputs (images, includes, etc.) 135 | resource_file_ops = [] 136 | for resource_path, resource_type in resources: 137 | # All external resources are considered read operations in QMD files 138 | resource_file_ops.append( 139 | FileInfo( 140 | filename=resource_path, 141 | is_read=True, 142 | is_write=False, 143 | source_file=file_path, 144 | ) 145 | ) 146 | 147 | # Initialize result lists 148 | all_file_ops = resource_file_ops # Start with external resource operations 149 | all_imports = [] 150 | all_db_ops = [] 151 | 152 | # Process each Python chunk separately 153 | for i, chunk in enumerate(python_chunks): 154 | try: 155 | # Parse the chunk as Python code 156 | tree = ast.parse(chunk) 157 | 158 | # Find file operations 159 | file_finder = FileOperationFinder(file_path) 160 | file_finder.visit(tree) 161 | all_file_ops.extend(file_finder.file_operations) 162 | 163 | # Find imports 164 | import_finder = ModuleImportFinder(file_path, project_modules) 165 | import_finder.visit(tree) 166 | all_imports.extend(import_finder.imports) 167 | 168 | # Find database operations 169 | db_finder = DatabaseOperationFinder(file_path) 170 | db_finder.visit(tree) 171 | all_db_ops.extend(db_finder.database_operations) 172 | 173 | except SyntaxError as e: 174 | print(f"Syntax error in Python chunk {i + 1} of {file_path}: {str(e)}") 175 | 176 | return all_file_ops, all_imports, all_db_ops 177 | 178 | except (UnicodeDecodeError, IOError) as e: 179 | print(f"Error processing QMD file {file_path}: {str(e)}") 180 | return [], [], [] 181 | -------------------------------------------------------------------------------- /docs/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "execute:\n", 9 | " echo: false\n", 10 | "---\n", 11 | "\n", 12 | "# smartrappy\n", 13 | "\n", 14 | "Smart reproducible analytical pipeline inspection.\n", 15 | "\n", 16 | "![SVG logo of smartrappy](logo.svg){width=40%}" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "[![PyPI](https://img.shields.io/pypi/v/smartrappy.svg)](https://pypi.org/project/smartrappy/)\n", 24 | "[![Status](https://img.shields.io/pypi/status/smartrappy.svg)](https://pypi.org/project/smartrappy/)\n", 25 | "[![Python Version](https://img.shields.io/pypi/pyversions/smartrappy)](https://pypi.org/project/smartrappy)\n", 26 | "[![License](https://img.shields.io/pypi/l/smartrappy)](https://opensource.org/licenses/MIT)\n", 27 | 
"[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)\n", 28 | "[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)\n", 29 | "[![Tests](https://github.com/aeturrell/smartrappy/workflows/Tests/badge.svg)](https://github.com/aeturrell/smartrappy/actions?workflow=Tests)\n", 30 | "[![Codecov](https://codecov.io/gh/aeturrell/smartrappy/branch/main/graph/badge.svg)](https://codecov.io/gh/aeturrell/smartrappy)\n", 31 | "[![Read the documentation at https://aeturrell.github.io/smartrappy/](https://img.shields.io/badge/docs-passing-brightgreen)](https://aeturrell.github.io/smartrappy/)\n", 32 | "[![Downloads](https://static.pepy.tech/badge/smartrappy)](https://pepy.tech/project/smartrappy)\n", 33 | "\n", 34 | "\n", 35 | "![Linux](https://img.shields.io/badge/Linux-FCC624?style=for-the-badge&logo=linux&logoColor=black)\n", 36 | "![macOS](https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=macos&logoColor=F0F0F0)\n", 37 | "![Windows](https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white)\n", 38 | "[![Source](https://img.shields.io/badge/source%20code-github-lightgrey?style=for-the-badge)](https://github.com/aeturrell/smartrappy)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## Introduction\n", 46 | "\n", 47 | "### What does this package do?\n", 48 | "\n", 49 | "**smartrappy** analyses a Python project and infers the directed acyclic graph (DAG) of the code and data dependencies, including the last time any data were refreshed and whether the data exist at all on disk. It is not perfect, and will miss a lot in complex projects: but for simple projects using, say, `pd.read_csv()`, it does a good job of inferring the steps. It can also infer writing to and from most databases. The inferred DAG is then visualised, and there are several options for doing that—the default being to produce a visualisation in the terminal.\n", 50 | "\n", 51 | "### What is **smartrappy** for?\n", 52 | "\n", 53 | "**smartrappy** is designed to help you understand the dependencies in a project, especially in a context where there may be a lot of legacy code that resembles tangled spaghetti.\n", 54 | "\n", 55 | "### Quickstart\n", 56 | "\n", 57 | "To use **smartrappy** as a command-line tool:\n", 58 | "\n", 59 | "```bash\n", 60 | "smartrappy /path/to/your/project\n", 61 | "```\n", 62 | "\n", 63 | "Or to use it within a Python script:\n", 64 | "\n", 65 | "```python\n", 66 | "from smartrappy import analyse_project\n", 67 | "from smartrappy.reporters import ConsoleReporter\n", 68 | "\n", 69 | "\n", 70 | "model = analyse_project(\"/path/to/your/project\")\n", 71 | "reporter = ConsoleReporter()\n", 72 | "reporter.generate_report(model)\n", 73 | "```\n", 74 | "\n", 75 | "### Installation\n", 76 | "\n", 77 | "To install **smartrappy**, you can use `pip install smartrappy` or `uv add smartrappy` if you are using [Astral's uv](https://docs.astral.sh/uv/). You can also use it as a standalone command-line tool with uv and the `uvx` command:\n", 78 | "\n", 79 | "```bash\n", 80 | "uvx smartrappy path/to/your/project\n", 81 | "```\n", 82 | "\n", 83 | "### Documentation\n", 84 | "\n", 85 | "You can find the full documentation for **smartrappy** at [https://aeturrell.github.io/smartrappy/](https://aeturrell.github.io/smartrappy/)." 
86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "## Example of output\n", 93 | "\n", 94 | "```bash\n", 95 | "smartrappy .\n", 96 | "```" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "! uv run smartrappy ../tests/test_set_two" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "The graphviz and mermaid options are equally as aesthetically pleasing! [Head to the docs](https://aeturrell.github.io/smartrappy/output_options.html) to see those." 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Options and configuration\n", 120 | "\n", 121 | "### Command-line\n", 122 | "\n", 123 | "```bash\n", 124 | "smartrappy [OPTIONS] path/to/your/project\n", 125 | "```\n", 126 | "\n", 127 | "### Arguments\n", 128 | "\n", 129 | "- `path/to/your/project`: Path to the Python project directory to analyse (required)\n", 130 | "\n", 131 | "### Options\n", 132 | "\n", 133 | "- `-o, --output PATH`: Output path for the analysis files (without extension)\n", 134 | "- `-f, --format [console|graphviz|mermaid|json]`: Output format (default: console)\n", 135 | "- `--all-formats`: Generate all output formats. Because why shouldn't you have it all?\n", 136 | "- `-h, --help`: Show help message\n", 137 | "- `--internal`: Show only internal code dependencies. (default: false)\n", 138 | "- `--version`: Show version information\n", 139 | "\n", 140 | "### Output formats\n", 141 | "\n", 142 | "- `console`: Output in terminal (default)\n", 143 | "- `graphviz`: Generate Graphviz visualisation, saved as a PDF\n", 144 | "- `mermaid`: Generate Mermaid diagram, which can be embedded in Markdown\n", 145 | "- `json`: Generate JSON representation, printed to the terminal if no output path is specified\n", 146 | "\n", 147 | "By default, outputs are stored in the directory from where the `smartrappy` command is run." 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "## Requirements\n", 155 | "\n", 156 | "You can find a full list of requirements in the [pyproject.toml](https://github.com/aeturrell/smartrappy/blob/main/pyproject.toml) file.\n", 157 | "\n", 158 | "This package also requires that you have [GraphViz](https://graphviz.org/) installed." 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## License\n", 166 | "\n", 167 | "Distributed under the terms of the [MIT license](https://opensource.org/licenses/MIT), *smartrappy* is free and open source software.\n", 168 | "\n", 169 | "## Issues\n", 170 | "\n", 171 | "If you encounter any problems, please [file an issue](https://github.com/aeturrell/smartrappy/issues) along with a detailed description." 
172 | ] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": ".venv", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.12.0" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 4 196 | } 197 | -------------------------------------------------------------------------------- /tests/test_qmd_integration.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from pathlib import Path 4 | 5 | from smartrappy import analyse_project 6 | from smartrappy.models import NodeType 7 | from smartrappy.reporters import ConsoleReporter 8 | 9 | 10 | def test_qmd_integration(): 11 | """Test that QMD files are properly analyzed in a project.""" 12 | with tempfile.TemporaryDirectory() as tmpdir: 13 | # Create a simple project structure with Python and QMD files 14 | tmpdir_path = Path(tmpdir) 15 | 16 | # Create a Python file 17 | py_file = tmpdir_path / "process.py" 18 | py_file.write_text(""" 19 | import pandas as pd 20 | 21 | df = pd.read_csv("input.csv") 22 | df.to_excel("output.xlsx") 23 | """) 24 | 25 | # Create a QMD file 26 | qmd_file = tmpdir_path / "analysis.qmd" 27 | qmd_file.write_text("""# Analysis Document 28 | 29 | This is a Quarto document with Python code chunks. 30 | 31 | ```{python} 32 | import pandas as pd 33 | import matplotlib.pyplot as plt 34 | 35 | df = pd.read_excel("output.xlsx") 36 | plt.plot(df["x"], df["y"]) 37 | plt.savefig("plot.png") 38 | ``` 39 | 40 | ```{python} 41 | # Another code chunk 42 | import sqlite3 43 | 44 | conn = sqlite3.connect("data.db") 45 | df_db = pd.read_sql("SELECT * FROM mytable", conn) 46 | df_db.to_csv("db_export.csv") 47 | ``` 48 | """) 49 | 50 | # Create a dummy data file to make it exist on disk 51 | (tmpdir_path / "input.csv").touch() 52 | 53 | # Analyze the project 54 | model = analyse_project(str(tmpdir_path)) 55 | 56 | # Check that nodes were created for both files 57 | py_script_found = False 58 | qmd_doc_found = False 59 | 60 | for node_id, node in model.nodes.items(): 61 | if node.name == "process.py" and node.type == NodeType.SCRIPT: 62 | py_script_found = True 63 | elif node.name == "analysis.qmd" and node.type == NodeType.QUARTO_DOCUMENT: 64 | qmd_doc_found = True 65 | 66 | assert py_script_found, "Python script node not found in the model" 67 | assert qmd_doc_found, "Quarto document node not found in the model" 68 | 69 | # Check that file operations were detected in the QMD file 70 | qmd_file_ops = [] 71 | for filename, ops in model.file_operations.items(): 72 | for op in ops: 73 | if os.path.basename(op.source_file) == "analysis.qmd": 74 | qmd_file_ops.append((filename, op.is_read, op.is_write)) 75 | 76 | # Verify expected file operations in the QMD file 77 | assert ("output.xlsx", True, False) in qmd_file_ops # Read operation 78 | assert ("plot.png", False, True) in qmd_file_ops # Write operation 79 | assert ("db_export.csv", False, True) in qmd_file_ops # Write operation 80 | 81 | # Check that database operations were detected 82 | db_ops_found = False 83 | for db_name, ops in model.database_operations.items(): 84 | for op in ops: 85 | if os.path.basename(op.source_file) == "analysis.qmd": 86 | db_ops_found = True 87 | break 88 | 89 | assert 
db_ops_found, "Database operations not found for QMD file" 90 | 91 | # Test that the console reporter can handle QMD files without errors 92 | reporter = ConsoleReporter() 93 | reporter.generate_report(model) # This should not raise exceptions 94 | 95 | 96 | def test_empty_qmd(): 97 | """Test that QMD files without Python chunks are handled correctly.""" 98 | with tempfile.TemporaryDirectory() as tmpdir: 99 | tmpdir_path = Path(tmpdir) 100 | 101 | # Create a QMD file without Python chunks 102 | qmd_file = tmpdir_path / "empty.qmd" 103 | qmd_file.write_text("""# Empty Document 104 | 105 | This Quarto document has no Python code chunks. 106 | 107 | ```{r} 108 | # R code that should be ignored 109 | print("Hello from R") 110 | ``` 111 | """) 112 | 113 | # Analyze the project 114 | model = analyse_project(str(tmpdir_path)) 115 | 116 | # Since there are no Python chunks, the QMD file should not appear in the model 117 | qmd_found = False 118 | for _, node in model.nodes.items(): 119 | if node.name == "empty.qmd" and node.type == NodeType.QUARTO_DOCUMENT: 120 | qmd_found = True 121 | break 122 | 123 | assert not qmd_found, "Empty QMD file should not create nodes" 124 | 125 | 126 | def test_qmd_integration_with_all_resources(): 127 | """Test that QMD files with images and include directives are properly analyzed.""" 128 | with tempfile.TemporaryDirectory() as tmpdir: 129 | # Create a simple project structure with a Quarto document containing various resources 130 | tmpdir_path = Path(tmpdir) 131 | 132 | # Create a QMD file with markdown images and include directives 133 | qmd_file = tmpdir_path / "report.qmd" 134 | qmd_file.write_text("""# Comprehensive Quarto Document 135 | 136 | This document includes various types of resources and Python code. 137 | 138 | ## Images 139 | ![First figure](/outputs/figure1.png) 140 | 141 | ## LaTeX Equation 142 | {{< include /outputs/equation.tex >}} 143 | 144 | ## Python Analysis 145 | ```{python} 146 | import pandas as pd 147 | df = pd.read_csv("data.csv") 148 | df.to_excel("processed_data.xlsx") 149 | ``` 150 | 151 | ## Results Visualization 152 | ![Results](/outputs/results.svg) 153 | 154 | ## Data Table 155 | {{< include /outputs/table.html >}} 156 | 157 | ```{python} 158 | import matplotlib.pyplot as plt 159 | plt.figure() 160 | plt.plot(df["x"], df["y"]) 161 | plt.savefig("output_plot.png") 162 | ``` 163 | 164 | ## Appendix 165 | {{< include /outputs/appendix.md >}} 166 | """) 167 | 168 | # Create dummy files to make them exist on disk 169 | outputs_dir = tmpdir_path / "outputs" 170 | outputs_dir.mkdir() 171 | (outputs_dir / "figure1.png").touch() 172 | (outputs_dir / "results.svg").touch() 173 | (outputs_dir / "equation.tex").touch() 174 | (outputs_dir / "table.html").touch() 175 | (outputs_dir / "appendix.md").touch() 176 | 177 | # Create input data file 178 | (tmpdir_path / "data.csv").touch() 179 | 180 | # Analyze the project 181 | model = analyse_project(str(tmpdir_path)) 182 | 183 | # Check that the QMD document was properly processed 184 | qmd_doc_found = False 185 | qmd_node_id = None 186 | for node_id, node in model.nodes.items(): 187 | if node.name == "report.qmd" and node.type == NodeType.QUARTO_DOCUMENT: 188 | qmd_doc_found = True 189 | qmd_node_id = node_id 190 | break 191 | 192 | assert qmd_doc_found, "Quarto document node not found in the model" 193 | 194 | # Collect all file operations from the QMD document 195 | qmd_file_ops = [] 196 | for filename, ops in model.file_operations.items(): 197 | for op in ops: 198 | if 
os.path.basename(op.source_file) == "report.qmd": 199 | qmd_file_ops.append((filename, op.is_read, op.is_write)) 200 | 201 | # Verify all resource types were detected 202 | # Python code operations 203 | assert ("data.csv", True, False) in qmd_file_ops # Read operation 204 | assert ("processed_data.xlsx", False, True) in qmd_file_ops # Write operation 205 | assert ("output_plot.png", False, True) in qmd_file_ops # Write operation 206 | 207 | # Image references (read operations) 208 | assert ("outputs/figure1.png", True, False) in qmd_file_ops 209 | assert ("outputs/results.svg", True, False) in qmd_file_ops 210 | 211 | # Include directives (read operations) 212 | assert ("outputs/equation.tex", True, False) in qmd_file_ops 213 | assert ("outputs/table.html", True, False) in qmd_file_ops 214 | assert ("outputs/appendix.md", True, False) in qmd_file_ops 215 | 216 | # Verify edges in the graph 217 | image_nodes_with_edges = 0 218 | include_nodes_with_edges = 0 219 | 220 | for edge in model.edges: 221 | if edge.target == qmd_node_id and edge.type == "read": 222 | source_node = model.nodes[edge.source] 223 | source_name = source_node.name 224 | 225 | # Count image and include nodes with edges to the QMD document 226 | if source_name in ["outputs/figure1.png", "outputs/results.svg"]: 227 | image_nodes_with_edges += 1 228 | elif source_name in [ 229 | "outputs/equation.tex", 230 | "outputs/table.html", 231 | "outputs/appendix.md", 232 | ]: 233 | include_nodes_with_edges += 1 234 | 235 | # Verify we have the right number of edges for each resource type 236 | assert image_nodes_with_edges == 2, ( 237 | "Not all image nodes have edges to the QMD document" 238 | ) 239 | assert include_nodes_with_edges == 3, ( 240 | "Not all include nodes have edges to the QMD document" 241 | ) 242 | 243 | # Test that the console reporter works with these resources 244 | reporter = ConsoleReporter() 245 | reporter.generate_report(model) # This should not raise exceptions 246 | -------------------------------------------------------------------------------- /tests/test_jupyter_integration.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import tempfile 4 | from pathlib import Path 5 | 6 | from smartrappy import analyse_project 7 | from smartrappy.models import NodeType 8 | from smartrappy.reporters import ConsoleReporter 9 | 10 | 11 | def test_jupyter_integration(): 12 | """Test that Jupyter notebook files are properly analyzed in a project.""" 13 | with tempfile.TemporaryDirectory() as tmpdir: 14 | # Create a simple project structure with Python and notebook files 15 | tmpdir_path = Path(tmpdir) 16 | 17 | # Create a Python file 18 | py_file = tmpdir_path / "process.py" 19 | py_file.write_text(""" 20 | import pandas as pd 21 | 22 | df = pd.read_csv("input.csv") 23 | df.to_excel("output.xlsx") 24 | """) 25 | 26 | # Create a Jupyter notebook file 27 | notebook_file = tmpdir_path / "analysis.ipynb" 28 | notebook_content = { 29 | "cells": [ 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "# Analysis Notebook\n", 35 | "\n", 36 | "This is a Jupyter notebook with Python code cells.", 37 | ], 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 1, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import pandas as pd\n", 46 | "import matplotlib.pyplot as plt\n", 47 | "\n", 48 | "df = pd.read_excel('output.xlsx')\n", 49 | "plt.plot(df['x'], df['y'])\n", 50 | "plt.savefig('plot.png')", 51 | ], 52 | }, 53 
| { 54 | "cell_type": "code", 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# Another code cell\n", 60 | "import sqlite3\n", 61 | "\n", 62 | "conn = sqlite3.connect('data.db')\n", 63 | "df_db = pd.read_sql('SELECT * FROM mytable', conn)\n", 64 | "df_db.to_csv('db_export.csv')", 65 | ], 66 | }, 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 3", 71 | "language": "python", 72 | "name": "python3", 73 | }, 74 | "language_info": { 75 | "name": "python", 76 | "version": "3.11.0", 77 | }, 78 | }, 79 | "nbformat": 4, 80 | "nbformat_minor": 5, 81 | } 82 | notebook_file.write_text(json.dumps(notebook_content, indent=2)) 83 | 84 | # Create a dummy data file to make it exist on disk 85 | (tmpdir_path / "input.csv").touch() 86 | 87 | # Analyze the project 88 | model = analyse_project(str(tmpdir_path)) 89 | 90 | # Check that nodes were created for both files 91 | py_script_found = False 92 | notebook_found = False 93 | 94 | for node_id, node in model.nodes.items(): 95 | if node.name == "process.py" and node.type == NodeType.SCRIPT: 96 | py_script_found = True 97 | elif ( 98 | node.name == "analysis.ipynb" and node.type == NodeType.JUPYTER_NOTEBOOK 99 | ): 100 | notebook_found = True 101 | 102 | assert py_script_found, "Python script node not found in the model" 103 | assert notebook_found, "Jupyter notebook node not found in the model" 104 | 105 | # Check that file operations were detected in the notebook file 106 | notebook_file_ops = [] 107 | for filename, ops in model.file_operations.items(): 108 | for op in ops: 109 | if os.path.basename(op.source_file) == "analysis.ipynb": 110 | notebook_file_ops.append((filename, op.is_read, op.is_write)) 111 | 112 | # Verify expected file operations in the notebook file 113 | assert ("output.xlsx", True, False) in notebook_file_ops # Read operation 114 | assert ("plot.png", False, True) in notebook_file_ops # Write operation 115 | assert ("db_export.csv", False, True) in notebook_file_ops # Write operation 116 | 117 | # Check that database operations were detected 118 | db_ops_found = False 119 | for db_name, ops in model.database_operations.items(): 120 | for op in ops: 121 | if os.path.basename(op.source_file) == "analysis.ipynb": 122 | db_ops_found = True 123 | break 124 | 125 | assert db_ops_found, "Database operations not found for notebook file" 126 | 127 | # Test that the console reporter can handle notebook files without errors 128 | reporter = ConsoleReporter() 129 | reporter.generate_report(model) # This should not raise exceptions 130 | 131 | 132 | def test_empty_jupyter_notebook(): 133 | """Test that Jupyter notebooks without code cells are handled correctly.""" 134 | with tempfile.TemporaryDirectory() as tmpdir: 135 | tmpdir_path = Path(tmpdir) 136 | 137 | # Create a notebook file without code cells 138 | notebook_file = tmpdir_path / "empty.ipynb" 139 | notebook_content = { 140 | "cells": [ 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "# Empty Notebook\n", 146 | "\n", 147 | "This notebook has no code cells.", 148 | ], 149 | }, 150 | ], 151 | "metadata": { 152 | "kernelspec": { 153 | "display_name": "Python 3", 154 | "language": "python", 155 | "name": "python3", 156 | }, 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 5, 160 | } 161 | notebook_file.write_text(json.dumps(notebook_content, indent=2)) 162 | 163 | # Analyze the project 164 | model = analyse_project(str(tmpdir_path)) 165 | 166 | # Since there are no code cells, the 
notebook file should not appear in the model 167 | notebook_found = False 168 | for _, node in model.nodes.items(): 169 | if node.name == "empty.ipynb" and node.type == NodeType.JUPYTER_NOTEBOOK: 170 | notebook_found = True 171 | break 172 | 173 | assert not notebook_found, "Empty notebook file should not create nodes" 174 | 175 | 176 | def test_jupyter_integration_with_complex_operations(): 177 | """Test that Jupyter notebooks with complex operations are properly analyzed.""" 178 | with tempfile.TemporaryDirectory() as tmpdir: 179 | # Create a simple project structure with a notebook containing various operations 180 | tmpdir_path = Path(tmpdir) 181 | 182 | # Create a Jupyter notebook file with various operations 183 | notebook_file = tmpdir_path / "analysis.ipynb" 184 | notebook_content = { 185 | "cells": [ 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "# Comprehensive Jupyter Notebook\n", 191 | "\n", 192 | "This notebook includes various types of operations.", 193 | ], 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 1, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "import pandas as pd\n", 202 | "df = pd.read_csv('data.csv')\n", 203 | "df.to_excel('processed_data.xlsx')", 204 | ], 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 2, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "import matplotlib.pyplot as plt\n", 213 | "plt.figure()\n", 214 | "plt.plot(df['x'], df['y'])\n", 215 | "plt.savefig('output_plot.png')", 216 | ], 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 3, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "# Multiple file operations in one cell\n", 225 | "df2 = pd.read_csv('data2.csv')\n", 226 | "df3 = pd.read_parquet('data3.parquet')\n", 227 | "df_combined = pd.concat([df, df2, df3])\n", 228 | "df_combined.to_json('combined.json')", 229 | ], 230 | }, 231 | ], 232 | "metadata": { 233 | "kernelspec": { 234 | "display_name": "Python 3", 235 | "language": "python", 236 | "name": "python3", 237 | }, 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 5, 241 | } 242 | notebook_file.write_text(json.dumps(notebook_content, indent=2)) 243 | 244 | # Create input data files 245 | (tmpdir_path / "data.csv").touch() 246 | (tmpdir_path / "data2.csv").touch() 247 | (tmpdir_path / "data3.parquet").touch() 248 | 249 | # Analyze the project 250 | model = analyse_project(str(tmpdir_path)) 251 | 252 | # Check that the notebook was properly processed 253 | notebook_found = False 254 | notebook_node_id = None 255 | for node_id, node in model.nodes.items(): 256 | if node.name == "analysis.ipynb" and node.type == NodeType.JUPYTER_NOTEBOOK: 257 | notebook_found = True 258 | notebook_node_id = node_id 259 | break 260 | 261 | assert notebook_found, "Jupyter notebook node not found in the model" 262 | 263 | # Collect all file operations from the notebook 264 | notebook_file_ops = [] 265 | for filename, ops in model.file_operations.items(): 266 | for op in ops: 267 | if os.path.basename(op.source_file) == "analysis.ipynb": 268 | notebook_file_ops.append((filename, op.is_read, op.is_write)) 269 | 270 | # Verify all operations were detected 271 | # Read operations 272 | assert ("data.csv", True, False) in notebook_file_ops 273 | assert ("data2.csv", True, False) in notebook_file_ops 274 | assert ("data3.parquet", True, False) in notebook_file_ops 275 | 276 | # Write operations 277 | assert ("processed_data.xlsx", False, True) in 
notebook_file_ops 278 | assert ("output_plot.png", False, True) in notebook_file_ops 279 | assert ("combined.json", False, True) in notebook_file_ops 280 | 281 | # Verify edges in the graph 282 | read_edges = 0 283 | write_edges = 0 284 | 285 | for edge in model.edges: 286 | if edge.target == notebook_node_id and edge.type == "read": 287 | read_edges += 1 288 | elif edge.source == notebook_node_id and edge.type == "write": 289 | write_edges += 1 290 | 291 | # We should have read edges for the input files 292 | assert read_edges >= 3, "Not all read operations created edges" 293 | # We should have write edges for the output files 294 | assert write_edges >= 3, "Not all write operations created edges" 295 | 296 | # Test that the console reporter works with these operations 297 | reporter = ConsoleReporter() 298 | reporter.generate_report(model) # This should not raise exceptions 299 | -------------------------------------------------------------------------------- /src/smartrappy/models.py: -------------------------------------------------------------------------------- 1 | """Data models for smartrappy.""" 2 | 3 | import os 4 | from datetime import datetime 5 | from pathlib import Path 6 | from typing import Dict, List, NamedTuple, Optional 7 | 8 | 9 | class FileInfo(NamedTuple): 10 | """Information about a file operation found in Python code.""" 11 | 12 | filename: str 13 | is_read: bool 14 | is_write: bool 15 | source_file: str 16 | 17 | 18 | class FileStatus(NamedTuple): 19 | """Information about a file's status on disk.""" 20 | 21 | exists: bool 22 | last_modified: Optional[datetime] = None 23 | 24 | 25 | class ModuleImport(NamedTuple): 26 | """Information about a module import found in Python code.""" 27 | 28 | module_name: str 29 | source_file: str 30 | is_from_import: bool 31 | imported_names: List[str] 32 | is_internal: bool 33 | 34 | 35 | class DatabaseInfo(NamedTuple): 36 | """Information about a database operation found in Python code.""" 37 | 38 | db_name: str # Name or identifier of the database 39 | connection_string: Optional[str] # Connection string (if available) 40 | db_type: str # Type of database (e.g., "postgresql", "mysql", "sqlite") 41 | is_read: bool # Whether data is read from the database 42 | is_write: bool # Whether data is written to the database 43 | source_file: str # File containing the database operation 44 | conn_var_name: Optional[str] = None # Connection variable name if applicable 45 | uses_conn_var: Optional[str] = None # If this operation uses a connection variable 46 | 47 | 48 | class NodeType: 49 | """Enumeration of node types in the project graph.""" 50 | 51 | SCRIPT = "script" 52 | DATA_FILE = "data_file" 53 | EXTERNAL_MODULE = "external_module" 54 | INTERNAL_MODULE = "internal_module" 55 | DATABASE = "database" 56 | QUARTO_DOCUMENT = "quarto_document" 57 | JUPYTER_NOTEBOOK = "jupyter_notebook" 58 | 59 | 60 | class Node(NamedTuple): 61 | """A node in the project dependency graph.""" 62 | 63 | id: str 64 | name: str 65 | type: str 66 | metadata: dict 67 | 68 | 69 | class Edge(NamedTuple): 70 | """An edge in the project dependency graph.""" 71 | 72 | source: str 73 | target: str 74 | type: str 75 | 76 | 77 | class ProjectModel: 78 | """A complete model of the project's structure and dependencies.""" 79 | 80 | def __init__(self, base_path: str, internal_only: bool = False): 81 | self.base_path = Path(base_path) 82 | self.internal_only = internal_only 83 | self.nodes: Dict[str, Node] = {} 84 | self.edges: List[Edge] = [] 85 | self.file_operations: 
Dict[str, List[FileInfo]] = {} 86 | self.imports: Dict[str, List[ModuleImport]] = {} 87 | self.file_statuses: Dict[str, FileStatus] = {} 88 | self.database_operations: Dict[str, List[DatabaseInfo]] = {} 89 | 90 | def get_node_id(self, name: str, node_type: str) -> str: 91 | """Generate a consistent node ID based on name and type.""" 92 | return f"{node_type}_{hash(name) & 0xFFFFFF}" 93 | 94 | def add_node( 95 | self, name: str, node_type: str, metadata: Optional[dict] = None 96 | ) -> str: 97 | """Add a node to the model and return its ID.""" 98 | metadata = metadata or {} 99 | node_id = self.get_node_id(name, node_type) 100 | 101 | if node_id not in self.nodes: 102 | self.nodes[node_id] = Node( 103 | id=node_id, name=name, type=node_type, metadata=metadata 104 | ) 105 | 106 | return node_id 107 | 108 | def add_edge( 109 | self, source_id: str, target_id: str, edge_type: str = "dependency" 110 | ) -> None: 111 | """Add an edge between two nodes.""" 112 | # Prevent duplicate edges 113 | for edge in self.edges: 114 | if ( 115 | edge.source == source_id 116 | and edge.target == target_id 117 | and edge.type == edge_type 118 | ): 119 | return 120 | 121 | self.edges.append(Edge(source=source_id, target=target_id, type=edge_type)) 122 | 123 | def add_file_operation(self, operation: FileInfo) -> None: 124 | """Add a file operation to the model.""" 125 | if operation.filename not in self.file_operations: 126 | self.file_operations[operation.filename] = [] 127 | 128 | # Prevent duplicate operations 129 | for op in self.file_operations[operation.filename]: 130 | if ( 131 | op.source_file == operation.source_file 132 | and op.is_read == operation.is_read 133 | and op.is_write == operation.is_write 134 | ): 135 | return 136 | 137 | self.file_operations[operation.filename].append(operation) 138 | 139 | # Update file status if not already stored 140 | if operation.filename not in self.file_statuses: 141 | filepath = self.base_path / operation.filename 142 | self.file_statuses[operation.filename] = get_file_status(filepath) 143 | 144 | def add_import(self, import_info: ModuleImport) -> None: 145 | """Add a module import to the model.""" 146 | if import_info.source_file not in self.imports: 147 | self.imports[import_info.source_file] = [] 148 | 149 | # Prevent duplicate imports 150 | for imp in self.imports[import_info.source_file]: 151 | if ( 152 | imp.module_name == import_info.module_name 153 | and imp.is_from_import == import_info.is_from_import 154 | ): 155 | return 156 | 157 | self.imports[import_info.source_file].append(import_info) 158 | 159 | def add_database_operation(self, operation: DatabaseInfo) -> None: 160 | """Add a database operation to the model, handling connection variables.""" 161 | db_name_to_use = operation.db_name 162 | 163 | # If this operation uses a connection variable, prioritize its database name 164 | if hasattr(operation, "uses_conn_var") and operation.uses_conn_var: 165 | conn_var = operation.uses_conn_var 166 | # Look through existing operations to find the referenced connection 167 | for existing_ops in self.database_operations.values(): 168 | for op in existing_ops: 169 | if hasattr(op, "conn_var_name") and op.conn_var_name == conn_var: 170 | db_name_to_use = op.db_name 171 | break 172 | 173 | # Now add the operation under the appropriate database name 174 | if db_name_to_use not in self.database_operations: 175 | self.database_operations[db_name_to_use] = [] 176 | 177 | # Prevent duplicate operations 178 | for op in self.database_operations[db_name_to_use]: 179 | if ( 
180 | op.source_file == operation.source_file 181 | and op.is_read == operation.is_read 182 | and op.is_write == operation.is_write 183 | and op.db_type == operation.db_type 184 | ): 185 | return 186 | 187 | self.database_operations[db_name_to_use].append(operation) 188 | 189 | def build_graph(self) -> None: 190 | """Build the graph representation from file operations, database operations, and imports.""" 191 | # Process file operations 192 | for filename, operations in self.file_operations.items(): 193 | file_node_id = self.add_node( 194 | filename, 195 | NodeType.DATA_FILE, 196 | {"status": self.file_statuses.get(filename, FileStatus(exists=False))}, 197 | ) 198 | 199 | for op in operations: 200 | script_name = os.path.basename(op.source_file) 201 | 202 | # Determine node type based on file extension 203 | if script_name.endswith(".qmd"): 204 | node_type = NodeType.QUARTO_DOCUMENT 205 | elif script_name.endswith(".ipynb"): 206 | node_type = NodeType.JUPYTER_NOTEBOOK 207 | else: 208 | node_type = NodeType.SCRIPT 209 | script_node_id = self.add_node(script_name, node_type) 210 | 211 | if op.is_read: 212 | self.add_edge(file_node_id, script_node_id, "read") 213 | if op.is_write: 214 | self.add_edge(script_node_id, file_node_id, "write") 215 | 216 | # Process database operations 217 | for db_name, operations in self.database_operations.items(): 218 | db_node_id = self.add_node( 219 | db_name, 220 | NodeType.DATABASE, 221 | {"db_type": operations[0].db_type}, # Use type from first operation 222 | ) 223 | 224 | for op in operations: 225 | script_name = os.path.basename(op.source_file) 226 | # Determine node type based on file extension 227 | if script_name.endswith(".qmd"): 228 | node_type = NodeType.QUARTO_DOCUMENT 229 | elif script_name.endswith(".ipynb"): 230 | node_type = NodeType.JUPYTER_NOTEBOOK 231 | else: 232 | node_type = NodeType.SCRIPT 233 | script_node_id = self.add_node(script_name, node_type) 234 | 235 | if op.is_read: 236 | self.add_edge(db_node_id, script_node_id, "read") 237 | if op.is_write: 238 | self.add_edge(script_node_id, db_node_id, "write") 239 | 240 | # Process imports - create more detailed nodes 241 | for source_file, imports in self.imports.items(): 242 | script_name = os.path.basename(source_file) 243 | # Determine node type based on file extension 244 | if script_name.endswith(".qmd"): 245 | node_type = NodeType.QUARTO_DOCUMENT 246 | elif script_name.endswith(".ipynb"): 247 | node_type = NodeType.JUPYTER_NOTEBOOK 248 | else: 249 | node_type = NodeType.SCRIPT 250 | script_node_id = self.add_node(script_name, node_type) 251 | 252 | for imp in imports: 253 | # Skip external modules if internal_only is True 254 | if self.internal_only and not imp.is_internal: 255 | continue 256 | 257 | # Get base module name without path 258 | base_module_name = os.path.basename(imp.module_name.replace(".", "/")) 259 | module_display_name = base_module_name 260 | 261 | # Create separate nodes for each imported item if it's a from-import 262 | if imp.is_from_import and imp.imported_names: 263 | for imported_name in imp.imported_names: 264 | # Create detailed import name with module:function format 265 | detailed_name = f"{module_display_name}:{imported_name}" 266 | node_type = ( 267 | NodeType.INTERNAL_MODULE 268 | if imp.is_internal 269 | else NodeType.EXTERNAL_MODULE 270 | ) 271 | 272 | import_node_id = self.add_node( 273 | detailed_name, 274 | node_type, 275 | { 276 | "module": module_display_name, 277 | "imported_name": imported_name, 278 | "is_from_import": True, 279 | }, 280 | ) 
281 | self.add_edge(import_node_id, script_node_id, "import") 282 | else: 283 | # For regular imports, just use the module name 284 | node_type = ( 285 | NodeType.INTERNAL_MODULE 286 | if imp.is_internal 287 | else NodeType.EXTERNAL_MODULE 288 | ) 289 | import_node_id = self.add_node(module_display_name, node_type) 290 | self.add_edge(import_node_id, script_node_id, "import") 291 | 292 | 293 | def get_file_status(filepath: Path) -> FileStatus: 294 | """Get file existence and modification time information.""" 295 | if filepath.exists(): 296 | mtime = datetime.fromtimestamp(filepath.stat().st_mtime) 297 | return FileStatus(exists=True, last_modified=mtime) 298 | return FileStatus(exists=False) 299 | -------------------------------------------------------------------------------- /tests/test_analyser.py: -------------------------------------------------------------------------------- 1 | """Comprehensive tests for analyser.py functions.""" 2 | 3 | import ast 4 | 5 | from smartrappy.analyser import ( 6 | DatabaseOperationFinder, 7 | FileOperationFinder, 8 | ModuleImportFinder, 9 | extract_string_from_node, 10 | get_direct_db_driver_info, 11 | get_matplotlib_file_info, 12 | get_mode_properties, 13 | get_open_file_info, 14 | get_pandas_file_info, 15 | get_pandas_sql_info, 16 | get_sqlalchemy_info, 17 | ) 18 | 19 | 20 | class TestGetModeProperties: 21 | """Test file mode parsing.""" 22 | 23 | def test_default_read_mode(self): 24 | """Test default mode is read-only.""" 25 | is_read, is_write = get_mode_properties(None) 26 | assert is_read is True 27 | assert is_write is False 28 | 29 | def test_read_mode(self): 30 | """Test 'r' mode is read-only.""" 31 | is_read, is_write = get_mode_properties("r") 32 | assert is_read is True 33 | assert is_write is False 34 | 35 | def test_write_mode(self): 36 | """Test 'w' mode is write-only.""" 37 | is_read, is_write = get_mode_properties("w") 38 | assert is_read is False 39 | assert is_write is True 40 | 41 | def test_append_mode(self): 42 | """Test 'a' mode is write-only.""" 43 | is_read, is_write = get_mode_properties("a") 44 | assert is_read is False 45 | assert is_write is True 46 | 47 | def test_exclusive_creation_mode(self): 48 | """Test 'x' mode is write-only.""" 49 | is_read, is_write = get_mode_properties("x") 50 | assert is_read is False 51 | assert is_write is True 52 | 53 | def test_read_write_mode(self): 54 | """Test 'r+' mode allows both read and write.""" 55 | is_read, is_write = get_mode_properties("r+") 56 | assert is_read is True 57 | assert is_write is True 58 | 59 | def test_write_read_mode(self): 60 | """Test 'w+' mode allows both read and write.""" 61 | is_read, is_write = get_mode_properties("w+") 62 | assert is_read is True 63 | assert is_write is True 64 | 65 | def test_append_read_mode(self): 66 | """Test 'a+' mode allows both read and write.""" 67 | is_read, is_write = get_mode_properties("a+") 68 | assert is_read is True 69 | assert is_write is True 70 | 71 | 72 | class TestExtractStringFromNode: 73 | """Test string extraction from AST nodes.""" 74 | 75 | def test_path_call_with_name(self): 76 | """Test extraction from Path() call.""" 77 | code = 'Path("test.txt")' 78 | tree = ast.parse(code) 79 | node = tree.body[0].value 80 | result = extract_string_from_node(node) 81 | assert result == "test.txt" 82 | 83 | def test_path_call_with_attribute(self): 84 | """Test extraction from pathlib.Path() call.""" 85 | code = 'pathlib.Path("test.txt")' 86 | tree = ast.parse(code) 87 | node = tree.body[0].value 88 | result = 
extract_string_from_node(node) 89 | assert result == "test.txt" 90 | 91 | def test_non_path_call(self): 92 | """Test that non-Path calls return None.""" 93 | code = 'other_func("test.txt")' 94 | tree = ast.parse(code) 95 | node = tree.body[0].value 96 | result = extract_string_from_node(node) 97 | assert result is None 98 | 99 | 100 | class TestGetOpenFileInfo: 101 | """Test extraction of file info from open() calls.""" 102 | 103 | def test_open_without_args(self): 104 | """Test open() without arguments returns None.""" 105 | code = "open()" 106 | tree = ast.parse(code) 107 | node = tree.body[0].value 108 | result = get_open_file_info(node, "test.py") 109 | assert result is None 110 | 111 | def test_open_with_path_object(self): 112 | """Test open() with Path object.""" 113 | code = 'open(Path("test.txt"))' 114 | tree = ast.parse(code) 115 | node = tree.body[0].value 116 | result = get_open_file_info(node, "test.py") 117 | assert result is not None 118 | assert result.filename == "test.txt" 119 | assert result.is_read is True 120 | assert result.is_write is False 121 | 122 | def test_open_with_keyword_mode(self): 123 | """Test open() with mode as keyword argument.""" 124 | code = 'open("test.txt", mode="w")' 125 | tree = ast.parse(code) 126 | node = tree.body[0].value 127 | result = get_open_file_info(node, "test.py") 128 | assert result is not None 129 | assert result.filename == "test.txt" 130 | assert result.is_read is False 131 | assert result.is_write is True 132 | 133 | def test_open_with_append_mode(self): 134 | """Test open() with append mode.""" 135 | code = 'open("test.txt", "a")' 136 | tree = ast.parse(code) 137 | node = tree.body[0].value 138 | result = get_open_file_info(node, "test.py") 139 | assert result is not None 140 | assert result.is_write is True 141 | 142 | def test_open_with_read_write_mode(self): 143 | """Test open() with r+ mode.""" 144 | code = 'open("test.txt", "r+")' 145 | tree = ast.parse(code) 146 | node = tree.body[0].value 147 | result = get_open_file_info(node, "test.py") 148 | assert result is not None 149 | assert result.is_read is True 150 | assert result.is_write is True 151 | 152 | 153 | class TestGetPandasFileInfo: 154 | """Test extraction of file info from pandas operations.""" 155 | 156 | def test_pandas_read_without_args(self): 157 | """Test pd.read_csv() without arguments returns None.""" 158 | code = "pd.read_csv()" 159 | tree = ast.parse(code) 160 | node = tree.body[0].value 161 | result = get_pandas_file_info(node, "test.py") 162 | assert result is None 163 | 164 | def test_pandas_non_file_method(self): 165 | """Test pandas method that doesn't read/write files.""" 166 | code = "pd.concat([df1, df2])" 167 | tree = ast.parse(code) 168 | node = tree.body[0].value 169 | result = get_pandas_file_info(node, "test.py") 170 | assert result is None 171 | 172 | def test_dataframe_to_csv_without_args(self): 173 | """Test df.to_csv() without arguments returns None.""" 174 | code = "df.to_csv()" 175 | tree = ast.parse(code) 176 | node = tree.body[0].value 177 | result = get_pandas_file_info(node, "test.py") 178 | assert result is None 179 | 180 | def test_dataframe_to_sql(self): 181 | """Test df.to_sql() returns None (database, not file).""" 182 | code = 'df.to_sql("table", conn)' 183 | tree = ast.parse(code) 184 | node = tree.body[0].value 185 | result = get_pandas_file_info(node, "test.py") 186 | assert result is None 187 | 188 | def test_pandas_read_sql(self): 189 | """Test pd.read_sql() returns None (database, not file).""" 190 | code = 
'pd.read_sql("SELECT * FROM table", conn)' 191 | tree = ast.parse(code) 192 | node = tree.body[0].value 193 | result = get_pandas_file_info(node, "test.py") 194 | assert result is None 195 | 196 | 197 | class TestGetMatplotlibFileInfo: 198 | """Test extraction of file info from matplotlib operations.""" 199 | 200 | def test_savefig_with_path_object(self): 201 | """Test plt.savefig() with Path object.""" 202 | code = 'plt.savefig(Path("plot.png"))' 203 | tree = ast.parse(code) 204 | node = tree.body[0].value 205 | result = get_matplotlib_file_info(node, "test.py") 206 | assert result is not None 207 | assert result.filename == "plot.png" 208 | assert result.is_write is True 209 | 210 | 211 | class TestGetSQLAlchemyInfo: 212 | """Test extraction of database info from SQLAlchemy operations.""" 213 | 214 | def test_create_engine_with_string(self): 215 | """Test create_engine with connection string.""" 216 | code = 'create_engine("sqlite:///test.db")' 217 | tree = ast.parse(code) 218 | node = tree.body[0].value 219 | result = get_sqlalchemy_info(node, "test.py") 220 | assert result is not None 221 | assert result.db_type == "sqlite" 222 | assert result.connection_string == "sqlite:///test.db" 223 | 224 | def test_create_engine_postgresql(self): 225 | """Test create_engine with PostgreSQL connection.""" 226 | code = 'create_engine("postgresql://user:pass@localhost/mydb")' 227 | tree = ast.parse(code) 228 | node = tree.body[0].value 229 | result = get_sqlalchemy_info(node, "test.py") 230 | assert result is not None 231 | assert result.db_type == "postgresql" 232 | assert result.db_name == "mydb" 233 | 234 | def test_create_engine_mysql(self): 235 | """Test create_engine with MySQL connection.""" 236 | code = 'create_engine("mysql://user:pass@localhost/mydb")' 237 | tree = ast.parse(code) 238 | node = tree.body[0].value 239 | result = get_sqlalchemy_info(node, "test.py") 240 | assert result is not None 241 | assert result.db_type == "mysql" 242 | assert result.db_name == "mydb" 243 | 244 | def test_create_engine_mssql(self): 245 | """Test create_engine with MSSQL connection.""" 246 | code = 'create_engine("mssql://user:pass@localhost/mydb")' 247 | tree = ast.parse(code) 248 | node = tree.body[0].value 249 | result = get_sqlalchemy_info(node, "test.py") 250 | assert result is not None 251 | assert result.db_type == "mssql" 252 | 253 | 254 | class TestGetPandasSQLInfo: 255 | """Test extraction of database info from pandas SQL operations.""" 256 | 257 | def test_read_sql_with_connection_string(self): 258 | """Test pd.read_sql with connection string.""" 259 | code = 'pd.read_sql("SELECT * FROM table", con="sqlite:///test.db")' 260 | tree = ast.parse(code) 261 | node = tree.body[0].value 262 | result = get_pandas_sql_info(node, "test.py") 263 | assert result is not None 264 | assert result.db_type == "sqlite" 265 | assert result.is_read is True 266 | 267 | def test_read_sql_with_postgresql_connection(self): 268 | """Test pd.read_sql with PostgreSQL connection.""" 269 | code = 'pd.read_sql("SELECT * FROM table", con="postgresql://localhost/mydb")' 270 | tree = ast.parse(code) 271 | node = tree.body[0].value 272 | result = get_pandas_sql_info(node, "test.py") 273 | assert result is not None 274 | assert result.db_type == "postgresql" 275 | assert result.db_name == "mydb" 276 | 277 | def test_read_sql_with_mysql_connection(self): 278 | """Test pd.read_sql with MySQL connection.""" 279 | code = 'pd.read_sql("SELECT * FROM table", con="mysql://localhost/mydb")' 280 | tree = ast.parse(code) 281 | node = 
tree.body[0].value 282 | result = get_pandas_sql_info(node, "test.py") 283 | assert result is not None 284 | assert result.db_type == "mysql" 285 | 286 | def test_read_sql_with_mssql_connection(self): 287 | """Test pd.read_sql with MSSQL ODBC connection.""" 288 | code = 'pd.read_sql("SELECT * FROM table", con="Driver={SQL Server};Server=localhost;Database=mydb")' 289 | tree = ast.parse(code) 290 | node = tree.body[0].value 291 | result = get_pandas_sql_info(node, "test.py") 292 | assert result is not None 293 | assert result.db_type == "mssql" 294 | assert result.db_name == "mydb" 295 | 296 | def test_read_sql_with_variable_connection(self): 297 | """Test pd.read_sql with variable connection.""" 298 | code = 'pd.read_sql("SELECT * FROM table", conn_var)' 299 | tree = ast.parse(code) 300 | node = tree.body[0].value 301 | result = get_pandas_sql_info(node, "test.py") 302 | # Should still return a DatabaseInfo but without connection details 303 | assert result is not None 304 | 305 | 306 | class TestGetDirectDBDriverInfo: 307 | """Test extraction of database info from direct database drivers.""" 308 | 309 | def test_sqlite3_connect(self): 310 | """Test sqlite3.connect() call.""" 311 | code = 'sqlite3.connect("test.db")' 312 | tree = ast.parse(code) 313 | node = tree.body[0].value 314 | result = get_direct_db_driver_info(node, "test.py") 315 | assert result is not None 316 | assert result.db_type == "sqlite" 317 | assert result.db_name == "test.db" 318 | 319 | def test_psycopg2_connect(self): 320 | """Test psycopg2.connect() call.""" 321 | code = 'psycopg2.connect("dbname=mydb user=postgres")' 322 | tree = ast.parse(code) 323 | node = tree.body[0].value 324 | result = get_direct_db_driver_info(node, "test.py") 325 | assert result is not None 326 | assert result.db_type == "postgresql" 327 | 328 | def test_pymysql_connect(self): 329 | """Test pymysql.connect() call.""" 330 | code = 'pymysql.connect(host="localhost", database="mydb")' 331 | tree = ast.parse(code) 332 | node = tree.body[0].value 333 | result = get_direct_db_driver_info(node, "test.py") 334 | assert result is not None 335 | assert result.db_type == "mysql" 336 | 337 | def test_pyodbc_connect(self): 338 | """Test pyodbc.connect() call.""" 339 | code = 'pyodbc.connect("Driver={SQL Server};Server=localhost;Database=mydb")' 340 | tree = ast.parse(code) 341 | node = tree.body[0].value 342 | result = get_direct_db_driver_info(node, "test.py") 343 | assert result is not None 344 | assert result.db_type == "mssql" 345 | 346 | 347 | class TestDatabaseOperationFinder: 348 | """Test the DatabaseOperationFinder AST visitor.""" 349 | 350 | def test_sqlalchemy_engine_tracking(self): 351 | """Test that SQLAlchemy engines are tracked correctly.""" 352 | code = """ 353 | import sqlalchemy as sa 354 | engine = sa.create_engine("sqlite:///test.db") 355 | df = pd.read_sql("SELECT * FROM table", engine) 356 | """ 357 | tree = ast.parse(code) 358 | finder = DatabaseOperationFinder("test.py") 359 | finder.visit(tree) 360 | 361 | # Should find 2 operations: engine creation and read_sql usage 362 | assert len(finder.database_operations) >= 1 363 | 364 | def test_direct_create_engine(self): 365 | """Test direct create_engine call tracking.""" 366 | code = """ 367 | from sqlalchemy import create_engine 368 | engine = create_engine("postgresql://localhost/mydb") 369 | """ 370 | tree = ast.parse(code) 371 | finder = DatabaseOperationFinder("test.py") 372 | finder.visit(tree) 373 | 374 | assert len(finder.database_operations) >= 1 375 | # Check that engine was 
registered 376 | assert "engine" in finder.sqlalchemy_engines 377 | 378 | def test_connection_variable_tracking(self): 379 | """Test that database connections are tracked correctly.""" 380 | code = """ 381 | import sqlite3 382 | conn = sqlite3.connect("test.db") 383 | df = pd.read_sql("SELECT * FROM table", conn) 384 | """ 385 | tree = ast.parse(code) 386 | finder = DatabaseOperationFinder("test.py") 387 | finder.visit(tree) 388 | 389 | # Should track the connection 390 | assert len(finder.database_operations) >= 1 391 | 392 | def test_to_sql_with_engine(self): 393 | """Test df.to_sql() with SQLAlchemy engine.""" 394 | code = """ 395 | import sqlalchemy as sa 396 | engine = sa.create_engine("sqlite:///test.db") 397 | df.to_sql("table_name", engine) 398 | """ 399 | tree = ast.parse(code) 400 | finder = DatabaseOperationFinder("test.py") 401 | finder.visit(tree) 402 | 403 | # Should find both engine creation and to_sql operation 404 | assert len(finder.database_operations) >= 1 405 | 406 | def test_to_sql_with_connection(self): 407 | """Test df.to_sql() with database connection.""" 408 | code = """ 409 | import sqlite3 410 | conn = sqlite3.connect("test.db") 411 | df.to_sql("table_name", conn) 412 | """ 413 | tree = ast.parse(code) 414 | finder = DatabaseOperationFinder("test.py") 415 | finder.visit(tree) 416 | 417 | assert len(finder.database_operations) >= 1 418 | 419 | def test_to_sql_without_tracked_connection(self): 420 | """Test df.to_sql() without a tracked connection variable.""" 421 | code = """ 422 | df.to_sql("table_name", "sqlite:///test.db") 423 | """ 424 | tree = ast.parse(code) 425 | finder = DatabaseOperationFinder("test.py") 426 | finder.visit(tree) 427 | 428 | # Should still detect the operation 429 | assert len(finder.database_operations) >= 0 430 | 431 | 432 | class TestFileOperationFinder: 433 | """Test the FileOperationFinder AST visitor.""" 434 | 435 | def test_multiple_file_operations(self): 436 | """Test finding multiple file operations in one script.""" 437 | code = """ 438 | with open("input.txt", "r") as f: 439 | data = f.read() 440 | 441 | with open("output.txt", "w") as f: 442 | f.write(data) 443 | 444 | df = pd.read_csv("data.csv") 445 | df.to_excel("output.xlsx") 446 | """ 447 | tree = ast.parse(code) 448 | finder = FileOperationFinder("test.py") 449 | finder.visit(tree) 450 | 451 | # Should find all 4 file operations 452 | assert len(finder.file_operations) >= 4 453 | 454 | 455 | class TestModuleImportFinder: 456 | """Test the ModuleImportFinder AST visitor.""" 457 | 458 | def test_import_tracking(self): 459 | """Test that imports are tracked correctly.""" 460 | code = """ 461 | import pandas as pd 462 | from pathlib import Path 463 | import numpy 464 | """ 465 | tree = ast.parse(code) 466 | # ModuleImportFinder requires project_modules parameter 467 | finder = ModuleImportFinder("test.py", project_modules=set()) 468 | finder.visit(tree) 469 | 470 | # Should find all imports 471 | assert len(finder.imports) >= 3 472 | -------------------------------------------------------------------------------- /src/smartrappy/reporters.py: -------------------------------------------------------------------------------- 1 | """Reporters for smartrappy analysis results.""" 2 | 3 | import json 4 | import os 5 | from abc import ABC, abstractmethod 6 | from typing import Dict, Optional, Set 7 | 8 | from graphviz import Digraph 9 | from rich.console import Console 10 | from rich.text import Text 11 | from rich.tree import Tree 12 | 13 | from smartrappy.models import NodeType, 
ProjectModel 14 | 15 | 16 | class Reporter(ABC): 17 | """Base class for all reporters.""" 18 | 19 | @abstractmethod 20 | def generate_report( 21 | self, model: ProjectModel, output_path: Optional[str] = None 22 | ) -> None: 23 | """Generate a report from the project model.""" 24 | pass 25 | 26 | 27 | class ConsoleReporter(Reporter): 28 | """Report analysis results to the console.""" 29 | 30 | def generate_report( 31 | self, model: ProjectModel, output_path: Optional[str] = None 32 | ) -> None: 33 | """Generate a console report from the project model.""" 34 | console = Console() 35 | 36 | # Print header 37 | console.print( 38 | "\n[bold cyan]File Operations, Database Operations, and Import Analysis[/bold cyan]" 39 | ) 40 | console.print("=" * 80) 41 | 42 | # Print file operations 43 | for filename, file_ops in sorted(model.file_operations.items()): 44 | console.print(f"\n[bold]File:[/bold] {filename}") 45 | has_read = any(op.is_read for op in file_ops) 46 | has_write = any(op.is_write for op in file_ops) 47 | op_type = ( 48 | "READ/WRITE" 49 | if has_read and has_write 50 | else ("READ" if has_read else "WRITE") 51 | ) 52 | console.print(f"[bold]Operation:[/bold] {op_type}") 53 | console.print("[bold]Referenced in:[/bold]") 54 | sources = sorted(set(op.source_file for op in file_ops)) 55 | for source in sources: 56 | console.print(f" - {source}") 57 | 58 | if model.database_operations: 59 | console.print("\n[bold purple]💽 Database Operations:[/bold purple]") 60 | for db_name, db_ops in sorted(model.database_operations.items()): 61 | console.print(f"\n[bold]Database:[/bold] {db_name}") 62 | db_type = db_ops[0].db_type # Get type from first operation 63 | console.print(f"[bold]Type:[/bold] {db_type}") 64 | 65 | has_read = any(op.is_read for op in db_ops) 66 | has_write = any(op.is_write for op in db_ops) 67 | op_type = ( 68 | "READ/WRITE" 69 | if has_read and has_write 70 | else ("READ" if has_read else "WRITE") 71 | ) 72 | console.print(f"[bold]Operation:[/bold] {op_type}") 73 | 74 | console.print("[bold]Referenced in:[/bold]") 75 | sources = sorted(set(op.source_file for op in db_ops)) 76 | for source in sources: 77 | console.print(f" - {source}") 78 | 79 | # Print import analysis 80 | console.print("\n[bold]Module Imports:[/bold]") 81 | for script, script_imports in sorted(model.imports.items()): 82 | if script_imports: 83 | script_name = os.path.basename(script) 84 | console.print(f"\n[bold]Script:[/bold] {script_name}") 85 | for imp in script_imports: 86 | # Get module display name with .py extension for Python modules 87 | module_display = os.path.basename(imp.module_name.replace(".", "/")) 88 | # if not module_display.endswith(".py") and "." 
not in module_display: 89 | # module_display = f"{module_display}.py" 90 | 91 | import_type = "from" if imp.is_from_import else "import" 92 | module_type = ( 93 | "[blue]internal[/blue]" 94 | if imp.is_internal 95 | else "[red]external[/red]" 96 | ) 97 | 98 | # For 'from' imports, show as module:imported_names 99 | if imp.is_from_import: 100 | detailed_imports = [ 101 | f"{module_display}:{name}" for name in imp.imported_names 102 | ] 103 | detailed_str = ", ".join(detailed_imports) 104 | console.print( 105 | f" - {import_type} {imp.module_name} → {detailed_str} [{module_type}]" 106 | ) 107 | else: 108 | console.print( 109 | f" - {import_type} {module_display} [{module_type}]" 110 | ) 111 | 112 | # Create and display terminal visualisation 113 | console.print("\n[bold cyan]Terminal Visualisation[/bold cyan]") 114 | tree = self._create_terminal_tree(model) 115 | console.print(tree) 116 | 117 | def _create_terminal_tree(self, model: ProjectModel) -> Tree: 118 | """Create a rich Tree visualisation of the dependency graph.""" 119 | # Create the main tree 120 | tree = Tree("📦 Project Dependencies", guide_style="bold cyan") 121 | 122 | # Track all nodes and their dependencies 123 | dependencies: Dict[str, Set[str]] = {} # node_id -> set of dependency node_ids 124 | 125 | # Process edges to build dependency map 126 | for edge in model.edges: 127 | if edge.target not in dependencies: 128 | dependencies[edge.target] = set() 129 | dependencies[edge.target].add(edge.source) 130 | 131 | # Find root nodes (nodes with no incoming edges) 132 | all_nodes = set(model.nodes.keys()) 133 | dependency_targets = set() 134 | for deps in dependencies.values(): 135 | dependency_targets.update(deps) 136 | root_nodes = all_nodes - dependency_targets 137 | 138 | # Helper function to get node style 139 | def get_node_style(node_type: str, name: str) -> Text: 140 | icons = { 141 | NodeType.SCRIPT: "📜", 142 | NodeType.EXTERNAL_MODULE: "📦", 143 | NodeType.INTERNAL_MODULE: "🔧", 144 | NodeType.DATA_FILE: "📄", 145 | NodeType.DATABASE: "💽", 146 | NodeType.QUARTO_DOCUMENT: "📰", 147 | NodeType.JUPYTER_NOTEBOOK: "📓", 148 | } 149 | colors = { 150 | NodeType.SCRIPT: "green", 151 | NodeType.EXTERNAL_MODULE: "red", 152 | NodeType.INTERNAL_MODULE: "blue", 153 | NodeType.DATA_FILE: "magenta", 154 | NodeType.DATABASE: "purple", 155 | NodeType.QUARTO_DOCUMENT: "cyan", 156 | NodeType.JUPYTER_NOTEBOOK: "yellow", 157 | } 158 | return Text( 159 | f"{icons.get(node_type, '❓')} {name}", 160 | style=colors.get(node_type, "white"), 161 | ) 162 | 163 | # Helper function to recursively build tree 164 | def build_tree(node_id: str, seen: Set[str], parent_tree: Tree) -> None: 165 | if node_id in seen: 166 | return 167 | 168 | node = model.nodes[node_id] 169 | seen.add(node_id) 170 | 171 | # Add node to tree 172 | node_tree = parent_tree.add(get_node_style(node.type, node.name)) 173 | 174 | # For database nodes, add type information 175 | if node.type == NodeType.DATABASE and "db_type" in node.metadata: 176 | node_tree.add(Text(f"Type: {node.metadata['db_type']}", "purple")) 177 | 178 | # Add dependencies 179 | for dep_id in sorted(dependencies.get(node_id, set())): 180 | if dep_id not in seen: 181 | build_tree(dep_id, seen.copy(), node_tree) 182 | else: 183 | # Show circular dependency 184 | dep_node = model.nodes[dep_id] 185 | node_tree.add(Text(f"↻ {dep_node.name} (circular)", "yellow")) 186 | 187 | # Build tree from each root node 188 | for root_id in sorted(root_nodes): 189 | build_tree(root_id, set(), tree) 190 | 191 | return tree 192 | 193 | 
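# --- Editorial usage sketch (illustrative, not part of the original file) ---
# Every reporter in this module consumes a ProjectModel, so a minimal
# end-to-end run looks roughly like the snippet below. The import path for
# analyse_project is an assumption inferred from the test suite, which calls
# it directly:
#
#     from smartrappy import analyse_project
#     from smartrappy.reporters import ConsoleReporter
#
#     model = analyse_project("path/to/project")  # build the dependency model
#     ConsoleReporter().generate_report(model)    # print analysis + rich Tree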
194 | class GraphvizReporter(Reporter): 195 | """Generate a Graphviz visualisation of the project model. Exports as PDF""" 196 | 197 | def generate_report( 198 | self, model: ProjectModel, output_path: Optional[str] = None 199 | ) -> None: 200 | """Generate a Graphviz visualisation from the project model.""" 201 | if not output_path: 202 | output_path = "project_graph" 203 | 204 | # Create a new directed graph 205 | dot = Digraph(comment="Project Dependency Graph") 206 | dot.attr(rankdir="TB") # Top to bottom layout 207 | 208 | # Define node styles 209 | dot.attr("node", shape="box", style="filled") 210 | 211 | # Add nodes 212 | for node_id, node in model.nodes.items(): 213 | if node.type == NodeType.SCRIPT: 214 | dot.node( 215 | node_id, 216 | node.name, 217 | fillcolor="#90EE90", # Light green 218 | color="#333333", 219 | penwidth="2.0", 220 | ) 221 | elif node.type == NodeType.DATA_FILE: 222 | # Handle file status for data files 223 | if "status" in node.metadata: 224 | status = node.metadata["status"] 225 | if status.exists: 226 | mod_time = status.last_modified.strftime("%Y-%m-%d %H:%M:%S") 227 | label = f"{node.name}\nModified: {mod_time}" 228 | dot.node( 229 | node_id, 230 | label, 231 | fillcolor="#FFB6C1", # Light pink 232 | color="#333333", 233 | penwidth="2.0", 234 | ) 235 | else: 236 | label = f"{node.name}\nFile does not exist" 237 | dot.node( 238 | node_id, 239 | label, 240 | fillcolor="#FFB6C1", # Light pink 241 | color="#FF0000", # Red border 242 | penwidth="3.0", 243 | style="filled,dashed", 244 | ) 245 | else: 246 | dot.node( 247 | node_id, 248 | node.name, 249 | fillcolor="#FFB6C1", # Light pink 250 | color="#333333", 251 | penwidth="2.0", 252 | ) 253 | elif node.type == NodeType.DATABASE: 254 | # Special styling for database nodes 255 | db_type = node.metadata.get("db_type", "unknown") 256 | label = f"{node.name}\nType: {db_type}" # Using node.name, not node_id 257 | dot.node( 258 | node_id, 259 | label, 260 | fillcolor="#B19CD9", # Light purple for databases 261 | color="#333333", 262 | penwidth="2.0", 263 | shape="cylinder", # Database shape 264 | ) 265 | elif node.type == NodeType.INTERNAL_MODULE: 266 | # Handle imported item nodes with specific style 267 | if "imported_name" in node.metadata: 268 | dot.node( 269 | node_id, 270 | node.name, 271 | fillcolor="#ADD8E6", # Light blue for internal modules 272 | color="#333333", 273 | penwidth="2.0", 274 | shape="oval", # Use oval shape for imported items 275 | ) 276 | else: 277 | dot.node( 278 | node_id, 279 | node.name, 280 | fillcolor="#ADD8E6", # Light blue for internal modules 281 | color="#333333", 282 | penwidth="2.0", 283 | ) 284 | elif node.type == NodeType.EXTERNAL_MODULE: 285 | # Handle imported item nodes with specific style 286 | if "imported_name" in node.metadata: 287 | dot.node( 288 | node_id, 289 | node.name, 290 | fillcolor="#FFA07A", # Light salmon for external modules 291 | color="#333333", 292 | penwidth="2.0", 293 | shape="oval", # Use oval shape for imported items 294 | ) 295 | else: 296 | dot.node( 297 | node_id, 298 | node.name, 299 | fillcolor="#FFA07A", # Light salmon for external modules 300 | color="#333333", 301 | penwidth="2.0", 302 | ) 303 | elif node.type == NodeType.QUARTO_DOCUMENT: 304 | # Special styling for Quarto documents 305 | dot.node( 306 | node_id, 307 | node.name, 308 | fillcolor="#00CED1", # Dark turquoise for Quarto docs 309 | color="#333333", 310 | penwidth="2.0", 311 | ) 312 | elif node.type == NodeType.JUPYTER_NOTEBOOK: 313 | # Special styling for Jupyter notebooks 314 | 
dot.node( 315 | node_id, 316 | node.name, 317 | fillcolor="#FFD700", # Gold for Jupyter notebooks 318 | color="#333333", 319 | penwidth="2.0", 320 | ) 321 | 322 | # Add edges 323 | dot.attr("edge", color="#333333") 324 | for edge in model.edges: 325 | dot.edge(edge.source, edge.target) 326 | 327 | # Render the graph 328 | output_dir = os.path.dirname(output_path) or "." 329 | os.makedirs(output_dir, exist_ok=True) 330 | 331 | dot.render(output_path, view=False, format="pdf", cleanup=True) 332 | print(f"Graphviz visualisation saved as {output_path}.pdf") 333 | 334 | 335 | class MermaidReporter(Reporter): 336 | """Generate a Mermaid visualisation of the project model.""" 337 | 338 | def generate_report( 339 | self, model: ProjectModel, output_path: Optional[str] = None 340 | ) -> None: 341 | """Generate a Mermaid diagram from the project model.""" 342 | if not output_path: 343 | output_path = "project_diagram.md" 344 | 345 | # Generate Mermaid markup 346 | mermaid = [ 347 | "graph TD", 348 | " %% Style definitions", 349 | " classDef scriptNode fill:#90EE90,stroke:#333,stroke-width:2px;", 350 | " classDef fileNode fill:#FFB6C1,stroke:#333,stroke-width:2px;", 351 | " classDef quartoNode fill:#00CED1,stroke:#333,stroke-width:2px;", 352 | " classDef notebookNode fill:#FFD700,stroke:#333,stroke-width:2px;", 353 | " classDef missingFile fill:#FFB6C1,stroke:#FF0000,stroke-width:3px,stroke-dasharray: 5 5;", 354 | " classDef internalModule fill:#ADD8E6,stroke:#333,stroke-width:2px;", 355 | " classDef externalModule fill:#FFA07A,stroke:#333,stroke-width:2px;", 356 | " classDef importedItem fill:#ADD8E6,stroke:#333,stroke-width:2px,shape:circle;", 357 | " classDef externalImportedItem fill:#FFA07A,stroke:#333,stroke-width:2px,shape:circle;", 358 | " classDef databaseNode fill:#B19CD9,stroke:#333,stroke-width:2px,shape:cylinder;", 359 | "", 360 | " %% Nodes", 361 | ] 362 | 363 | # Add nodes 364 | for node_id, node in model.nodes.items(): 365 | if node.type == NodeType.SCRIPT: 366 | mermaid.append(f' {node_id}["{node.name}"]:::scriptNode') 367 | elif node.type == NodeType.DATA_FILE: 368 | # Handle file status for data files 369 | if "status" in node.metadata: 370 | status = node.metadata["status"] 371 | if status.exists: 372 | mod_time = status.last_modified.strftime("%Y-%m-%d %H:%M:%S") 373 | label = f"{node.name}<br/>Modified: {mod_time}" 374 | mermaid.append(f' {node_id}["{label}"]:::fileNode') 375 | else: 376 | label = f"{node.name}<br/>File does not exist" 377 | mermaid.append(f' {node_id}["{label}"]:::missingFile') 378 | else: 379 | mermaid.append(f' {node_id}["{node.name}"]:::fileNode') 380 | elif node.type == NodeType.DATABASE: 381 | # Database nodes with specific styling 382 | db_type = node.metadata.get("db_type", "unknown") 383 | label = f"{node.name}<br/>Type: {db_type}" 384 | mermaid.append(f' {node_id}["{label}"]:::databaseNode') 385 | elif node.type == NodeType.INTERNAL_MODULE: 386 | # Handle imported item nodes with specific style 387 | if "imported_name" in node.metadata: 388 | mermaid.append(f' {node_id}(("{node.name}")):::importedItem') 389 | else: 390 | mermaid.append(f' {node_id}["{node.name}"]:::internalModule') 391 | elif node.type == NodeType.EXTERNAL_MODULE: 392 | # Handle imported item nodes with specific style 393 | if "imported_name" in node.metadata: 394 | mermaid.append( 395 | f' {node_id}(("{node.name}")):::externalImportedItem' 396 | ) 397 | else: 398 | mermaid.append(f' {node_id}["{node.name}"]:::externalModule') 399 | elif node.type == NodeType.QUARTO_DOCUMENT: 400 | mermaid.append(f' {node_id}["{node.name}"]:::quartoNode') 401 | elif node.type == NodeType.JUPYTER_NOTEBOOK: 402 | mermaid.append(f' {node_id}["{node.name}"]:::notebookNode') 403 | 404 | mermaid.append("") 405 | mermaid.append(" %% Relationships") 406 | 407 | # Add edges 408 | for edge in model.edges: 409 | mermaid.append(f" {edge.source} --> {edge.target}") 410 | 411 | # Create markdown file with mermaid diagram 412 | output_dir = os.path.dirname(output_path) or "." 413 | os.makedirs(output_dir, exist_ok=True) 414 | 415 | with open(output_path, "w") as f: 416 | f.write("# Project Dependency Diagram\n\n") 417 | f.write("```mermaid\n") 418 | f.write("\n".join(mermaid)) 419 | f.write("\n```\n") 420 | 421 | print(f"Mermaid diagram saved as {output_path}") 422 | 423 | 424 | class JsonReporter(Reporter): 425 | """Generate a JSON representation of the project model.""" 426 | 427 | def generate_report( 428 | self, model: ProjectModel, output_path: Optional[str] = None 429 | ) -> None: 430 | """Generate a JSON file from the project model or print to console if no path is given.""" 431 | # Create a serializable representation of the model 432 | serializable = {"nodes": [], "edges": [], "file_operations": [], "imports": []} 433 | 434 | # Add nodes 435 | for node_id, node in model.nodes.items(): 436 | # Skip external modules if internal_only is True 437 | if model.internal_only and node.type == NodeType.EXTERNAL_MODULE: 438 | continue 439 | 440 | node_data = { 441 | "id": node_id, 442 | "name": node.name, 443 | "type": node.type, 444 | "metadata": {}, 445 | } 446 | 447 | # Handle file status for data files 448 | if node.type == NodeType.DATA_FILE and "status" in node.metadata: 449 | status = node.metadata["status"] 450 | node_data["metadata"]["exists"] = status.exists 451 | if status.last_modified: 452 | node_data["metadata"]["last_modified"] = ( 453 | status.last_modified.isoformat() 454 | ) 455 | 456 | serializable["nodes"].append(node_data) 457 | 458 | # Add edges 459 | for edge in model.edges: 460 | serializable["edges"].append( 461 | {"source": edge.source, "target": edge.target, "type": edge.type} 462 | ) 463 | 464 | # Add file operations 465 | for filename, operations in model.file_operations.items(): 466 | for op in operations: 467 | serializable["file_operations"].append( 468 | { 469 | "filename": op.filename, 470 | "is_read": op.is_read, 471 | "is_write": op.is_write, 472 | "source_file": op.source_file, 473 | } 474 | ) 475 | 476 | # Add imports 477 | for source_file, imports in model.imports.items(): 478 | for imp in imports: 479 | # Skip external modules if internal_only is True 480 | if model.internal_only and not imp.is_internal: 481 | continue 482 | 483 | serializable["imports"].append( 484 | { 485 | "module_name": imp.module_name, 486 |
"source_file": imp.source_file, 487 | "is_from_import": imp.is_from_import, 488 | "imported_names": imp.imported_names, 489 | "is_internal": imp.is_internal, 490 | } 491 | ) 492 | 493 | # If no output path specified, print to console with rich 494 | if output_path is None: 495 | console = Console() 496 | console.print("\n[bold cyan]JSON Representation[/bold cyan]") 497 | console.print("=" * 80) 498 | console.print_json(data=serializable, indent=2) 499 | else: 500 | # Write to file 501 | output_dir = os.path.dirname(output_path) or "." 502 | os.makedirs(output_dir, exist_ok=True) 503 | 504 | with open(output_path, "w") as f: 505 | json.dump(serializable, f, indent=2) 506 | 507 | print(f"JSON report saved as {output_path}") 508 | 509 | 510 | def get_reporter(format_type: str) -> Reporter: 511 | """ 512 | Factory function to get the appropriate reporter. 513 | 514 | Args: 515 | format_type: The type of reporter to use ('console', 'graphviz', 'mermaid', or 'json') 516 | 517 | Returns: 518 | A Reporter instance 519 | 520 | Raises: 521 | ValueError: If the format type is not supported 522 | """ 523 | reporters = { 524 | "console": ConsoleReporter(), 525 | "graphviz": GraphvizReporter(), 526 | "mermaid": MermaidReporter(), 527 | "json": JsonReporter(), 528 | } 529 | 530 | if format_type.lower() not in reporters: 531 | raise ValueError( 532 | f"Unsupported format: {format_type}. " 533 | f"Supported formats: {', '.join(reporters.keys())}" 534 | ) 535 | 536 | return reporters[format_type.lower()] 537 | -------------------------------------------------------------------------------- /docs/output.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 15 | 17 | 26 | 35 | 36 | 40 | 45 | slides.qmd 54 | 59 | output.pngFile 73 | does 82 | not 91 | exist 100 | 105 | 110 | 115 | equation.texFile 129 | does 138 | not 147 | exist 156 | 161 | 166 | 171 | alternative_equation.texFile 185 | does 194 | not 203 | exist 212 | 217 | 222 | 227 | nb_example.ipynb 236 | 241 | 246 | 251 | data/input.csvModified: 265 | 2025-04-20 274 | 17:31:30 283 | 288 | 293 | 298 | data_processing.py 307 | 312 | 317 | 322 | model_solver.py 331 | 336 | 341 | 346 | visualisation.py 355 | 360 | 365 | 370 | data/processed.csvFile 384 | does 393 | not 402 | exist 411 | 416 | 421 | 426 | 431 | 436 | 441 | mydatabaseType: 455 | mssql 464 | 469 | 474 | 479 | 484 | 489 | data_processing:process_data 498 | 503 | 508 | 509 | 510 | --------------------------------------------------------------------------------