├── .github └── workflows │ └── pythonpackage.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── kedro_diff ├── __init__.py ├── __main__.py ├── cli.py ├── commit_parser.py ├── diff.py ├── errors.py ├── get_pipelines.py ├── logger.py ├── node_diff │ ├── __init__.py │ ├── __main__.py │ └── node_diff.py └── sample_data.py ├── requirements.txt ├── requirements_dev.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── sample-data ├── empty.json ├── more_nodes.json ├── one-node-one-tag.json ├── one-node.json ├── two-nodes-two-tags.json └── two-nodes.json ├── test_commit_parser.py ├── test_diff.py ├── test_load_commit_metadata.py ├── test_main.py ├── test_node_diff.py └── test_node_diff_main.py /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | name: Kedro Diff Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | branches: 8 | - 'main' 9 | 10 | jobs: 11 | lint_test: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | os: [windows-latest, ubuntu-latest, macos-latest] 16 | python-version: [3.6, 3.7, 3.8,] 17 | defaults: 18 | run: 19 | shell: bash 20 | steps: 21 | - uses: actions/checkout@v1 22 | # turning off matrix build for early dev 23 | # - name: Set up Python ${{ matrix.python-version }} 24 | - name: Set up Python 3.8 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | architecture: x64 29 | - name: Install dependencies 30 | run: python -m pip install -e ".[dev]" 31 | - name: Lint with flake8 32 | run: flake8 . 33 | - name: Type check with MyPy 34 | run: mypy . 
35 | - name: Test with pytest 36 | run: pytest 37 | build: 38 | runs-on: ubuntu-latest 39 | needs: lint_test 40 | steps: 41 | - uses: actions/checkout@v1 42 | - name: Set up Python 3.8 43 | uses: actions/setup-python@v2 44 | with: 45 | python-version: 3.8 46 | architecture: x64 47 | - name: Install dependencies 48 | run: python -m pip install -e . 49 | - name: build 50 | run: | 51 | pip install wheel 52 | python setup.py sdist bdist_wheel 53 | - name: pypi-publish 54 | if: github.ref == 'refs/heads/main' 55 | uses: pypa/gh-action-pypi-publish@v1.1.0 56 | with: 57 | # PyPI user 58 | # Password for your PyPI user or an access toke 59 | password: ${{ secrets.pypi_password }} 60 | # The repository URL to use 61 | # repository_url: # optional 62 | # The target directory for distribution 63 | # packages_dir: # optional, default is dist 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.toptal.com/developers/gitignore/api/vim,node,data,emacs,python,pycharm,executable,sublimetext,visualstudio,visualstudiocode 3 | # Edit at https://www.toptal.com/developers/gitignore?templates=vim,node,data,emacs,python,pycharm,executable,sublimetext,visualstudio,visualstudiocode 4 | 5 | ### Data ### 6 | *.csv 7 | *.dat 8 | *.efx 9 | *.gbr 10 | *.key 11 | *.pps 12 | *.ppt 13 | *.pptx 14 | *.sdf 15 | *.tax2010 16 | *.vcf 17 | *.xml 18 | 19 | ### Emacs ### 20 | # -*- mode: gitignore; -*- 21 | *~ 22 | \#*\# 23 | /.emacs.desktop 24 | /.emacs.desktop.lock 25 | *.elc 26 | auto-save-list 27 | tramp 28 | .\#* 29 | 30 | # Org-mode 31 | .org-id-locations 32 | *_archive 33 | ltximg/** 34 | 35 | # flymake-mode 36 | *_flymake.* 37 | 38 | # eshell files 39 | /eshell/history 40 | /eshell/lastdir 41 | 42 | # elpa packages 43 | /elpa/ 44 | 45 | # reftex files 46 | *.rel 47 | 48 | # AUCTeX auto folder 49 | /auto/ 50 | 51 | # cask packages 52 | 
.cask/ 53 | dist/ 54 | 55 | # Flycheck 56 | flycheck_*.el 57 | 58 | # server auth directory 59 | /server/ 60 | 61 | # projectiles files 62 | .projectile 63 | 64 | # directory configuration 65 | .dir-locals.el 66 | 67 | # network security 68 | /network-security.data 69 | 70 | 71 | ### Executable ### 72 | *.app 73 | *.bat 74 | *.cgi 75 | *.com 76 | *.exe 77 | *.gadget 78 | *.jar 79 | *.pif 80 | *.vb 81 | *.wsf 82 | 83 | ### Node ### 84 | # Logs 85 | logs 86 | *.log 87 | npm-debug.log* 88 | yarn-debug.log* 89 | yarn-error.log* 90 | lerna-debug.log* 91 | 92 | # Diagnostic reports (https://nodejs.org/api/report.html) 93 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 94 | 95 | # Runtime data 96 | pids 97 | *.pid 98 | *.seed 99 | *.pid.lock 100 | 101 | # Directory for instrumented libs generated by jscoverage/JSCover 102 | lib-cov 103 | 104 | # Coverage directory used by tools like istanbul 105 | coverage 106 | *.lcov 107 | 108 | # nyc test coverage 109 | .nyc_output 110 | 111 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 112 | .grunt 113 | 114 | # Bower dependency directory (https://bower.io/) 115 | bower_components 116 | 117 | # node-waf configuration 118 | .lock-wscript 119 | 120 | # Compiled binary addons (https://nodejs.org/api/addons.html) 121 | build/Release 122 | 123 | # Dependency directories 124 | node_modules/ 125 | jspm_packages/ 126 | 127 | # TypeScript v1 declaration files 128 | typings/ 129 | 130 | # TypeScript cache 131 | *.tsbuildinfo 132 | 133 | # Optional npm cache directory 134 | .npm 135 | 136 | # Optional eslint cache 137 | .eslintcache 138 | 139 | # Optional stylelint cache 140 | .stylelintcache 141 | 142 | # Microbundle cache 143 | .rpt2_cache/ 144 | .rts2_cache_cjs/ 145 | .rts2_cache_es/ 146 | .rts2_cache_umd/ 147 | 148 | # Optional REPL history 149 | .node_repl_history 150 | 151 | # Output of 'npm pack' 152 | *.tgz 153 | 154 | # Yarn Integrity file 155 | .yarn-integrity 156 | 157 | # dotenv environment 
variables file 158 | .env 159 | .env.test 160 | .env*.local 161 | 162 | # parcel-bundler cache (https://parceljs.org/) 163 | .cache 164 | .parcel-cache 165 | 166 | # Next.js build output 167 | .next 168 | 169 | # Nuxt.js build / generate output 170 | .nuxt 171 | dist 172 | 173 | # Storybook build outputs 174 | .out 175 | .storybook-out 176 | storybook-static 177 | 178 | # rollup.js default build output 179 | 180 | # Gatsby files 181 | .cache/ 182 | # Comment in the public line in if your project uses Gatsby and not Next.js 183 | # https://nextjs.org/blog/next-9-1#public-directory-support 184 | # public 185 | 186 | # vuepress build output 187 | .vuepress/dist 188 | 189 | # Serverless directories 190 | .serverless/ 191 | 192 | # FuseBox cache 193 | .fusebox/ 194 | 195 | # DynamoDB Local files 196 | .dynamodb/ 197 | 198 | # TernJS port file 199 | .tern-port 200 | 201 | # Stores VSCode versions used for testing VSCode extensions 202 | .vscode-test 203 | 204 | # Temporary folders 205 | tmp/ 206 | temp/ 207 | 208 | ### PyCharm ### 209 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 210 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 211 | 212 | # User-specific stuff 213 | .idea/**/workspace.xml 214 | .idea/**/tasks.xml 215 | .idea/**/usage.statistics.xml 216 | .idea/**/dictionaries 217 | .idea/**/shelf 218 | 219 | # Generated files 220 | .idea/**/contentModel.xml 221 | 222 | # Sensitive or high-churn files 223 | .idea/**/dataSources/ 224 | .idea/**/dataSources.ids 225 | .idea/**/dataSources.local.xml 226 | .idea/**/sqlDataSources.xml 227 | .idea/**/dynamic.xml 228 | .idea/**/uiDesigner.xml 229 | .idea/**/dbnavigator.xml 230 | 231 | # Gradle 232 | .idea/**/gradle.xml 233 | .idea/**/libraries 234 | 235 | # Gradle and Maven with auto-import 236 | # When using Gradle or Maven with auto-import, you should exclude module files, 237 | # since they will be recreated, and may 
cause churn. Uncomment if using 238 | # auto-import. 239 | # .idea/artifacts 240 | # .idea/compiler.xml 241 | # .idea/jarRepositories.xml 242 | # .idea/modules.xml 243 | # .idea/*.iml 244 | # .idea/modules 245 | # *.iml 246 | # *.ipr 247 | 248 | # CMake 249 | cmake-build-*/ 250 | 251 | # Mongo Explorer plugin 252 | .idea/**/mongoSettings.xml 253 | 254 | # File-based project format 255 | *.iws 256 | 257 | # IntelliJ 258 | out/ 259 | 260 | # mpeltonen/sbt-idea plugin 261 | .idea_modules/ 262 | 263 | # JIRA plugin 264 | atlassian-ide-plugin.xml 265 | 266 | # Cursive Clojure plugin 267 | .idea/replstate.xml 268 | 269 | # Crashlytics plugin (for Android Studio and IntelliJ) 270 | com_crashlytics_export_strings.xml 271 | crashlytics.properties 272 | crashlytics-build.properties 273 | fabric.properties 274 | 275 | # Editor-based Rest Client 276 | .idea/httpRequests 277 | 278 | # Android studio 3.1+ serialized cache file 279 | .idea/caches/build_file_checksums.ser 280 | 281 | ### PyCharm Patch ### 282 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 283 | 284 | # *.iml 285 | # modules.xml 286 | # .idea/misc.xml 287 | # *.ipr 288 | 289 | # Sonarlint plugin 290 | # https://plugins.jetbrains.com/plugin/7973-sonarlint 291 | .idea/**/sonarlint/ 292 | 293 | # SonarQube Plugin 294 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin 295 | .idea/**/sonarIssues.xml 296 | 297 | # Markdown Navigator plugin 298 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced 299 | .idea/**/markdown-navigator.xml 300 | .idea/**/markdown-navigator-enh.xml 301 | .idea/**/markdown-navigator/ 302 | 303 | # Cache file creation bug 304 | # See https://youtrack.jetbrains.com/issue/JBR-2257 305 | .idea/$CACHE_FILE$ 306 | 307 | # CodeStream plugin 308 | # https://plugins.jetbrains.com/plugin/12206-codestream 309 | .idea/codestream.xml 310 | 311 | ### Python ### 312 | # Byte-compiled / optimized / DLL files 313 | 
__pycache__/ 314 | *.py[cod] 315 | *$py.class 316 | 317 | # C extensions 318 | *.so 319 | 320 | # Distribution / packaging 321 | .Python 322 | build/ 323 | develop-eggs/ 324 | downloads/ 325 | eggs/ 326 | .eggs/ 327 | parts/ 328 | sdist/ 329 | var/ 330 | wheels/ 331 | pip-wheel-metadata/ 332 | share/python-wheels/ 333 | *.egg-info/ 334 | .installed.cfg 335 | *.egg 336 | MANIFEST 337 | 338 | # PyInstaller 339 | # Usually these files are written by a python script from a template 340 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 341 | *.manifest 342 | *.spec 343 | 344 | # Installer logs 345 | pip-log.txt 346 | pip-delete-this-directory.txt 347 | 348 | # Unit test / coverage reports 349 | htmlcov/ 350 | .tox/ 351 | .nox/ 352 | .coverage 353 | .coverage.* 354 | nosetests.xml 355 | coverage.xml 356 | *.cover 357 | *.py,cover 358 | .hypothesis/ 359 | .pytest_cache/ 360 | pytestdebug.log 361 | 362 | # Translations 363 | *.mo 364 | *.pot 365 | 366 | # Django stuff: 367 | local_settings.py 368 | db.sqlite3 369 | db.sqlite3-journal 370 | 371 | # Flask stuff: 372 | instance/ 373 | .webassets-cache 374 | 375 | # Scrapy stuff: 376 | .scrapy 377 | 378 | # Sphinx documentation 379 | docs/_build/ 380 | doc/_build/ 381 | 382 | # PyBuilder 383 | target/ 384 | 385 | # Jupyter Notebook 386 | .ipynb_checkpoints 387 | 388 | # IPython 389 | profile_default/ 390 | ipython_config.py 391 | 392 | # pyenv 393 | .python-version 394 | 395 | # pipenv 396 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 397 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 398 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 399 | # install all needed dependencies. 400 | #Pipfile.lock 401 | 402 | # poetry 403 | #poetry.lock 404 | 405 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 406 | __pypackages__/ 407 | 408 | # Celery stuff 409 | celerybeat-schedule 410 | celerybeat.pid 411 | 412 | # SageMath parsed files 413 | *.sage.py 414 | 415 | # Environments 416 | # .env 417 | .env/ 418 | .venv/ 419 | env/ 420 | venv/ 421 | ENV/ 422 | env.bak/ 423 | venv.bak/ 424 | pythonenv* 425 | 426 | # Spyder project settings 427 | .spyderproject 428 | .spyproject 429 | 430 | # Rope project settings 431 | .ropeproject 432 | 433 | # mkdocs documentation 434 | /site 435 | 436 | # mypy 437 | .mypy_cache/ 438 | .dmypy.json 439 | dmypy.json 440 | 441 | # Pyre type checker 442 | .pyre/ 443 | 444 | # pytype static type analyzer 445 | .pytype/ 446 | 447 | # operating system-related files 448 | *.DS_Store #file properties cache/storage on macOS 449 | Thumbs.db #thumbnail cache on Windows 450 | 451 | # profiling data 452 | .prof 453 | 454 | 455 | ### SublimeText ### 456 | # Cache files for Sublime Text 457 | *.tmlanguage.cache 458 | *.tmPreferences.cache 459 | *.stTheme.cache 460 | 461 | # Workspace files are user-specific 462 | *.sublime-workspace 463 | 464 | # Project files should be checked into the repository, unless a significant 465 | # proportion of contributors will probably not be using Sublime Text 466 | # *.sublime-project 467 | 468 | # SFTP configuration file 469 | sftp-config.json 470 | 471 | # Package control specific files 472 | Package Control.last-run 473 | Package Control.ca-list 474 | Package Control.ca-bundle 475 | Package Control.system-ca-bundle 476 | Package Control.cache/ 477 | Package Control.ca-certs/ 478 | Package Control.merged-ca-bundle 479 | Package Control.user-ca-bundle 480 | oscrypto-ca-bundle.crt 481 | bh_unicode_properties.cache 482 | 483 | # Sublime-github package stores a github token in this file 484 | # https://packagecontrol.io/packages/sublime-github 485 | GitHub.sublime-settings 486 | 487 | ### Vim ### 488 | # Swap 489 | [._]*.s[a-v][a-z] 490 | !*.svg # comment out if you don't need vector files 
491 | [._]*.sw[a-p] 492 | [._]s[a-rt-v][a-z] 493 | [._]ss[a-gi-z] 494 | [._]sw[a-p] 495 | 496 | # Session 497 | Session.vim 498 | Sessionx.vim 499 | 500 | # Temporary 501 | .netrwhist 502 | # Auto-generated tag files 503 | tags 504 | # Persistent undo 505 | [._]*.un~ 506 | 507 | ### VisualStudioCode ### 508 | .vscode/* 509 | !.vscode/tasks.json 510 | !.vscode/launch.json 511 | *.code-workspace 512 | 513 | ### VisualStudioCode Patch ### 514 | # Ignore all local history of files 515 | .history 516 | .ionide 517 | 518 | ### VisualStudio ### 519 | ## Ignore Visual Studio temporary files, build results, and 520 | ## files generated by popular Visual Studio add-ons. 521 | ## 522 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 523 | 524 | # User-specific files 525 | *.rsuser 526 | *.suo 527 | *.user 528 | *.userosscache 529 | *.sln.docstates 530 | 531 | # User-specific files (MonoDevelop/Xamarin Studio) 532 | *.userprefs 533 | 534 | # Mono auto generated files 535 | mono_crash.* 536 | 537 | # Build results 538 | [Dd]ebug/ 539 | [Dd]ebugPublic/ 540 | [Rr]elease/ 541 | [Rr]eleases/ 542 | x64/ 543 | x86/ 544 | [Ww][Ii][Nn]32/ 545 | [Aa][Rr][Mm]/ 546 | [Aa][Rr][Mm]64/ 547 | bld/ 548 | [Bb]in/ 549 | [Oo]bj/ 550 | [Ll]og/ 551 | [Ll]ogs/ 552 | 553 | # Visual Studio 2015/2017 cache/options directory 554 | .vs/ 555 | # Uncomment if you have tasks that create the project's static files in wwwroot 556 | #wwwroot/ 557 | 558 | # Visual Studio 2017 auto generated files 559 | Generated\ Files/ 560 | 561 | # MSTest test Results 562 | [Tt]est[Rr]esult*/ 563 | [Bb]uild[Ll]og.* 564 | 565 | # NUnit 566 | *.VisualState.xml 567 | TestResult.xml 568 | nunit-*.xml 569 | 570 | # Build Results of an ATL Project 571 | [Dd]ebugPS/ 572 | [Rr]eleasePS/ 573 | dlldata.c 574 | 575 | # Benchmark Results 576 | BenchmarkDotNet.Artifacts/ 577 | 578 | # .NET Core 579 | project.lock.json 580 | project.fragment.lock.json 581 | artifacts/ 582 | 583 | # ASP.NET 
Scaffolding 584 | ScaffoldingReadMe.txt 585 | 586 | # StyleCop 587 | StyleCopReport.xml 588 | 589 | # Files built by Visual Studio 590 | *_i.c 591 | *_p.c 592 | *_h.h 593 | *.ilk 594 | *.meta 595 | *.obj 596 | *.iobj 597 | *.pch 598 | *.pdb 599 | *.ipdb 600 | *.pgc 601 | *.pgd 602 | *.rsp 603 | *.sbr 604 | *.tlb 605 | *.tli 606 | *.tlh 607 | *.tmp 608 | *.tmp_proj 609 | *_wpftmp.csproj 610 | *.vspscc 611 | *.vssscc 612 | .builds 613 | *.pidb 614 | *.svclog 615 | *.scc 616 | 617 | # Chutzpah Test files 618 | _Chutzpah* 619 | 620 | # Visual C++ cache files 621 | ipch/ 622 | *.aps 623 | *.ncb 624 | *.opendb 625 | *.opensdf 626 | *.cachefile 627 | *.VC.db 628 | *.VC.VC.opendb 629 | 630 | # Visual Studio profiler 631 | *.psess 632 | *.vsp 633 | *.vspx 634 | *.sap 635 | 636 | # Visual Studio Trace Files 637 | *.e2e 638 | 639 | # TFS 2012 Local Workspace 640 | $tf/ 641 | 642 | # Guidance Automation Toolkit 643 | *.gpState 644 | 645 | # ReSharper is a .NET coding add-in 646 | _ReSharper*/ 647 | *.[Rr]e[Ss]harper 648 | *.DotSettings.user 649 | 650 | # TeamCity is a build add-in 651 | _TeamCity* 652 | 653 | # DotCover is a Code Coverage Tool 654 | *.dotCover 655 | 656 | # AxoCover is a Code Coverage Tool 657 | .axoCover/* 658 | !.axoCover/settings.json 659 | 660 | # Coverlet is a free, cross platform Code Coverage Tool 661 | coverage*[.json, .xml, .info] 662 | 663 | # Visual Studio code coverage results 664 | *.coverage 665 | *.coveragexml 666 | 667 | # NCrunch 668 | _NCrunch_* 669 | .*crunch*.local.xml 670 | nCrunchTemp_* 671 | 672 | # MightyMoose 673 | *.mm.* 674 | AutoTest.Net/ 675 | 676 | # Web workbench (sass) 677 | .sass-cache/ 678 | 679 | # Installshield output folder 680 | [Ee]xpress/ 681 | 682 | # DocProject is a documentation generator add-in 683 | DocProject/buildhelp/ 684 | DocProject/Help/*.HxT 685 | DocProject/Help/*.HxC 686 | DocProject/Help/*.hhc 687 | DocProject/Help/*.hhk 688 | DocProject/Help/*.hhp 689 | DocProject/Help/Html2 690 | DocProject/Help/html 691 
| 692 | # Click-Once directory 693 | publish/ 694 | 695 | # Publish Web Output 696 | *.[Pp]ublish.xml 697 | *.azurePubxml 698 | # Note: Comment the next line if you want to checkin your web deploy settings, 699 | # but database connection strings (with potential passwords) will be unencrypted 700 | *.pubxml 701 | *.publishproj 702 | 703 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 704 | # checkin your Azure Web App publish settings, but sensitive information contained 705 | # in these scripts will be unencrypted 706 | PublishScripts/ 707 | 708 | # NuGet Packages 709 | *.nupkg 710 | # NuGet Symbol Packages 711 | *.snupkg 712 | # The packages folder can be ignored because of Package Restore 713 | **/[Pp]ackages/* 714 | # except build/, which is used as an MSBuild target. 715 | !**/[Pp]ackages/build/ 716 | # Uncomment if necessary however generally it will be regenerated when needed 717 | #!**/[Pp]ackages/repositories.config 718 | # NuGet v3's project.json files produces more ignorable files 719 | *.nuget.props 720 | *.nuget.targets 721 | 722 | # Microsoft Azure Build Output 723 | csx/ 724 | *.build.csdef 725 | 726 | # Microsoft Azure Emulator 727 | ecf/ 728 | rcf/ 729 | 730 | # Windows Store app package directories and files 731 | AppPackages/ 732 | BundleArtifacts/ 733 | Package.StoreAssociation.xml 734 | _pkginfo.txt 735 | *.appx 736 | *.appxbundle 737 | *.appxupload 738 | 739 | # Visual Studio cache files 740 | # files ending in .cache can be ignored 741 | *.[Cc]ache 742 | # but keep track of directories ending in .cache 743 | !?*.[Cc]ache/ 744 | 745 | # Others 746 | ClientBin/ 747 | ~$* 748 | *.dbmdl 749 | *.dbproj.schemaview 750 | *.jfm 751 | *.pfx 752 | *.publishsettings 753 | orleans.codegen.cs 754 | 755 | # Including strong name files can present a security risk 756 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 757 | #*.snk 758 | 759 | # Since there are multiple workflows, uncomment next line to 
ignore bower_components 760 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 761 | #bower_components/ 762 | 763 | # RIA/Silverlight projects 764 | Generated_Code/ 765 | 766 | # Backup & report files from converting an old project file 767 | # to a newer Visual Studio version. Backup files are not needed, 768 | # because we have git ;-) 769 | _UpgradeReport_Files/ 770 | Backup*/ 771 | UpgradeLog*.XML 772 | UpgradeLog*.htm 773 | ServiceFabricBackup/ 774 | *.rptproj.bak 775 | 776 | # SQL Server files 777 | *.mdf 778 | *.ldf 779 | *.ndf 780 | 781 | # Business Intelligence projects 782 | *.rdl.data 783 | *.bim.layout 784 | *.bim_*.settings 785 | *.rptproj.rsuser 786 | *- [Bb]ackup.rdl 787 | *- [Bb]ackup ([0-9]).rdl 788 | *- [Bb]ackup ([0-9][0-9]).rdl 789 | 790 | # Microsoft Fakes 791 | FakesAssemblies/ 792 | 793 | # GhostDoc plugin setting file 794 | *.GhostDoc.xml 795 | 796 | # Node.js Tools for Visual Studio 797 | .ntvs_analysis.dat 798 | 799 | # Visual Studio 6 build log 800 | *.plg 801 | 802 | # Visual Studio 6 workspace options file 803 | *.opt 804 | 805 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
806 | *.vbw 807 | 808 | # Visual Studio LightSwitch build output 809 | **/*.HTMLClient/GeneratedArtifacts 810 | **/*.DesktopClient/GeneratedArtifacts 811 | **/*.DesktopClient/ModelManifest.xml 812 | **/*.Server/GeneratedArtifacts 813 | **/*.Server/ModelManifest.xml 814 | _Pvt_Extensions 815 | 816 | # Paket dependency manager 817 | .paket/paket.exe 818 | paket-files/ 819 | 820 | # FAKE - F# Make 821 | .fake/ 822 | 823 | # CodeRush personal settings 824 | .cr/personal 825 | 826 | # Python Tools for Visual Studio (PTVS) 827 | *.pyc 828 | 829 | # Cake - Uncomment if you are using it 830 | # tools/** 831 | # !tools/packages.config 832 | 833 | # Tabs Studio 834 | *.tss 835 | 836 | # Telerik's JustMock configuration file 837 | *.jmconfig 838 | 839 | # BizTalk build output 840 | *.btp.cs 841 | *.btm.cs 842 | *.odx.cs 843 | *.xsd.cs 844 | 845 | # OpenCover UI analysis results 846 | OpenCover/ 847 | 848 | # Azure Stream Analytics local run output 849 | ASALocalRun/ 850 | 851 | # MSBuild Binary and Structured Log 852 | *.binlog 853 | 854 | # NVidia Nsight GPU debugger configuration file 855 | *.nvuser 856 | 857 | # MFractors (Xamarin productivity tool) working folder 858 | .mfractor/ 859 | 860 | # Local History for Visual Studio 861 | .localhistory/ 862 | 863 | # BeatPulse healthcheck temp database 864 | healthchecksdb 865 | 866 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 867 | MigrationBackup/ 868 | 869 | # Ionide (cross platform F# VS Code tools) working folder 870 | .ionide/ 871 | 872 | # Fody - auto-generated XML schema 873 | FodyWeavers.xsd 874 | 875 | # End of https://www.toptal.com/developers/gitignore/api/vim,node,data,emacs,python,pycharm,executable,sublimetext,visualstudio,visualstudiocode 876 | .envrc 877 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | known_third_party = 
click,kedro,more_itertools,pytest,rich,setuptools 3 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.4.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - id: check-case-conflict # Check for files that would conflict in case-insensitive filesystems 12 | - id: check-merge-conflict # Check for files that contain merge conflict strings. 13 | - id: debug-statements # Check for debugger imports and py37+ `breakpoint()` calls in python source. 14 | - id: requirements-txt-fixer # Sorts entries in requirements.txt 15 | - id: forbid-new-submodules 16 | 17 | # - repo: https://github.com/pycqa/pydocstyle 18 | # rev: 4.0.0 # pick a git hash / tag to point to 19 | # hooks: 20 | # - id: pydocstyle 21 | - repo: https://gitlab.com/pycqa/flake8 22 | rev: '3.9.0' 23 | hooks: 24 | - id: flake8 25 | - repo: https://github.com/asottile/seed-isort-config 26 | rev: v2.2.0 27 | hooks: 28 | - id: seed-isort-config 29 | - repo: https://github.com/pre-commit/mirrors-isort 30 | rev: v5.8.0 31 | hooks: 32 | - id: isort 33 | - repo: https://github.com/asottile/blacken-docs 34 | rev: v1.10.0 35 | hooks: 36 | - id: blacken-docs 37 | additional_dependencies: [black] 38 | - repo: https://github.com/psf/black 39 | rev: 20.8b1 40 | hooks: 41 | - id: black 42 | - repo: https://github.com/pre-commit/mirrors-mypy 43 | rev: v0.812 44 | hooks: 45 | - id: mypy 46 | exclude: tests/ 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 
Waylon Walker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kedro-diff 2 | 3 | 4 | `kedro-diff` aims to be a familiar interface into comparing two points in 5 | history. Git diffs are fantastic tools but often are too granular to see what 6 | has changed inside the pipeline. `kedro-diff` aims to be a familiar tool at a 7 | higher level so we can see changes to nodes (names, inputs, outputs, tags). 8 | 9 | ## Installation 10 | 11 | 12 | ``` bash 13 | pip install kedro-diff 14 | ``` 15 | 16 | ## Example 17 | 18 | ``` diff 19 | kedro diff --stat develop..master 20 | M __default__ | 6 ++++- 21 | M data_science | 3 +++ 22 | M data_engineering | 3 ++- 23 | ?? 
new_pipeline 24 | 25 | 4 pipelines changed, 5 insertions(+), 4 deletions(-) 26 | ``` 27 | 28 | ## Usage 29 | 30 | ``` diff 31 | # diff develop into master 32 | kedro diff develop..master 33 | 34 | kedro diff develop master 35 | 36 | # diff current state with main 37 | kedro diff main 38 | 39 | # diff current state with main 40 | kedro diff ..main 41 | 42 | # comparing pipelines from two branches 43 | kedro diff master new_branch data_science 44 | ``` 45 | 46 | ## More examples 47 | 48 | ``` diff 49 | kedro diff develop..master 50 | ╭──────────────────────────────────────────────────────────────────────────────╮ 51 | │ modified: data_engineering │ 52 | ╰──────────────────────────────────────────────────────────────────────────────╯ 53 | + strip_whitespace 54 | + lowercase_columns 55 | + get_trains 56 | - get_tains 57 | ╭──────────────────────────────────────────────────────────────────────────────╮ 58 | │ modified: data_science │ 59 | ╰──────────────────────────────────────────────────────────────────────────────╯ 60 | + split_data 61 | ``` 62 | 63 | ## Roadmap 64 | 65 | ### 1.0.0 66 | 67 | - [x] commit parser 68 | - [x] get `pipeline.to_json()` for `__default__` for two different commits 69 | - [x] get `pipeline.to_json()` for all pipelines for two different commits 70 | - [x] --stat compares the number of nodes added or dropped in `__default__` 71 | - [x] --stat compares the number of nodes added or dropped in all pipelines 72 | - [x] --stat compares attribute changes (inputs, outputs, tags) in all pipelines 73 | - [x] compare input names 74 | - [x] compare output names 75 | - [ ] speed up getting repeat pipelines from the same commit (no need to reaload a new session) 76 | - [ ] speed up getting repeat commits by checking commit hash (reuse existing json) 77 | - [ ] minimize untested code 78 | 79 | ### 2.0.0 80 | 81 | _super-size `pipeline.to_json()`_ 82 | - [x] compare all attributes on a node ( not just inputs, outputs, tags) 83 | - [ ] allow users to specify 
custom to_json method 84 | - [ ] function names 85 | - [ ] function hashes 86 | - [ ] catalog _filepath 87 | - [ ] catalog _sql 88 | 89 | ## Testing 90 | 91 | This project strives for 100% test coverage where it makes sense. Other kedro 92 | plugins I have created have suffered in development speed due to the complexity of 93 | fully testing on a full kedro project. There are so many pieces to get into 94 | place that it becomes difficult to test across multiple versions of kedro or 95 | keep the tests working as kedro changes. Minimal functionality will be placed 96 | into modules that require a full kedro project in place to work. 97 | -------------------------------------------------------------------------------- /kedro_diff/__init__.py: -------------------------------------------------------------------------------- 1 | """Kedro Diff - quickly diff kedro history 2 | 3 | kedro diff aims to be a familiar interface into comparing two points in 4 | history. Git diffs are fantastic tools but often are too granular to see what 5 | has changed inside the pipeline. kedro diff aims to be a familiar tool at a 6 | higher level so we can see changes to nodes (names, inputs, outputs, tags). 7 | 8 | HomePage and documentation: https://diff.kedro.dev/ 9 | 10 | Copyright (c) 2020, Waylon Walker. 
@cli.command()
@click.option("-o", "--output", type=click.File("wb"))
@click.option(
    "-v",
    "--verbose",
    count=True,
    help="verbosity level, -v enables diff related logs, -vv enables all logs",
)
@click.option("-q", "--quiet", is_flag=True, help="runs completely quiet")
@click.option("-p", "--pipeline-name", help="name of pipeline")
@click.option("-c", "--commit", help="name of commit")
@click.pass_obj
def get_json(
    metadata: "ProjectMetadata",
    output: IO,
    verbose: int,
    quiet: bool,
    pipeline_name: str = "__default__",
    commit: str = "HEAD",
) -> None:
    """Dump every pipeline in the project to json files in the output directory.

    Also writes a ``<commit>-commit-metadata.json`` file recording the commit
    ref, its sha, and the names of all pipelines found.

    NOTE(review): the ``pipeline_name`` parameter is shadowed by the loop
    variable below, so the ``-p`` option appears to have no effect — confirm.
    """
    if quiet:
        verbose = -1
    logger = get_logger(verbose=verbose)
    logger.setLevel(logging.INFO)
    if verbose < 1:
        logger.setLevel(logging.ERROR)

    # breakpoint()
    # below -vv every third-party logger is muted for clean json-only output
    if verbose < 2:
        silent_loggers()

    session = KedroSession.create(metadata.package_name)
    context = session.load_context()

    # slashes/spaces in the commit ref are not filename-safe, replace with "_"
    meta_path = (
        Path(output.name)
        / (commit.replace("/", "_").replace(" ", "_") + "-commit-metadata.json")
    ).absolute()

    if not meta_path.parent.exists():
        meta_path.parent.mkdir()

    # record the sha of the current checkout alongside the user-supplied ref
    sha = subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode("utf-8")

    diffmeta = {
        "commit": commit,
        "sha": sha,
        "pipelines": list(context.pipelines.keys()),
    }
    meta_path.write_text(json.dumps(diffmeta))

    # one json file per pipeline: `<commit>_<pipeline>.json`
    for pipeline_name, pipeline in context.pipelines.items():
        output_file = Path(output.name) / (
            "_".join([commit, pipeline_name]).replace("/", "_").replace(" ", "_")
            + ".json"
        )
        pipeline = pipeline.to_json()
        if verbose >= 0:
            print(pipeline)
        output_file.write_text(pipeline)
    return


@cli.command()
@click.option(
    "-v",
    "--verbose",
    count=True,
    help="verbosity level, -v enables diff related logs, -vv enables all logs",
)
@click.option("-q", "--quiet", is_flag=True, help="runs completely quiet")
@click.argument("commit", nargs=-1)
@click.option("--stat", is_flag=True, help="generate short stats only")
@click.pass_obj
def diff(
    metadata: "ProjectMetadata",
    verbose: int,
    quiet: bool,
    commit: Tuple[str, ...],
    stat: bool,
) -> None:
    """Diff the pipelines of two commits.

    Exports each commit's pipelines to json (running ``kedro get-json`` in a
    temporary checkout), then renders either a short stat line or a full diff
    for every pipeline name seen in either commit.
    """
    # imported lazily so plugin discovery / `--help` stays fast
    from kedro_diff.commit_parser import load_commit_metadata, parse_commit
    from kedro_diff.get_pipelines import to_json

    try:
        project_path = metadata.project_path
    except AttributeError:
        # metadata is None when invoked outside a kedro project context
        project_path = Path.cwd()

    if quiet:
        verbose = -1

    logger = get_logger(verbose=verbose)
    logger.info(f"project path is set to {project_path}")

    commit1, commit2 = parse_commit(commit, verbose=verbose)
    to_json(project_path, commit1)
    to_json(project_path, commit2)

    logger.info(f"Converted pipelines to json")

    meta1, meta2 = load_commit_metadata(commit)
    # diff every pipeline that exists in either commit
    all_pipelines = sorted({*meta1["pipelines"], *meta2["pipelines"]})
    for pipeline in all_pipelines:
        pipe1 = load_json(commit1, pipeline)
        pipe2 = load_json(commit2, pipeline)
        diff = KedroDiff(pipe1, pipe2, name=pipeline)
        if stat:
            diff.stat()
        else:
            diff.diff()


def load_json(commit: str, pipeline_name: str) -> Any:
    """
    Tries to load pipeline data; if none is found it returns an empty pipeline.

    Parameters
    --------
    commit : str
        a commit to load pipeline data for.
    pipeline_name : str
        a pipeline to load pipeline data for.

    Returns
    --------
    dict
        pipeline data

    """
    try:
        return json.loads(
            (
                Path(".kedro-diff")
                / (
                    "_".join([commit, pipeline_name])
                    .replace("/", "_")
                    .replace(" ", "_")
                    + ".json"
                )
            ).read_text()
        )
    except FileNotFoundError:
        # pipeline missing in this commit: treat it as empty so the diff
        # reports every node as added or removed
        return create_simple_sample(0)


def diff_stat(commit1: str, commit2: str, pipeline_name: str = "__default__") -> None:
    """
    Does a diff --stat for the given pipeline_name between two commits.

    Parameters
    --------
    commit1 : str
        first commit to load pipeline data for.
    commit2 : str
        second commit to load pipeline data for.
    pipeline_name : str
        a pipeline to load pipeline data for.
    """
    pipe1 = load_json(commit1, pipeline_name)
    pipe2 = load_json(commit2, pipeline_name)

    diff = KedroDiff(pipe1, pipe2, name=pipeline_name)
    diff.stat()
def parse_commit(
    commit: Union[str, Tuple[str, ...]], verbose: int = 0
) -> Tuple[str, str]:
    """
    Parse user input into the two commits to compare.

    Accepts either a single ``a..b`` / ``a...b`` style string or a tuple of
    commit refs; a missing side defaults to HEAD.

    Example:
        parse_commit('develop..main')

    Raises
    ------
    KedroDiffError
        if no commit is given, or more than two commits are given.
    """
    # stdlib replacement for more_itertools.flatten
    from itertools import chain

    if len(commit) == 0:
        raise KedroDiffError(
            f"at least one commit must be passed to compare\n received {commit}"
        )
    # split commits in case of `kedro diff main..branch`;
    # check "..." first so it is not mis-split by the ".." branch
    if isinstance(commit, str):
        if "..." in commit:
            return parse_commit(tuple(commit.split("...")))
        return parse_commit(tuple(commit.split("..")))
    # a tuple may still contain range syntax inside its elements
    if "..." in str(commit):
        return parse_commit(
            tuple(chain.from_iterable(c.split("...") for c in commit))
        )
    if ".." in str(commit):
        return parse_commit(
            tuple(chain.from_iterable(c.split("..") for c in commit))
        )

    if len(commit) > 2:
        raise KedroDiffError(
            f"no more than 2 commits may be compared\n received {commit}"
        )

    if len(commit) == 2:
        commit1, commit2 = commit[0], commit[1]
    else:
        # `kedro diff branch` compares branch against HEAD
        commit1, commit2 = commit[0], "HEAD"

    # `kedro diff ..branch` leaves an empty first ref, which means HEAD
    if commit1 == "":
        commit1 = "HEAD"

    logger = get_logger(verbose=verbose)
    logger.info(f"comparing {commit1} to {commit2}")

    return commit1, commit2


def _read_commit_metadata(commit: str, root_dir: Union[str, Path]) -> Dict:
    """Read the `<commit>-commit-metadata.json` file written by `kedro get-json`."""
    filename = commit.replace("/", "_").replace(" ", "_") + "-commit-metadata.json"
    return json.loads((Path(root_dir) / ".kedro-diff" / filename).absolute().read_text())


def load_commit_metadata(
    commit: Union[str, Tuple[str, ...]],
    verbose: int = 0,
    root_dir: Union[str, Path] = ".",
) -> Tuple[Dict, Dict]:
    """
    Load the commit metadata for both sides of a diff.

    Parameters
    ----------
    commit : str or tuple of str
        user input identifying the commits (see `parse_commit`).
    verbose : int
        verbosity level; kept for interface stability.
    root_dir : str or Path
        directory containing the `.kedro-diff` metadata folder.

    Returns
    -------
    Tuple[Dict, Dict]
        metadata dictionaries for commit1 and commit2.

    Raises
    ------
    FileNotFoundError
        if a metadata file has not been generated for a commit.
    """
    commit1, commit2 = parse_commit(commit)
    return (
        _read_commit_metadata(commit1, root_dir),
        _read_commit_metadata(commit2, root_dir),
    )
class KedroDiff:
    """Compare two kedro pipelines and report their differences.

    Parameters
    ----------
    pipe1 : Dict
        base pipeline (pipeline.json-style dict)
    pipe2 : Dict
        pipeline to compare to the base pipeline
    name : str
        name of the pipeline being compared

    Examples
    --------
    >>> from kedro_diff import KedroDiff
    >>> diff = KedroDiff.from_sample({"num_nodes": 2}, {"num_nodes": 4})
    >>> diff.stat()
    M __default__ | 2 ++
    """

    def __init__(self, pipe1: Dict, pipe2: Dict, name: str = "__default__") -> None:
        """Unpack both node lists and set up a rich console for rendering."""
        self.pipe1 = pipe1["pipeline"]
        self.pipe2 = pipe2["pipeline"]
        self.name = name
        self.console = Console()

    @classmethod
    def from_sample(
        cls, pipe1_args: Dict, pipe2_args: Dict, name: str = "__default__"
    ) -> "KedroDiff":
        """Build a KedroDiff from `create_simple_sample` arguments.

        Parameters
        ----------
        pipe1_args : dict
            arguments used to create pipe1
        pipe2_args : dict
            arguments used to create pipe2
        name : str
            name of the pipeline being compared

        See Also
        --------
        kedro_diff.sample_data.create_simple_sample
        """
        return cls(
            pipe1=create_simple_sample(**pipe1_args),
            pipe2=create_simple_sample(**pipe2_args),
            name=name,
        )

    @staticmethod
    def _names(pipe: List) -> set:
        """Set of node names in a pipeline's node list."""
        return {node["name"] for node in pipe}

    @property
    def all_nodes(self) -> List:
        """Sorted names of every node appearing in either pipeline."""
        return sorted(self._names(self.pipe1) | self._names(self.pipe2))

    @property
    def new_nodes(self) -> set:
        """Node names present only in pipe2."""
        return self._names(self.pipe2) - self._names(self.pipe1)

    @property
    def dropped_nodes(self) -> set:
        """Node names present only in pipe1."""
        return self._names(self.pipe1) - self._names(self.pipe2)

    @property
    def not_new_dropped_nodes(self) -> set:
        """Node names present in both pipelines (attribute-change candidates)."""
        return self._names(self.pipe2) - self.new_nodes - self.dropped_nodes

    @property
    def change_input(self) -> set:
        """(name, inputs) pairs whose inputs changed between pipelines."""
        return self.change_attr("inputs")

    @property
    def change_output(self) -> set:
        """(name, outputs) pairs whose outputs changed between pipelines."""
        return self.change_attr("outputs")

    @property
    def change_tag(self) -> set:
        """(name, tags) pairs whose tags changed between pipelines."""
        return self.change_attr("tags")

    def change_attr(self, attr: str) -> set:
        """(name, value) pairs for surviving nodes whose `attr` differs in pipe2."""
        survivors = self.not_new_dropped_nodes

        def signature(pipe: List) -> set:
            # hashable fingerprint of each surviving node's attribute value
            return {
                (node["name"], tuple(node[attr]))
                for node in pipe
                if node["name"] in survivors
            }

        return signature(self.pipe2) - signature(self.pipe1)

    @property
    def num_changes(self) -> int:
        """Total change count; an attribute change counts twice (one add, one drop)."""
        attr_changes = (
            len(self.change_input) + len(self.change_output) + len(self.change_tag)
        )
        return len(self.new_nodes) + len(self.dropped_nodes) + 2 * attr_changes

    @property
    def num_adds(self) -> int:
        """Number of '+' marks: new nodes plus each changed attribute."""
        return (
            len(self.new_nodes)
            + len(self.change_input)
            + len(self.change_output)
            + len(self.change_tag)
        )

    @property
    def num_drops(self) -> int:
        """Number of '-' marks: dropped nodes plus each changed attribute."""
        return (
            len(self.dropped_nodes)
            + len(self.change_input)
            + len(self.change_output)
            + len(self.change_tag)
        )

    @property
    def _stat_msg(self) -> str:
        """One-line, git-style stat message with rich markup."""
        label = self.name.ljust(30)[:30]
        adds = "+" * self.num_adds
        drops = "-" * self.num_drops
        return (
            f"[red]M[/red] {label} | {self.num_changes} "
            f"[green]{adds}[/green][red]{drops}[/red]"
        )

    def stat(self) -> None:
        """Print the short stat line for this pipeline."""
        self.console.print(self._stat_msg)

    def diff(self) -> None:
        """Print a full node-by-node diff; silent when nothing changed."""
        if self.num_changes == 0:
            return
        self.console.print(
            Panel(
                f"modified: {self.name.ljust(88)}",
                title="[bright_black]kedro-diff[/bright_black]",
                title_align="right",
                expand=False,
            ),
        )
        for name in self.all_nodes:
            node1 = next((n for n in self.pipe1 if n["name"] == name), None)
            node2 = next((n for n in self.pipe2 if n["name"] == name), None)
            NodeDiff(node1, node2, name).diff()
244 | diff_change_one_input.stat() 245 | 246 | KedroDiff( 247 | pipe10, pipe10_change_one_output, name="ten_nodes_one_output_change" 248 | ).stat() 249 | KedroDiff(pipe10, pipe10_change_one_tag, name="ten_nodes_one_tag_change").stat() 250 | 251 | console.print("\n\n") 252 | console.print("[brightblack]KedroDiff.diff()[/]\n") 253 | 254 | KedroDiff(create_simple_sample(1), create_simple_sample(1)).diff() 255 | KedroDiff(create_simple_sample(0), create_simple_sample(1)).diff() 256 | KedroDiff( 257 | create_simple_sample(10, name_prefix="first"), 258 | create_simple_sample(12), 259 | name="twelve_new_nodes_ten_dropped_nodes", 260 | ).diff() 261 | 262 | diff_change_one_input.diff() 263 | -------------------------------------------------------------------------------- /kedro_diff/errors.py: -------------------------------------------------------------------------------- 1 | class KedroDiffError(TypeError): 2 | """ 3 | Error messages related to the kedro diff command. 4 | """ 5 | -------------------------------------------------------------------------------- /kedro_diff/get_pipelines.py: -------------------------------------------------------------------------------- 1 | """get_pipelines. 
2 | 3 | Get json from a specific commit 4 | """ 5 | import os 6 | import shutil 7 | import subprocess 8 | import tempfile 9 | from pathlib import Path 10 | from typing import Callable, Optional, Union 11 | 12 | from kedro_diff.logger import get_logger 13 | 14 | 15 | def copytree( 16 | src: Union[str, Path], 17 | dst: Union[str, Path], 18 | symlinks: bool = False, 19 | ignore: Optional[Callable] = None, 20 | ) -> None: 21 | """Copy src director into dst directory.""" 22 | ignore_items = [ 23 | ".envrc", 24 | ".venv", 25 | ".kedro-diff", 26 | ] 27 | items = [item for item in os.listdir(str(src)) if item not in ignore_items] 28 | 29 | for item in items: 30 | s = os.path.join(src, item) 31 | d = os.path.join(dst, item) 32 | if os.path.isdir(s): 33 | shutil.copytree(s, d, symlinks, ignore) 34 | else: 35 | shutil.copy2(s, d) 36 | 37 | 38 | def to_json(project_path: Union[str, Path], commit: str, verbose: int = 0) -> None: 39 | """Get json from specific commit.""" 40 | 41 | with tempfile.TemporaryDirectory() as tmpdirname: 42 | logger = get_logger(verbose=verbose) 43 | logger.info(f"copying {project_path} into {tmpdirname}") 44 | copytree(project_path, tmpdirname) 45 | subprocess.call( 46 | f'git checkout "{commit}" --force --quiet', shell=True, cwd=tmpdirname 47 | ) 48 | 49 | pipeline_path = (Path() / ".kedro-diff").absolute() 50 | if verbose < 1: 51 | subprocess.call( 52 | f"kedro get-json --output '{pipeline_path}' --commit '{commit}' --quiet", 53 | shell=True, 54 | cwd=tmpdirname, 55 | stdout=subprocess.DEVNULL, 56 | stderr=subprocess.DEVNULL, 57 | ) 58 | else: 59 | subprocess.call( 60 | f"kedro get-json --output '{pipeline_path}' --commit '{commit}' --quiet", 61 | shell=True, 62 | cwd=tmpdirname, 63 | ) 64 | 65 | 66 | if __name__ == "__main__": 67 | import sys 68 | 69 | to_json(sys.argv[1], sys.argv[2]) 70 | -------------------------------------------------------------------------------- /kedro_diff/logger.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def silent_loggers() -> None: 5 | """All logs need to be silent in order for a clean kedro diff output.""" 6 | known_kedro_loggers = [ 7 | "ProfileTimeTransformer", 8 | "hooks_handler", 9 | "kedro.__init__", 10 | "kedro", 11 | "kedro.config", 12 | "kedro.config.config", 13 | "kedro.extras.decorators.memory_profiler", 14 | "kedro.framework.cli", 15 | "kedro.framework.session.session", 16 | "kedro.framework.session.store", 17 | "kedro.framework.session", 18 | "kedro.io.cached_dataset", 19 | "kedro.io.data_catalog", 20 | "kedro.io", 21 | "kedro.journal", 22 | "kedro.pipeline", 23 | "kedro.pipeline.decorators", 24 | "kedro.pipeline.node", 25 | "kedro.pipeline.pipeline", 26 | "kedro.runner", 27 | "kedro.runner.runner", 28 | "kedro.versioning.journal", 29 | "kedro_telemetry.plugin", 30 | "py4", 31 | ] 32 | for logger in [ 33 | *known_kedro_loggers, 34 | *list(logging.root.manager.loggerDict.keys()), # type: ignore 35 | ]: 36 | logging.getLogger(logger).setLevel(logging.ERROR) 37 | 38 | 39 | def get_logger(verbose: int = 0) -> logging.Logger: 40 | logger = logging.getLogger("kedro-diff") 41 | logger.setLevel(logging.INFO) 42 | if verbose < 1: 43 | logger.setLevel(logging.ERROR) 44 | 45 | if verbose < 0: 46 | silent_loggers() 47 | logger.setLevel(100) 48 | 49 | return logger 50 | -------------------------------------------------------------------------------- /kedro_diff/node_diff/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["NodeDiff"] 2 | from .node_diff import NodeDiff 3 | -------------------------------------------------------------------------------- /kedro_diff/node_diff/__main__.py: -------------------------------------------------------------------------------- 1 | from kedro.pipeline.node import node 2 | 3 | from kedro_diff.node_diff import NodeDiff 4 | 5 | node1 = node(lambda x: x, "input", 
"output", name="id") 6 | node1_b = node(lambda x: x, "input", "output", name="id") 7 | node2 = node(lambda x: x, "input", "output", name="new-id") 8 | node3 = node(lambda x: x, "input3", "output", name="id") 9 | node4 = node(lambda x: x, "input", "output4", name="id") 10 | node5 = node(lambda x: x, "input", "output", tags=["new-tag"], name="id") 11 | 12 | # Node that is the same node should not print 13 | NodeDiff(node1, node1, name="is").diff() 14 | # Node that is the same node should print unchanged if verbose 15 | NodeDiff(node1, node1, name="verbose_is", verbose_level=2).diff() 16 | # Node that is an equvalent node should not print 17 | NodeDiff(node1, node1_b, name="equal").diff() 18 | # Node that is an equvalent node should print unchanged if verbose 19 | NodeDiff(node1, node1_b, name="verbose_equal", verbose_level=2).diff() 20 | # Node Identifies as deleted 21 | NodeDiff(node1, None, name="deleted_node").diff() 22 | # Node Identifies as none should not print 23 | NodeDiff(None, None, name="none").diff() 24 | # Node Identifies as none should print unchanged if verbose 25 | NodeDiff(None, None, name="verbose_none", verbose_level=2).diff() 26 | # Node Identifies as new 27 | NodeDiff(None, node1, name="new_node").diff() 28 | # 29 | NodeDiff(node1, node2, name="name_changed").diff() 30 | NodeDiff(node1, node3, name="input_changed").diff() 31 | NodeDiff(node1, node4, name="output_changed").diff() 32 | NodeDiff(node1, node5, name="tag_changed", verbose_level=2).diff() 33 | NodeDiff(node5, node1, name="tag_dropped", verbose_level=2).diff() 34 | -------------------------------------------------------------------------------- /kedro_diff/node_diff/node_diff.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union 2 | 3 | from rich.console import Console 4 | 5 | if TYPE_CHECKING: 6 | from kedro.pipeline.node import Node 7 | 8 | 9 | class NodeDiff: 10 | """NodeDiff. 
class NodeDiff:
    """Compare two kedro nodes (or their dict representations) and print a diff.

    verbose levels:
        0: silent
        1: (default) prints only node diffs
        2: prints regardless of equality
    """

    def __init__(
        self,
        node1: Optional[Union[Dict, "Node"]] = None,
        node2: Optional[Union[Dict, "Node"]] = None,
        name: Optional[str] = None,
        verbose_level: int = 1,
    ) -> None:
        """Store both nodes, the display name, and a rich console."""
        self.node1 = node1
        self.node2 = node2
        self.name = name
        self.console = Console()
        self.verbose_level = verbose_level

    @property
    def is_changed(self) -> bool:
        """True when the two nodes differ in any way."""
        return self.node1 != self.node2

    @property
    def is_none(self) -> bool:
        """True when there is no node on either side."""
        return self.node1 is None and self.node2 is None

    @property
    def is_new(self) -> bool:
        """True when there is no left-hand node."""
        return self.node1 is None

    @property
    def is_deleted(self) -> bool:
        """True when there is no right-hand node."""
        return self.node2 is None

    @property
    def attrs(self) -> List:
        """Public attribute names of whichever node survives the diff."""
        target = self.node1 if self.is_deleted else self.node2
        if target is None:
            return []
        names = target.keys() if isinstance(target, dict) else dir(target)
        return [a for a in names if not a.startswith("_")]

    def get_attr(self, attr: str) -> Tuple:
        """Fetch `attr` from both nodes as (value1, value2); None when absent."""
        values = []
        for current in (self.node1, self.node2):
            if current is None:
                values.append(None)
                continue
            try:
                values.append(
                    current[attr] if isinstance(current, dict) else getattr(current, attr)
                )
            except KeyError:
                values.append(None)
        attr1, attr2 = values

        # callables are methods, not data attributes worth diffing
        if callable(attr1) or callable(attr2):
            return None, None
        return attr1, attr2

    def diff_attr(self, attr: str) -> None:
        """Print a strike-through/replacement line when `attr` differs."""
        attr1, attr2 = self.get_attr(attr)
        attr1 = attr1 if attr1 is not None else ""
        attr2 = attr2 if attr2 is not None else ""
        if attr1 == attr2:
            return
        attr_name = f"{attr}: "[:10]
        self.console.print(
            f"[{self.diff_color}] {attr_name} [red][strike]{attr1}[/strike] [green]{attr2}"
        )

    def diff_attrs(self) -> None:
        """Diff every public attribute of the surviving node."""
        for attr in self.attrs:
            self.diff_attr(attr)

    @property
    def diff_color(self) -> str:
        """Color keyed to the node's state: deleted, new, or changed."""
        if self.is_deleted:
            return "red"
        return "green" if self.is_new else "gold1"

    def diff(self) -> None:
        """Print this node's diff according to its state and verbosity."""
        if self.is_none:
            if self.verbose_level > 1:
                self.console.print(f"[bright_black] {self.name} is None")
            return
        if not self.is_changed:
            if self.verbose_level > 1:
                self.console.print(f"[bright_black] {self.name} is unchanged")
            return
        if self.is_new:
            self.console.print(f"[green]+ {self.name}")
        elif self.is_deleted:
            self.console.print(f"[red]- [strike]{self.name}[/strike]")
        else:
            self.console.print(f"[green]+ [{self.diff_color}]{self.name}")
        self.diff_attrs()
15 | 16 | Parameters 17 | -------- 18 | num_nodes : int 19 | number of nodes to generate in the pipeline 20 | kedro_version : str 21 | kedro version to use in the pipeline.json format 22 | tagged : bool 23 | to tag the datasets or not 24 | name_prefix : str 25 | prefix to add to the name of each node 26 | 27 | Returns 28 | -------- 29 | kedro pipeline.json sample data as a dictionary 30 | 31 | Examples 32 | -------- 33 | >>> create_simple_sample(1) 34 | {'kedro_version': '0.17.2', 'pipeline': [{'name': 'node1', 'inputs': ['output0'], 'outputs': ['output1'], 'tags': ['tag1']}]} 35 | 36 | >>> create_simple_sample(1, name_prefix='first') 37 | {'kedro_version': '0.17.2', 'pipeline': [{'name': 'first1', 'inputs': ['output0'], 'outputs': ['output1'], 'tags': ['tag1']}]} 38 | 39 | >>> create_simple_sample(1, tagged=False) 40 | {'kedro_version': '0.17.2', 'pipeline': [{'name': 'node1', 'inputs': ['output0'], 'outputs': ['output1'], 'tags': ['']}]} 41 | """ 42 | return { 43 | "kedro_version": kedro_version, 44 | "pipeline": [ 45 | { 46 | "name": f"{name_prefix}{n}", 47 | "inputs": [f"output{n-1}"], 48 | "outputs": [f"output{n}"], 49 | "tags": [f"tag{n}" if tagged else ""], 50 | } 51 | for n in range(1, num_nodes + 1) 52 | ], 53 | } 54 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click 2 | kedro 3 | more_itertools 4 | rich 5 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | black == 20.8b1 2 | bump2version 3 | flake8 == 3.9.0 4 | interrogate 5 | isort == 5.8.0 6 | mypy == 0.812 7 | pre-commit 8 | pytest 9 | pytest-cov 10 | pytest-mock 11 | setuptools 12 | wheel 13 | -------------------------------------------------------------------------------- /setup.cfg: 
"""
kedro-diff uses setup tools for packaging.

To Build kedro-diff as a Python package

$ python setup.py sdist bdist_wheel --bdist-dir ~/temp/bdistwheel

Regular install

$ pip install -e .

To setup local Development

$ pip install -e ".[dev]"

"""
from pathlib import Path

from setuptools import find_packages, setup

# Distribution name on PyPI.
NAME = "kedro-diff"

# Long description comes straight from the README.
README = (Path(__file__).parent / "README.md").read_text(encoding="utf-8")

# Runtime and dev requirements are kept in plain requirements files.
with open("requirements.txt", "r", encoding="utf-8") as f:
    requires = [x.strip() for x in f if x.strip()]

with open("requirements_dev.txt", "r", encoding="utf-8") as f:
    dev_requires = [x.strip() for x in f if x.strip()]

setup(
    name=NAME,
    version="0.1.1",  # kept in sync by bump2version (see setup.cfg)
    description="diff commits to your kedro pipeline",
    long_description=README,
    long_description_content_type="text/markdown",
    classifiers=[
        "Development Status :: 4 - Beta",
        "Environment :: Plugins",
        "Framework :: Kedro",
        "Intended Audience :: Developers",
        "Operating System :: Microsoft :: Windows",
        "Operating System :: MacOS",
        "Operating System :: POSIX :: Linux",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Typing :: Typed",
    ],
    url="https://github.com/WaylonWalker/kedro-diff",
    packages=find_packages(),
    platforms="any",
    license="OSI APPROVED :: MIT LICENSE",
    author="Waylon Walker",
    keywords="pipelines, machine learning, data pipelines, data science, data engineering",
    install_requires=requires,
    extras_require={"dev": dev_requires},
    # registered both as a kedro plugin and as a standalone console script
    entry_points={
        "kedro.project_commands": ["kedro-diff = kedro_diff.cli:cli"],
        "console_scripts": ["kedro-diff = kedro_diff.cli:cli"],
    },
)
module""" 2 | -------------------------------------------------------------------------------- /tests/sample-data/empty.json: -------------------------------------------------------------------------------- 1 | { 2 | "kedro_version": "0.17.2", 3 | "pipeline": [] 4 | } 5 | -------------------------------------------------------------------------------- /tests/sample-data/more_nodes.json: -------------------------------------------------------------------------------- 1 | { 2 | "kedro_version": "0.17.2", 3 | "pipeline": [ 4 | { 5 | "name": "node1", 6 | "inputs": [], 7 | "outputs": [ 8 | "output1" 9 | ], 10 | "tags": [] 11 | }, 12 | { 13 | "name": "node2", 14 | "inputs": ["output1"], 15 | "outputs": [ 16 | "output2" 17 | ], 18 | "tags": [] 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /tests/sample-data/one-node-one-tag.json: -------------------------------------------------------------------------------- 1 | { 2 | "kedro_version": "0.17.2", 3 | "pipeline": [ 4 | { 5 | "name": "node1", 6 | "inputs": [], 7 | "outputs": [ 8 | "output1" 9 | ], 10 | "tags": ["onetag"] 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /tests/sample-data/one-node.json: -------------------------------------------------------------------------------- 1 | { 2 | "kedro_version": "0.17.2", 3 | "pipeline": [ 4 | { 5 | "name": "node1", 6 | "inputs": [], 7 | "outputs": [ 8 | "output1" 9 | ], 10 | "tags": [] 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /tests/sample-data/two-nodes-two-tags.json: -------------------------------------------------------------------------------- 1 | { 2 | "kedro_version": "0.17.2", 3 | "pipeline": [ 4 | { 5 | "name": "node1", 6 | "inputs": [], 7 | "outputs": [ 8 | "output1" 9 | ], 10 | "tags": ["tag1"] 11 | }, 12 | { 13 | "name": "node2", 14 | "inputs": ["output1"], 15 | "outputs": [ 16 | 
"output2" 17 | ], 18 | "tags": ["tag2"] 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /tests/sample-data/two-nodes.json: -------------------------------------------------------------------------------- 1 | { 2 | "kedro_version": "0.17.2", 3 | "pipeline": [ 4 | { 5 | "name": "node1", 6 | "inputs": [], 7 | "outputs": [ 8 | "output1" 9 | ], 10 | "tags": [] 11 | }, 12 | { 13 | "name": "node2", 14 | "inputs": ["output1"], 15 | "outputs": [ 16 | "output2" 17 | ], 18 | "tags": [] 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /tests/test_commit_parser.py: -------------------------------------------------------------------------------- 1 | """Test commit parser.""" 2 | 3 | import pytest 4 | 5 | from kedro_diff.commit_parser import parse_commit 6 | from kedro_diff.errors import KedroDiffError 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "commit,expected", 11 | [ 12 | (("master", "main"), ("master", "main")), 13 | (("main", "master"), ("main", "master")), 14 | (("master", "develop"), ("master", "develop")), 15 | (("master", "develop/new-feat"), ("master", "develop/new-feat")), 16 | (("master", "develop~2"), ("master", "develop~2")), 17 | ( 18 | ("master", "1c96dd23af05edf42ae46251022e51b7"), 19 | ("master", "1c96dd23af05edf42ae46251022e51b7"), 20 | ), 21 | ("master..main", ("master", "main")), 22 | ("master...main", ("master", "main")), 23 | (("master...main",), ("master", "main")), 24 | (("master..main",), ("master", "main")), 25 | ( 26 | ("master..1c96dd23af05edf42ae46251022e51b7",), 27 | ("master", "1c96dd23af05edf42ae46251022e51b7"), 28 | ), 29 | (("master..main~2",), ("master", "main~2")), 30 | (("..main",), ("HEAD", "main")), 31 | (("...main",), ("HEAD", "main")), 32 | (("main",), ("main", "HEAD")), 33 | (("main~2",), ("main~2", "HEAD")), 34 | ("master...develop/new-feat", ("master", "develop/new-feat")), 35 | ("main", ("main", "HEAD")), 36 | 
("develop/new-feat", ("develop/new-feat", "HEAD")), 37 | ("..develop/new-feat", ("HEAD", "develop/new-feat")), 38 | (("main", "master", "develop"), KedroDiffError), 39 | (("main..master..develop"), KedroDiffError), 40 | (("main..master..develop..mine..yours..ours..thiers"), KedroDiffError), 41 | ((), KedroDiffError), 42 | ((""), KedroDiffError), 43 | ], 44 | ) 45 | def test_parse(commit, expected): 46 | """Test Commit input is parsed as expected.""" 47 | if type(expected) == type and issubclass(expected, Exception): 48 | with pytest.raises(expected): 49 | commit1, commit2 = parse_commit(commit) 50 | else: 51 | commit1, commit2 = parse_commit(commit) 52 | assert commit1 == expected[0] 53 | assert commit2 == expected[1] 54 | -------------------------------------------------------------------------------- /tests/test_diff.py: -------------------------------------------------------------------------------- 1 | import json 2 | from copy import deepcopy 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from kedro_diff import KedroDiff 8 | from kedro_diff.sample_data import create_simple_sample 9 | 10 | DATA = Path(__file__).parent / "sample-data" 11 | 12 | pipe10 = create_simple_sample(10) 13 | pipe10_change_one_input = deepcopy(pipe10) 14 | pipe10_change_one_input["pipeline"][2]["inputs"] = "input1" 15 | 16 | pipe10_change_one_output = deepcopy(pipe10) 17 | pipe10_change_one_output["pipeline"][2]["outputs"] = ["output1"] 18 | 19 | pipe10_change_one_tag = deepcopy(pipe10) 20 | pipe10_change_one_tag["pipeline"][2]["tags"] = ["tag1"] 21 | 22 | 23 | pipe_params = [ 24 | { 25 | "name": "__default__", 26 | "pipe1": create_simple_sample(0), 27 | "pipe2": create_simple_sample(1), 28 | "expected_contains": ("M", "1", "__default__", "+"), 29 | "expected_not_contains": ("-", "++", "??", "data_engineering"), 30 | }, 31 | { 32 | "name": "__default__", 33 | "pipe1": create_simple_sample(1), 34 | "pipe2": create_simple_sample(0), 35 | "expected_contains": ("M", "1", 
"__default__", "-"), 36 | "expected_not_contains": ("+", "--", "??", "data_engineering"), 37 | }, 38 | { 39 | "name": "data_engineering", 40 | "pipe1": create_simple_sample(0), 41 | "pipe2": create_simple_sample(1), 42 | "expected_contains": ("M", "1", "data_engineering", "+"), 43 | "expected_not_contains": ("-", "++", "??", "__default__"), 44 | }, 45 | { 46 | "name": "__default__", 47 | "pipe1": create_simple_sample(10, name_prefix="first"), 48 | "pipe2": create_simple_sample(10, name_prefix="second"), 49 | "expected_contains": ("M", "20", "__default__", "+" * 10, "-" * 10), 50 | "expected_not_contains": ("+" * 11, "-" * 11, "??", "data_engineering"), 51 | }, 52 | { 53 | "name": "__default__", 54 | "pipe1": pipe10, 55 | "pipe2": pipe10_change_one_input, 56 | "expected_contains": ("M", "2", "__default__", "+", "-"), 57 | "expected_not_contains": ("+" * 2, "-" * 2, "??", "data_engineering"), 58 | }, 59 | { 60 | "name": "__default__", 61 | "pipe1": pipe10, 62 | "pipe2": pipe10_change_one_output, 63 | "expected_contains": ("M", "2", "__default__", "+", "-"), 64 | "expected_not_contains": ("+" * 2, "-" * 2, "??", "data_engineering"), 65 | }, 66 | { 67 | "name": "__default__", 68 | "pipe1": pipe10, 69 | "pipe2": pipe10_change_one_tag, 70 | "expected_contains": ("M", "2", "__default__", "+", "-"), 71 | "expected_not_contains": ("+" * 2, "-" * 2, "??", "data_engineering"), 72 | }, 73 | ] 74 | 75 | 76 | def load_pipes(*files): 77 | return (json.loads((DATA / file).read_text()) for file in files) 78 | 79 | 80 | @pytest.mark.parametrize("params", pipe_params) 81 | def test_diff_stat_msg(params): 82 | def run(name, pipe1, pipe2, expected_contains, expected_not_contains): 83 | res = KedroDiff(pipe1, pipe2, name=name)._stat_msg 84 | 85 | for expected in expected_contains: 86 | assert expected in res 87 | 88 | for not_expected in expected_not_contains: 89 | assert not_expected not in res 90 | 91 | run(**params) 92 | 93 | 94 | @pytest.mark.parametrize("params", pipe_params) 95 | 
@pytest.mark.parametrize("params", pipe_params)
def test_diff_stat(capsys, params):
    """stat() prints the diff summary to stdout and writes nothing to stderr."""
    diff = KedroDiff(params["pipe1"], params["pipe2"], name=params["name"])
    diff.stat()

    captured = capsys.readouterr()
    assert captured.err == ""

    for token in params["expected_contains"]:
        assert token in captured.out

    for token in params["expected_not_contains"]:
        assert token not in captured.out


def test_diff_from_sample():
    """from_sample with 2 -> 4 nodes reports exactly two additions on __default__."""
    diff = KedroDiff.from_sample({"num_nodes": 2}, {"num_nodes": 4})
    message = diff._stat_msg
    assert "__default__" in message
    assert "++" in message
    assert "+++" not in message
    assert "-" not in message
def _metadata_filename(commit: str) -> str:
    """Sanitize a commit ref into its metadata filename.

    Mirrors kedro_diff's naming scheme: '/' and ' ' become '_'.  Extracted so
    the sanitization logic is written once instead of per metadata file.
    """
    safe = commit.replace("/", "_").replace(" ", "_")
    return f"{safe}-commit-metadata.json"


def _make_runargs(commit2: str) -> dict:
    """Build a meta1/meta2 pair differing only in meta2's commit ref.

    The four original parametrize cases were identical except for meta2's
    commit name (branch, sha, slash, dot); generate them from one template.
    """
    return {
        "meta1": {
            "commit": "main",
            "sha": "hs0af0ahj",
            "pipelines": ["__default__", "ten_nodes"],
        },
        "meta2": {
            "commit": commit2,
            "sha": "hs0af0ahj",
            "pipelines": ["__default__", "ten_nodes", "twenty_nodes"],
        },
    }


@pytest.mark.parametrize(
    "runargs",
    [
        _make_runargs(commit)
        for commit in ("HEAD", "hs0af0ahj", "feat/new-nodes", "feat/new.nodes")
    ],
)
def test_load_commit_metadata(tmpdir, runargs):
    """load_commit_metadata resolves both metadata files for a `a..b` range.

    Writes the two commit-metadata JSON files into `.kedro-diff/` under a tmp
    root, then loads them by ref range; commit refs containing '/', '.' or a
    bare sha must all resolve.
    """
    meta1, meta2 = runargs["meta1"], runargs["meta2"]
    p = tmpdir.mkdir(".kedro-diff")
    for meta in (meta1, meta2):
        p.join(_metadata_filename(meta["commit"])).write(json.dumps(meta))
    load_commit_metadata(f"{meta1['commit']}..{meta2['commit']}", root_dir=tmpdir)


def test_main(capsys):
    """Running ``python -m kedro_diff`` prints the demo diff output cleanly.

    The entry point should emit the example KedroDiff.stat() report on stdout
    and nothing on stderr.
    """
    proc = subprocess.Popen(
        ["python", "-m", "kedro_diff"], stderr=subprocess.PIPE, stdout=subprocess.PIPE
    )
    # BUG FIX: communicate() drains both pipes and waits, avoiding the
    # deadlock risk of wait() followed by readlines() on full pipe buffers.
    stdout_bytes, stderr_bytes = proc.communicate()
    # BUG FIX: decode the bytes instead of joining str(line) — str(b"x")
    # yields "b'x'", so the old `stderr == ""` check only passed because
    # stderr happened to be empty.
    stdout = stdout_bytes.decode()
    stderr = stderr_bytes.decode()
    assert stderr == ""
    assert proc.returncode == 0
    assert_contains = [
        "KedroDiff Examples",
        "KedroDiff.stat()",
        "__default__",
        "+++",
        "---",
        "M",
    ]
    for check_contains in assert_contains:
        assert check_contains in stdout
datasets = ["cars", "trains", None]
names = ["node1", "node2", None]

# One node dict per (name, inputs, outputs) combination.  Each dict gets its
# own lambda object, so distinct entries never compare equal via `func`.
nodes = [
    {"name": name, "inputs": inputs, "outputs": outputs, "func": lambda x: x}
    for name, inputs, outputs in product(names, datasets, datasets)
]


# Pair every node with every node; a pair is expected "changed" exactly when
# the two dicts are unequal (only a node paired with itself is unchanged).
changed_nodes = [
    {"node1": node1, "node2": node2, "expected": (node1 != node2)}
    for node1, node2 in product(nodes, nodes)
]


@pytest.mark.parametrize(
    "runargs",
    [
        {
            "node1": {"name": "node1", "inputs": None, "outputs": None},
            "node2": None,
            "expected": True,
        },
        {
            "node1": None,
            "node2": {"name": "node1", "inputs": None, "outputs": None},
            "expected": True,
        },
        *changed_nodes,
    ],
)
def test_is_changed(runargs):
    """is_changed is True whenever the two nodes differ or one is missing."""

    def run(node1, node2, expected):
        # BUG FIX: the failure message previously interpolated node1 twice
        # ("node2: {node1}"), hiding the actual second node on failure.
        assert (
            NodeDiff(node1, node2).is_changed == expected
        ), f"node1:{node1}, node2: {node2}, expected: {expected}"

    run(**runargs)


def test_is_none_true():
    assert NodeDiff(None, None).is_none is True


def test_is_none_false():
    assert NodeDiff(None, nodes[0]).is_none is False


def test_is_new_true():
    assert NodeDiff(None, nodes[0]).is_new is True


def test_is_new_false():
    assert NodeDiff(nodes[0], nodes[0]).is_new is False
    assert NodeDiff(nodes[0], None).is_new is False


def test_is_deleted_true():
    assert NodeDiff(nodes[0], None).is_deleted is True


def test_is_deleted_false():
    # BUG FIX: the first assertion previously re-checked `is_new` (a
    # copy-paste from test_is_new_false); this test must cover `is_deleted`.
    assert NodeDiff(nodes[0], nodes[0]).is_deleted is False
    assert NodeDiff(None, nodes[0]).is_deleted is False
@pytest.mark.parametrize(
    "runargs",
    [
        {
            "node1": nodes[0],
            "node2": nodes[0],
            "expected": {"inputs", "outputs", "name", "func"},
        },
        {
            "node1": None,
            "node2": nodes[0],
            "expected": {"inputs", "outputs", "name", "func"},
        },
        {
            "node1": nodes[0],
            "node2": None,
            "expected": {"inputs", "outputs", "name", "func"},
        },
        {
            "node1": {"func": lambda x: x},
            "node2": None,
            "expected": {"func"},
        },
    ],
)
def test_attrs(runargs):
    """attrs is the set of attribute names present on either node."""
    diff = NodeDiff(runargs["node1"], runargs["node2"])
    assert set(diff.attrs) == runargs["expected"]


@pytest.mark.parametrize(
    "runargs",
    [
        {
            "node1": nodes[0],
            "node2": nodes[0],
            "attr": "name",
            "expected": (nodes[0]["name"], nodes[0]["name"]),
        },
        {
            "node1": nodes[0],
            "node2": nodes[0],
            "attr": "not_here",
            "expected": (None, None),
        },
        {
            "node1": None,
            "node2": nodes[0],
            "attr": "name",
            "expected": (None, nodes[0]["name"]),
        },
        {
            "node1": nodes[0],
            "node2": None,
            "attr": "name",
            "expected": (nodes[0]["name"], None),
        },
        {
            "node1": {"func": lambda x: x},
            "node2": None,
            "attr": "func",
            "expected": (None, None),
        },
    ],
)
def test_get_attr(runargs):
    """get_attr returns the (node1, node2) values, with None for missing sides."""
    diff = NodeDiff(runargs["node1"], runargs["node2"])
    assert diff.get_attr(runargs["attr"]) == runargs["expected"]


@pytest.mark.parametrize(
    "runargs",
    [
        {"node1": nodes[0], "node2": nodes[0], "attr": "name"},
        {"node1": nodes[0], "node2": nodes[0], "attr": "inputs"},
        {"node1": nodes[0], "node2": nodes[0], "attr": "outputs"},
        {"node1": nodes[0], "node2": nodes[0], "attr": "func"},
    ],
)
def test_diff_attr_none(runargs, caplog, capsys):
    """Diffing an unchanged attribute logs nothing and prints nothing."""
    NodeDiff(runargs["node1"], runargs["node2"]).diff_attr(runargs["attr"])
    assert caplog.text == ""
    assert capsys.readouterr().out == ""
@pytest.mark.parametrize(
    "runargs",
    [
        {
            "node1": {
                "name": "node1",
                "inputs": "cars",
                "outputs": "carsout",
                "func": lambda x: x,
            },
            "node2": {
                "name": "node1",
                "inputs": "cars",
                "outputs": "carsout1",
                "func": lambda x: x,
            },
            "expected_ins": ["node1", "outputs", "carsout", "carsout1"],
        },
    ],
)
def test_diff(runargs, caplog, capsys):
    """diff() prints the changed attribute values without using the logger."""
    NodeDiff(runargs["node1"], runargs["node2"], "node1").diff()
    assert caplog.text == ""
    printed = capsys.readouterr().out
    for expected in runargs["expected_ins"]:
        assert expected in printed


def test_diff_none_none(caplog, capsys):
    """Diffing two missing nodes at default verbosity stays quiet on the log."""
    NodeDiff(None, None, "node1").diff()
    assert caplog.text == ""
    printed = capsys.readouterr().out
    assert "" in printed


def test_diff_none_none_verbose(caplog, capsys):
    """At verbose_level=2 a double-None diff reports that the node is None."""
    NodeDiff(None, None, "node1", verbose_level=2).diff()
    assert caplog.text == ""
    printed = capsys.readouterr().out
    assert "is None" in printed


def test_diff_Node_equal(caplog, capsys):
    """Identical kedro Nodes produce no output at the default verbosity."""
    node1 = node(lambda x: x, "input", "output", name="id")
    node2 = node(lambda x: x, "input", "output", name="id")
    NodeDiff(node1, node2, "node1").diff()
    assert caplog.text == ""
    printed = capsys.readouterr().out
    assert "unchanged" not in printed
    assert printed == ""


def test_diff_Node_equal_verbose(caplog, capsys):
    """At verbose_level=2 identical kedro Nodes are reported as unchanged."""
    node1 = node(lambda x: x, "input", "output", name="id")
    node2 = node(lambda x: x, "input", "output", name="id")
    NodeDiff(node1, node2, "node1", verbose_level=2).diff()
    assert caplog.text == ""
    printed = capsys.readouterr().out
    assert "unchanged" in printed
def test_attrs_none_none():
    """With no node on either side there are no attributes to compare."""
    assert NodeDiff(None, None, "node1").attrs == []


def test_diff_Node_attrs(caplog, capsys):
    """Real kedro Nodes expose func/inputs/outputs as diffable attributes."""
    node1 = node(lambda x: x, "input", "output", name="id")
    node2 = node(lambda x: x, "input", "output", name="id")
    attrs = NodeDiff(node1, node2, "node1").attrs
    for expected in ("func", "inputs", "outputs"):
        assert expected in attrs
    # Merely inspecting attrs must not log or print anything.
    assert caplog.text == ""
    printed = capsys.readouterr().out
    assert "unchanged" not in printed
    assert printed == ""


# Every label the node_diff demo module is expected to print when imported.
should_prints = [
    "name_changed",
    "input_changed",
    "output_changed",
    "new_node",
    "deleted_node",
    "inputs",
    "outputs",
]


def test_node_diff_main(capsys, caplog):
    """Importing the demo __main__ prints every example diff label, log-free."""
    from kedro_diff.node_diff import __main__  # noqa: F401

    assert caplog.text == ""
    captured = capsys.readouterr()
    for should_print in should_prints:
        assert should_print in captured.out