├── .github ├── dependabot.yml └── workflows │ ├── artifacts.yml │ ├── check_version.yml │ ├── pip-audit.yml │ ├── publish_docs.yml │ ├── pythonpackage.yml │ └── pythonpublish.yml ├── .gitignore ├── CITATION.cff ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── bindist ├── .gitignore ├── Makefile └── graphtage_bin.py ├── docs ├── .gitignore ├── Makefile ├── _static │ └── localtoc.js ├── _templates │ ├── layout.html │ └── searchbox.html ├── build_api.py ├── builders.rst ├── conf.py ├── example.png ├── extending.rst ├── filetypes.rst ├── howitworks.rst ├── index.rst ├── library.rst └── printing.rst ├── graphtage ├── __init__.py ├── __main__.py ├── ast.py ├── bounds.py ├── builder.py ├── constraints.py ├── csv.py ├── dataclasses.py ├── debug.py ├── edits.py ├── expressions.py ├── fibonacci.py ├── formatter.py ├── graphtage.py ├── json.py ├── levenshtein.py ├── matching.py ├── multiset.py ├── object_set.py ├── pickle.py ├── plist.py ├── printer.py ├── progress.py ├── pydiff.py ├── search.py ├── sequences.py ├── tree.py ├── utils.py ├── version.py ├── xml.py └── yaml.py ├── setup.py └── test ├── __init__.py ├── test_bounds.py ├── test_builder.py ├── test_constraints.py ├── test_dataclasses.py ├── test_expressions.py ├── test_fibonacci.py ├── test_formatting.py ├── test_graphtage.py ├── test_levenshtein.py ├── test_matching.py ├── test_object_set.py ├── test_pydiff.py ├── test_search.py ├── test_timing.py ├── test_utils.py ├── test_xml.py └── timing.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - package-ecosystem: github-actions 5 | directory: / 6 | schedule: 7 | interval: daily 8 | -------------------------------------------------------------------------------- /.github/workflows/artifacts.yml: -------------------------------------------------------------------------------- 1 | name: Build binary artifacts 2 | 3 | on: 4 | push: 5 | tags: 
 6 | - "v*" 7 | 8 | jobs: 9 | binaries: 10 | 11 | strategy: 12 | matrix: 13 | os: [ ubuntu-latest, macos-latest ] # windows-latest, 14 | 15 | runs-on: ${{ matrix.os }} 16 | 17 | permissions: 18 | # NOTE: Needed to save artifacts to the repository. 19 | contents: write 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | with: 24 | submodules: recursive 25 | - name: Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install setuptools pyinstaller 33 | pip install . 34 | - name: Build the binary 35 | run: | 36 | make -C bindist 37 | cd bindist && echo "DIST_FILE=`make dist-name | tr -d '\n'`" >> $GITHUB_ENV 38 | - name: Release binary artifacts 39 | uses: softprops/action-gh-release@v0.1.15 40 | with: 41 | files: bindist/${{ env.DIST_FILE }} 42 | -------------------------------------------------------------------------------- /.github/workflows/check_version.yml: -------------------------------------------------------------------------------- 1 | name: Check Release Version 2 | 3 | on: 4 | release: 5 | types: [created, edited, published] 6 | 7 | jobs: 8 | versioncheck: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up Python 14 | uses: actions/setup-python@v5 15 | with: 16 | python-version: '3.x' 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install setuptools 21 | pip install . 22 | - name: Ensure graphtage.version.DEV_BUILD == False 23 | run: graphtage -dumpversion | grep -qv git 24 | -------------------------------------------------------------------------------- /.github/workflows/pip-audit.yml: -------------------------------------------------------------------------------- 1 | # IMPORTANT: Read and understand this template fully before applying it. 
2 | 3 | name: Scan dependencies for vulnerabilities with pip-audit 4 | 5 | on: 6 | push: 7 | branches: [ "master" ] 8 | pull_request: 9 | branches: [ "master" ] 10 | schedule: 11 | - cron: "0 12 * * *" 12 | 13 | jobs: 14 | pip-audit: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | 21 | - name: Install Python 22 | uses: actions/setup-python@v5 23 | with: 24 | # IMPORTANT: You may need a more specific version here. 25 | python-version: "3.x" 26 | 27 | - name: Install project 28 | run: | 29 | python -m venv /tmp/pip-audit-env 30 | source /tmp/pip-audit-env/bin/activate 31 | 32 | python -m pip install --upgrade pip setuptools wheel 33 | python -m pip install . 34 | 35 | 36 | - name: Run pip-audit 37 | uses: pypa/gh-action-pip-audit@v1.0.8 38 | with: 39 | virtual-environment: /tmp/pip-audit-env 40 | 41 | -------------------------------------------------------------------------------- /.github/workflows/publish_docs.yml: -------------------------------------------------------------------------------- 1 | name: Publish documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - v* 9 | 10 | jobs: 11 | deploydocs: 12 | runs-on: ubuntu-latest 13 | permissions: 14 | # NOTE: Needed to push to the repository. 15 | contents: write 16 | steps: 17 | - uses: actions/checkout@v4 18 | with: 19 | path: graphtage 20 | - name: Get the version 21 | id: get_version 22 | run: echo "::set-env name=VERSION::${GITHUB_REF#refs/*/}" 23 | env: 24 | # The use of ::set-env here is safe! 
 25 | ACTIONS_ALLOW_UNSECURE_COMMANDS: 'true' 26 | - name: Set up Python 3.8 27 | uses: actions/setup-python@v5 28 | with: 29 | python-version: 3.8 30 | - name: Install dependencies 31 | run: | 32 | cd graphtage 33 | python -m pip install --upgrade pip 34 | pip install setuptools 35 | pip install .[dev] 36 | - name: Build documentation 37 | run: | 38 | cd graphtage/docs 39 | make html 40 | - name: Checkout gh-pages branch 41 | uses: actions/checkout@v4 42 | with: 43 | ref: gh-pages 44 | path: gh-pages 45 | fetch-depth: 0 46 | - name: Commit documentation changes 47 | run: | 48 | cd gh-pages 49 | git pull 50 | rm -rf ${VERSION} 51 | mkdir ${VERSION} 52 | cp -r ../graphtage/docs/_build/html/* ${VERSION}/ 53 | cd ${VERSION} 54 | git config --local user.email "action@github.com" 55 | git config --local user.name "GitHub Action" 56 | git add . 57 | if [ "$GITHUB_REF" == "refs/heads/master" ]; then 58 | cd .. 59 | # This is not a tag, so it is the latest: 60 | rm -f latest 61 | ln -s ${VERSION} latest 62 | git add latest 63 | fi 64 | git commit -m "Update documentation for ${GITHUB_REF}" -a || true 65 | # The above command will fail if no changes were present, so we ignore 66 | # the return code. 
67 | - name: Push changes 68 | uses: ad-m/github-push-action@master 69 | with: 70 | branch: gh-pages 71 | directory: gh-pages 72 | github_token: ${{ secrets.GITHUB_TOKEN }} 73 | -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | 14 | build: 15 | 16 | runs-on: ubuntu-latest 17 | strategy: 18 | matrix: 19 | python-version: [3.8, 3.9, "3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install setuptools 31 | pip install .[dev] 32 | - name: Lint with flake8 33 | run: | 34 | pip install flake8 35 | # stop the build if there are Python syntax errors or undefined names 36 | flake8 graphtage test --count --select=E9,F63,F7,F82 --show-source --statistics 37 | # exit-zero treats all errors as warnings. 
 The GitHub editor is 127 chars wide 38 | flake8 graphtage test --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 39 | - name: Test building documentation 40 | run: | 41 | cd docs 42 | make html 43 | - name: Test with pytest 44 | run: | 45 | pip install pytest 46 | pytest 47 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [published] 9 | 10 | jobs: 11 | 12 | deploy: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.x' 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | pip install setuptools wheel twine 26 | - name: Build and publish 27 | env: 28 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 29 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 30 | run: | 31 | python setup.py sdist bdist_wheel 32 | twine upload dist/* 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .cache 3 | .python_history 4 | *.pyc 5 | build/ 6 | dist/ 7 | graphtage.egg-info 8 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 
3 | 4 | cff-version: 1.2.0 5 | title: Graphtage 6 | message: >- 7 | Graphtage is a command-line utility and underlying library 8 | for semantically comparing and merging tree-like 9 | structures, such as JSON, XML, HTML, YAML, plist, and CSS 10 | files. 11 | type: software 12 | authors: 13 | - given-names: Evan 14 | family-names: Sultanik 15 | email: evan.sultanik@trailofbits.com 16 | affiliation: Trail of Bits 17 | orcid: 'https://orcid.org/0000-0002-6246-1422' 18 | repository-code: 'https://github.com/trailofbits/graphtage' 19 | url: 'https://trailofbits.github.io/graphtage/' 20 | abstract: >- 21 | Graphtage is a command-line utility and underlying library 22 | for semantically comparing and merging tree-like 23 | structures, such as JSON, XML, HTML, YAML, plist, and CSS 24 | files. Its name is a portmanteau of “graph” and 25 | “graftage”—the latter being the horticultural practice of 26 | joining two trees together such that they grow as one. 27 | keywords: 28 | - diffing 29 | - graph isomorphism 30 | - edit distance 31 | license: LGPL-3.0 32 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @ESultanik 2 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at jean.bisutti@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 
14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | recursive-include test *.* 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graphtage 2 | 3 | [![PyPI version](https://badge.fury.io/py/graphtage.svg)](https://badge.fury.io/py/graphtage) 4 | [![Tests](https://github.com/trailofbits/graphtage/workflows/Python%20package/badge.svg)](https://github.com/trailofbits/graphtage/actions) 5 | [![Slack Status](https://slack.empirehacking.nyc/badge.svg)](https://slack.empirehacking.nyc) 6 | 7 | Graphtage is a command-line utility and [underlying library](https://trailofbits.github.io/graphtage/latest/library.html) 8 | for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, plist, and CSS files. Its name is a 9 | portmanteau of “graph” and “graftage”—the latter being the horticultural practice of joining two trees together such 10 | that they grow as one. 
11 | 12 | ```console 13 | $ echo Original: && cat original.json && echo Modified: && cat modified.json 14 | ``` 15 | ```json 16 | Original: 17 | { 18 | "foo": [1, 2, 3, 4], 19 | "bar": "testing" 20 | } 21 | Modified: 22 | { 23 | "foo": [2, 3, 4, 5], 24 | "zab": "testing", 25 | "woo": ["foobar"] 26 | } 27 | ``` 28 | ```console 29 | $ graphtage original.json modified.json 30 | ``` 31 | ```json 32 | { 33 | "z̟b̶ab̟r̶": "testing", 34 | "foo": [ 35 | 1̶,̶ 36 | 2, 37 | 3, 38 | 4,̟ 39 | 5̟ 40 | ],̟ 41 | "̟w̟o̟o̟"̟:̟ ̟[̟ 42 | "̟f̟o̟o̟b̟a̟r̟"̟ 43 | ]̟ 44 | } 45 | ``` 46 | 47 | ## Installation 48 | 49 | ```console 50 | $ pip3 install graphtage 51 | ``` 52 | 53 | ## Command Line Usage 54 | 55 | ### Output Formatting 56 | Graphtage performs an analysis on an intermediate representation of the trees that is divorced from the filetypes of the 57 | input files. This means, for example, that you can diff a JSON file against a YAML file. Also, the output format can be 58 | different from the input format(s). By default, Graphtage will format the output diff in the same file format as the 59 | first input file. But one could, for example, diff two JSON files and format the output in YAML. There are several 60 | command-line arguments to specify these transformations, such as `--format`; please check the `--help` output for more 61 | information. 62 | 63 | By default, Graphtage pretty-prints its output with as many line breaks and indents as possible. 
64 | ```json 65 | { 66 | "foo": [ 67 | 1, 68 | 2, 69 | 3 70 | ], 71 | "bar": "baz" 72 | } 73 | ``` 74 | Use the `--join-lists` or `-jl` option to suppress linebreaks after list items: 75 | ```json 76 | { 77 | "foo": [1, 2, 3], 78 | "bar": "baz" 79 | } 80 | ``` 81 | Likewise, use the `--join-dict-items` or `-jd` option to suppress linebreaks after key/value pairs in a dict: 82 | ```json 83 | {"foo": [ 84 | 1, 85 | 2, 86 | 3 87 | ], "bar": "baz"} 88 | ``` 89 | Use `--condensed` or `-j` to apply both of these options: 90 | ```json 91 | {"foo": [1, 2, 3], "bar": "baz"} 92 | ``` 93 | 94 | The `--only-edits` or `-e` option will print out a list of edits rather than applying them to the input file in place. 95 | 96 | The `--edit-digest` or `-d` option is like `--only-edits` but prints a more concise context for each edit that is more 97 | human-readable. 98 | 99 | ### Matching Options 100 | By default, Graphtage tries to match all possible pairs of elements in a dictionary. 101 | 102 | Matching two dictionaries with each other is hard. Although computationally tractable, this can sometimes be onerous for 103 | input files with huge dictionaries. Graphtage has three different strategies for matching dictionaries: 104 | 1. `--dict-strategy match` (the most computationally expensive) tries to match all pairs of keys and values between the 105 | two dictionaries, resulting in a match of minimum edit distance; 106 | 2. `--dict-strategy none` (the least computationally expensive) will not attempt to match any key/value pairs unless 107 | they have the exact same key; and 108 | 3. `--dict-strategy auto` (the default) will automatically match the values of any key-value pairs that have identical 109 | keys and then use the `match` strategy for the remainder of key/value pairs. 110 | 111 | See [Pull Request #51](https://github.com/trailofbits/graphtage/pull/51) for some examples of how these strategies 112 | affect output. 
113 | 114 | The `--no-list-edits` or `-l` option will not consider interstitial insertions and removals when comparing two lists. 115 | The `--no-list-edits-when-same-length` or `-ll` option is a less drastic version of `-l` that will behave normally for 116 | lists that are of different lengths but behave like `-l` for lists that are of the same length. 117 | 118 | ### ANSI Color 119 | By default, Graphtage will only use ANSI color in its output if it is run from a TTY. If, for example, you would like 120 | to have Graphtage emit colorized output from a script or pipe, use the `--color` or `-c` argument. To disable color even 121 | when running on a TTY, use `--no-color`. 122 | 123 | ### HTML Output 124 | Graphtage can optionally emit the diff in HTML with the `--html` option. 125 | ```console 126 | $ graphtage --html original.json modified.json > diff.html 127 | ``` 128 | 129 | ### Status and Logging 130 | By default, Graphtage prints status messages and a progress bar to STDERR. To suppress this, use the `--no-status` 131 | option. To additionally suppress all but critical log messages, use `--quiet`. Fine-grained control of log messages is 132 | via the `--log-level` option. 133 | 134 | ## Why does Graphtage exist? 135 | 136 | Diffing tree-like structures with unordered elements is tough. Say you want to compare two JSON files. 137 | There are [limited tools available](https://github.com/zgrossbart/jdd), which are effectively equivalent to 138 | canonicalizing the JSON (_e.g._, sorting dictionary elements by key) and performing a standard diff. This is not always 139 | sufficient. For example, if a key in a dictionary is changed but its value is not, a traditional diff 140 | will conclude that the entire key/value pair was replaced by the new one, even though the only change was the key 141 | itself. See [our documentation](https://trailofbits.github.io/graphtage/latest/howitworks.html) for more information. 
142 | 143 | ## Using Graphtage as a Library 144 | 145 | Graphtage has a complete API for programmatically operating its diffing capabilities. 146 | When using Graphtage as a library, it is also capable of diffing in-memory Python objects. 147 | This can be useful for debugging Python code, for example, to determine a differential between two objects. 148 | See [our documentation](https://trailofbits.github.io/graphtage/latest/library.html) for more information. 149 | 150 | ## Extending Graphtage 151 | 152 | Graphtage is designed to be extensible: New filetypes can easily be defined, as well as new node types, edit types, 153 | formatters, and printers. See [our documentation](https://trailofbits.github.io/graphtage/latest/extending.html) for 154 | more information. 155 | 156 | Complete API documentation is available [here](https://trailofbits.github.io/graphtage/latest/package.html). 157 | 158 | ## License and Acknowledgements 159 | 160 | This research was developed by [Trail of Bits](https://www.trailofbits.com/) with partial funding from the Defense 161 | Advanced Research Projects Agency (DARPA) under the SafeDocs program as a subcontractor to [Galois](https://galois.com). 162 | It is licensed under the [GNU Lesser General Public License v3.0](LICENSE). 163 | [Contact us](mailto:opensource@trailofbits.com) if you're looking for an exception to the terms. 164 | © 2020–2023, Trail of Bits. 
165 | -------------------------------------------------------------------------------- /bindist/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | graphtage.spec 4 | graphtage-*.tgz -------------------------------------------------------------------------------- /bindist/Makefile: -------------------------------------------------------------------------------- 1 | GRAPHTAGE_VERSION=$(shell graphtage --version 2>&1 | sed "s/Graphtage version //") 2 | DIST_VERSION=$(shell uname | tr '[:upper:]' '[:lower:]')-$(shell uname -m | tr '[:upper:]' '[:lower:]') 3 | DIST_NAME=graphtage-$(GRAPHTAGE_VERSION)-$(DIST_VERSION) 4 | DIST_FILE=$(DIST_NAME).zip 5 | 6 | .PHONY: $(DIST_FILE) 7 | $(DIST_FILE): 8 | pyinstaller -F -y --name graphtage graphtage_bin.py 9 | @rm -rf $(DIST_NAME) 10 | mkdir $(DIST_NAME) 11 | cp dist/graphtage $(DIST_NAME)/ 12 | cp -p ../README.md $(DIST_NAME)/ 13 | cp -p ../LICENSE $(DIST_NAME)/ 14 | zip -r $(DIST_FILE) $(DIST_NAME) 15 | rm -rf $(DIST_NAME) 16 | 17 | .PHONY: dist-name 18 | dist-name: 19 | @echo $(DIST_FILE) 20 | 21 | .PHONY: clean 22 | clean: 23 | rm -rf graphtage.spec dist build $(DIST_FILE) $(DIST_NAME) 24 | -------------------------------------------------------------------------------- /bindist/graphtage_bin.py: -------------------------------------------------------------------------------- 1 | from graphtage.__main__ import main 2 | 3 | if __name__ == "__main__": 4 | import sys 5 | sys.exit(main()) 6 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | graphtage*.rst 3 | package.rst 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can 
set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile graphtage.rst package.rst 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | .PHONY: graphtage.rst 23 | graphtage.rst package.rst: 24 | # sphinx-apidoc wasn't configurable enough, so I wrote my own version: 25 | python3 build_api.py 26 | # graphtage.py, edits.py, and tree.py are all merged into the main graphtage module by __init__.py, 27 | # so we should not generate separate submodules for them: 28 | #sphinx-apidoc -f -e -M -T -o . ../graphtage ../graphtage/graphtage.py ../graphtage/edits.py ../graphtage/tree.py 29 | 30 | .PHONY: clean 31 | clean: 32 | rm -rf _build graphtage*.rst package.rst 33 | -------------------------------------------------------------------------------- /docs/_static/localtoc.js: -------------------------------------------------------------------------------- 1 | $( document ).ready(function (){ 2 | 3 | var createList = function(selector){ 4 | 5 | var ul = $(''); 6 | var selected = $(selector); 7 | 8 | if (selected.length === 0){ 9 | return; 10 | } 11 | 12 | selected.clone().each(function (i,e){ 13 | 14 | var p = $(e).children('.descclassname'); 15 | var n = $(e).children('.descname'); 16 | var l = $(e).children('.headerlink'); 17 | 18 | var a = $(''); 19 | a.attr('href',l.attr('href')).attr('title', 'Link to this definition'); 20 | 21 | a.append(p).append(n); 22 | 23 | var entry = $('
  • ').append(a); 24 | ul.append(entry); 25 | }); 26 | return ul; 27 | } 28 | 29 | if($('dl.class > dt').length || $('dl.function > dt').length || $('dl.data > dt').length) { 30 | /* collapse any open menus */ 31 | var menu = $('.wy-menu ul:first'); 32 | menu.find('.current').removeClass("current"); 33 | 34 | var pagename = $("h1")[0].innerText; 35 | 36 | if(pagename === "graphtage package") { 37 | pagename = "graphtage module"; 38 | } 39 | 40 | var header = $('
  • ' + pagename + '
  • ') 41 | var ul = $(''); 42 | header.append(ul); 43 | 44 | menu.find('ul:first').prepend(header); 45 | 46 | var x = []; 47 | x.push(['Classes','dl.class > dt']); 48 | x.push(['Functions','dl.function > dt']); 49 | x.push(['Variables','dl.data > dt']); 50 | 51 | var first = true; 52 | 53 | x.forEach(function (e) { 54 | var l = createList(e[1]); 55 | if (l) { 56 | var li = $('
  • ' + e[0] + '
  • ') 57 | if(first) { 58 | li.addClass("current"); 59 | first = false; 60 | } 61 | li.append(l); 62 | ul.append(li); 63 | } 64 | }); 65 | } 66 | 67 | }); 68 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {%- extends "!layout.html" %} 2 | 3 | {% block footer %} 4 | {% if not READTHEDOCS %} 5 |
    6 | 7 | Graphtage Documentation 8 | {{ version }} 9 | 10 | 11 |
    12 |
    13 |
    {{ _('Versions') }}
    14 | {% if test_versions %} 15 | {% for version in test_versions %} 16 |
    {{ version }}
    17 | {% endfor %} 18 | {% else %} 19 |
    latest
    20 |
    0.3.1
    21 |
    0.3.0
    22 |
    0.2.9
    23 |
    0.2.8
    24 |
    0.2.7
    25 |
    0.2.6
    26 |
    0.2.5
    27 |
    0.2.4
    28 |
    0.2.3
    29 |
    0.2.2
    30 |
    0.2.1
    31 |
    0.2.0
    32 |
    0.1.1
    33 |
    0.1.0
    34 | {% endif %} 35 |
    36 |
    37 |
    {{ _('Source Code') }}
    38 |
    39 | {{ _('GitHub Page') }} 40 |
    41 |
    42 |
    43 |
    44 | {% endif %} 45 | {% endblock %} -------------------------------------------------------------------------------- /docs/_templates/searchbox.html: -------------------------------------------------------------------------------- 1 | {%- if builder != 'singlehtml' %} 2 |
    {{ version }}
    3 |
    4 |
    5 | 6 | 7 | 8 |
    9 |
    10 | {%- endif %} 11 | -------------------------------------------------------------------------------- /docs/build_api.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | from pathlib import Path 5 | 6 | 7 | DOCS_PATH = os.path.dirname(os.path.realpath(__file__)) 8 | ROOT_PATH = Path(DOCS_PATH).parents[0] 9 | 10 | sys.path = [ROOT_PATH] + sys.path 11 | 12 | import graphtage 13 | 14 | MODULES = [] 15 | 16 | 17 | def process_module(module): 18 | shortname = module.__name__.split('.')[-1] 19 | with open(os.path.join(DOCS_PATH, f"{module.__name__}.rst"), 'w') as f: 20 | f.write(f"{module.__name__}\n") 21 | f.write(f"{'=' * len(module.__name__)}\n") 22 | f.write(f""" 23 | .. automodule:: {module.__name__} 24 | """) 25 | classes = [] 26 | for name, c in inspect.getmembers(module, inspect.isclass): 27 | if hasattr(c, '__module__') and c.__module__ == module.__name__ and not name.startswith('_'): 28 | classes.append(c) 29 | if classes: 30 | f.write(f""" 31 | {shortname} classes 32 | {'-' * len(shortname)}-------- 33 | """) 34 | for cls in sorted(classes, key=lambda c: c.__name__): 35 | f.write(f""" 36 | {cls.__name__} 37 | {'*' * len(cls.__name__)} 38 | 39 | .. autoclass:: {cls.__name__} 40 | :members: 41 | :undoc-members: 42 | :inherited-members: 43 | :show-inheritance: 44 | """) 45 | 46 | functions = [] 47 | for name, func in inspect.getmembers(module, inspect.isfunction): 48 | if hasattr(func, '__module__') and func.__module__ == module.__name__ and not name.startswith('_'): 49 | functions.append(func) 50 | if functions: 51 | f.write(f""" 52 | {shortname} functions 53 | {'-' * len(shortname)}---------- 54 | """) 55 | for func in sorted(functions, key=lambda o: o.__name__): 56 | f.write(f""" 57 | {func.__name__} 58 | {'*' * len(func.__name__)} 59 | 60 | .. 
autofunction:: {func.__name__} 61 | """) 62 | 63 | # attrs = [] 64 | # for name in dir(module): 65 | # if name.startswith('_'): 66 | # continue 67 | # attr = getattr(module, name) 68 | # if not inspect.isfunction(attr) and not inspect.isclass(attr) and not inspect.ismodule(attr) and ( 69 | # not hasattr(attr, '__module__') or attr.__module__ == module.__name__ 70 | # ) and inspect.getattr_static(attr, '__doc__') is not None: 71 | # attrs.append(name) 72 | # if attrs: 73 | # f.write(f""" 74 | # {shortname} attributes 75 | # {'-' * len(shortname)}----------- 76 | # """) 77 | # for name in sorted(attrs): 78 | # f.write(f""" 79 | # {name} 80 | # {'*' * len(name)} 81 | # 82 | # .. autoattribute:: {name} 83 | # """) 84 | 85 | 86 | 87 | for name, obj in inspect.getmembers(graphtage, inspect.ismodule): 88 | if obj.__name__.startswith('graphtage') and name not in ('graphtage', 'tree', 'edits'): 89 | MODULES.append(obj) 90 | 91 | MODULES = [graphtage] + sorted(MODULES, key=lambda m: m.__name__) 92 | 93 | for m in MODULES: 94 | process_module(m) 95 | 96 | with open(os.path.join(DOCS_PATH, "package.rst"), 'w') as f: 97 | f.write("""Graphtage API 98 | ------------- 99 | 100 | .. toctree:: 101 | :maxdepth: 4 102 | 103 | """) 104 | f.write('\n'.join(f' {m.__name__}' for m in MODULES)) 105 | -------------------------------------------------------------------------------- /docs/builders.rst: -------------------------------------------------------------------------------- 1 | .. _Builders: 2 | 3 | Constructing Graphtage Trees 4 | ============================ 5 | 6 | Graphtage operates on trees represented by the :class:`graphtage.TreeNode` base class. 7 | There are various predefined specializations of tree nodes, such as :class:`graphtage.IntegerNode` for integers, :class:`graphtage.ListNode` for lists, and :class:`graphtage.DictNode` for dictionaries. :class:`graphtage.TreeNode` has an optional :attr:`parent ` and a potentially empty set of :func:`children `. 
8 | 9 | Graphtage provides a :class:`graphtage.builder.Builder` class for conveniently converting arbitrary objects into a tree of :class:`TreeNode <graphtage.TreeNode>` objects. It uses Python magic to define the conversions. 10 | 11 | .. code-block:: python 12 | 13 | from graphtage import IntegerNode, TreeNode 14 | from graphtage.builder import Builder 15 | 16 | class CustomBuilder(Builder): 17 | @Builder.builder(int) 18 | def build_int(self, node: int, children: list[TreeNode]): 19 | return IntegerNode(node) 20 | 21 | >>> CustomBuilder().build_tree(10) 22 | IntegerNode(10) 23 | 24 | The :func:`@Builder.builder(int) <graphtage.builder.Builder.builder>` decorator specifies that the function is able to build a Graphtage `TreeNode` object from inputs that are instances (per :func:`isinstance`) of the type `int`. If there are multiple builder functions that match a given object, the function associated with the most specialized type is chosen. For example: 25 | 26 | .. code-block:: python 27 | 28 | class Foo: 29 | pass 30 | 31 | 32 | class Bar(Foo): 33 | pass 34 | 35 | 36 | class CustomBuilder(Builder): 37 | @Builder.builder(Foo) 38 | def build_foo(self, node: Foo, children: list[TreeNode]): 39 | return StringNode("foo") 40 | 41 | @Builder.builder(Bar) 42 | def build_bar(self, node: Bar, children: list[TreeNode]): 43 | return StringNode("bar") 44 | 45 | >>> CustomBuilder().build_tree(Foo()) 46 | StringNode("foo") 47 | >>> CustomBuilder().build_tree(Bar()) 48 | StringNode("bar") 49 | 50 | Expanding Children 51 | ------------------ 52 | 53 | So far we have only given examples of the production of leaf nodes, like integers and strings. 54 | What if a node has children, like a list? We can handle this using the :func:`@Builder.expander <graphtage.builder.Builder.expander>` decorator. Here is an example of how a list can be built: 55 | 56 | .. code-block:: python 57 | 58 | class CustomBuilder(Builder): 59 | ... 
60 | 61 | @Builder.expander(list) 62 | def expand_list(self, node: list): 63 | """Returns an iterable over the node's children""" 64 | yield from node 65 | 66 | @Builder.builder(list) 67 | def build_list(self, node: list, children: list[TreeNode]): 68 | return ListNode(children) 69 | 70 | >>> CustomBuilder().build_tree([1, 2, 3, 4]) 71 | ListNode([IntegerNode(1), IntegerNode(2), IntegerNode(3), IntegerNode(4)]) 72 | 73 | If an expander is not defined for a type, it is assumed that the type is a leaf with no children. 74 | 75 | If the root node or one of its descendants is of a type that has no associated builder function, a :exc:`NotImplementedError` is raised. 76 | 77 | Graphtage has a subclassed builder :class:`graphtage.builder.BasicBuilder` that has builders and expanders for the Python basic types like :class:`int`, :class:`float`, :class:`str`, :class:`bytes`, :class:`list`, :class:`dict`, :class:`set`, and :class:`tuple`. You can extend :class:`graphtage.builder.BasicBuilder` to implement support for additional types. 78 | 79 | Custom Nodes 80 | ------------ 81 | 82 | Graphtage provides abstract classes like :class:`graphtage.ContainerNode` and :class:`graphtage.SequenceNode` to aid in the implementation of custom node types. But the easiest way to define a custom node type is to extend off of :class:`graphtage.dataclasses.DataClass`. 83 | 84 | 85 | .. code-block:: python 86 | 87 | from graphtage import IntegerNode, ListNode, StringNode 88 | from graphtage.dataclasses import DataClass 89 | 90 | class CustomNode(DataClass): 91 | name: StringNode 92 | value: IntegerNode 93 | attributes: ListNode 94 | 95 | This will automatically build a node type that has three children: a string, an integer, and a list. 
96 | 97 | >>> CustomNode(name=StringNode("the name"), value=IntegerNode(1337), attributes=ListNode((IntegerNode(1), IntegerNode(2), IntegerNode(3)))) 98 | 99 | Let's say you have another, non-graphtage class that corresponds to :class:`CustomNode`: 100 | 101 | .. code-block:: python 102 | 103 | class NonGraphtageClass: 104 | name: str 105 | value: int 106 | attributes: list[int] 107 | 108 | You can add support for building Graphtage nodes from this custom class as follows: 109 | 110 | .. code-block:: python 111 | 112 | class CustomBuilder(BasicBuilder): 113 | @Builder.expander(NonGraphtageClass) 114 | def expand_non_graphtage_class(self, node: NonGraphtageClass): 115 | yield node.name 116 | yield node.value 117 | yield node.attributes 118 | 119 | @Builder.builder(NonGraphtageClass) 120 | def build_non_graphtage_class(self, node: NonGraphtageClass, children: list[TreeNode]) -> CustomNode: 121 | return CustomNode(*children) 122 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | 13 | import os 14 | from pathlib import Path 15 | 16 | VERSION_MODULE_PATH = os.path.join(Path(os.path.dirname(__file__)).parents[0], "graphtage", "version.py") 17 | 18 | 19 | def get_version_string(): 20 | attrs = {} 21 | with open(VERSION_MODULE_PATH) as f: 22 | exec(f.read(), attrs) 23 | vstring = attrs['VERSION_STRING'] 24 | if 'git' in vstring: 25 | return vstring 26 | else: 27 | return f"v{vstring}" 28 | 29 | 30 | # -- Project information ----------------------------------------------------- 31 | 32 | project = 'Graphtage' 33 | copyright = '2020, Trail of Bits' 34 | author = 'Evan Sultanik' 35 | 36 | # The full version, including alpha/beta/rc tags 37 | release = get_version_string() 38 | version = release 39 | github_url = 'https://github.com/trailofbits/graphtage/' 40 | if 'git' not in version: 41 | github_url = f"{github_url}releases/tag/{ version }" 42 | 43 | 44 | # -- General configuration --------------------------------------------------- 45 | 46 | # Add any Sphinx extension module names here, as strings. They can be 47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 48 | # ones. 49 | extensions = [ 50 | 'sphinx.ext.autodoc', 51 | 'sphinx.ext.napoleon', 52 | 'sphinx.ext.intersphinx', 53 | 'sphinx.ext.todo', 54 | 'sphinx.ext.autosectionlabel', 55 | 'sphinx_rtd_theme', 56 | #'sphinxcontrib.fulltoc' 57 | ] 58 | 59 | # Add any paths that contain templates here, relative to this directory. 60 | templates_path = ['_templates'] 61 | 62 | # List of patterns, relative to source directory, that match files and 63 | # directories to ignore when looking for source files. 64 | # This pattern also affects html_static_path and html_extra_path. 65 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 66 | 67 | 68 | # -- Options for HTML output ------------------------------------------------- 69 | 70 | # The theme to use for HTML and HTML Help pages. See the documentation for 71 | # a list of builtin themes. 
72 | # 73 | #html_theme = 'classic' 74 | html_theme = 'sphinx_rtd_theme' 75 | 76 | html_theme_options = { 77 | 'canonical_url': f'https://trailofbits.github.io/graphtage/latest/', 78 | 'logo_only': False, 79 | 'display_version': False, # This manually configured in our custom templates 80 | 'prev_next_buttons_location': 'bottom', 81 | 'style_external_links': True, 82 | #'vcs_pageview_mode': '', 83 | #'style_nav_header_background': 'white', 84 | # Toc options 85 | 'collapse_navigation': True, 86 | 'sticky_navigation': True, 87 | 'navigation_depth': 4, 88 | 'includehidden': True, 89 | 'titles_only': False 90 | } 91 | 92 | html_context = { 93 | 'github_url': github_url 94 | } 95 | 96 | # Add any paths that contain custom static files (such as style sheets) here, 97 | # relative to this directory. They are copied after the builtin static files, 98 | # so a file named "default.css" will overwrite the builtin "default.css". 99 | html_static_path = ['_static'] 100 | 101 | #html_js_files = [ 102 | # 'localtoc.js', 103 | #] 104 | 105 | 106 | def skip(app, what, name, obj, would_skip, options): 107 | if name == "__init__": 108 | return False 109 | return would_skip 110 | 111 | 112 | def docstring_callback(app, what, name, obj, options, lines: list): 113 | if what == 'class' or what == 'function': 114 | if lines and lines[0].strip(): 115 | lines.insert(1, '') 116 | lines.insert(2, name) 117 | lines.insert(3, '*' * len(name)) 118 | if len(lines) == 4: 119 | lines.append('') 120 | 121 | 122 | def setup(app): 123 | app.connect("autodoc-skip-member", skip) 124 | #app.connect('autodoc-process-docstring', docstring_callback) 125 | 126 | 127 | add_package_names = False 128 | # prefix each section label with the name of the document it is in, followed by a colon 129 | autosectionlabel_prefix_document = True 130 | intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} 131 | napoleon_include_private_with_doc = True 132 | napoleon_include_special_with_doc = True 133 | 
todo_include_todos = True 134 | 135 | #autodoc_default_options = { 136 | # 'inherited-members': True 137 | #} 138 | -------------------------------------------------------------------------------- /docs/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trailofbits/graphtage/23654acf488eb803a60ce27ac515ee0755feb1a7/docs/example.png -------------------------------------------------------------------------------- /docs/extending.rst: -------------------------------------------------------------------------------- 1 | Extending Graphtage 2 | =================== 3 | 4 | Graphtage is designed to be extensible; new filetypes can easily be defined, as well as new node types, edit types, 5 | formatters, and printers. This section will give some examples on how to implement each. 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | builders 11 | filetypes 12 | printing 13 | -------------------------------------------------------------------------------- /docs/filetypes.rst: -------------------------------------------------------------------------------- 1 | .. _Filetypes: 2 | 3 | Defining New Filetypes 4 | ====================== 5 | 6 | Implementing support for a new Graphtage filetype entails extending the :class:`graphtage.Filetype` class. Subclassing :class:`graphtage.Filetype` automatically registers it with Graphtage. 7 | 8 | Filetype Matching 9 | ----------------- 10 | 11 | Input files are matched to an associated :class:`graphtage.Filetype` using MIME types. Each :class:`graphtage.Filetype` registers one or more MIME types for which it will be responsible. Input file MIME types are classified using the :mod:`mimetypes` module. Sometimes a filetype does not have a standardized MIME type or is not properly classified by the :mod:`mimetypes` module. For example, Graphtage's :class:`graphtage.pickle.Pickle` filetype has neither. You can add support for such a filetype as follows: 12 | 13 | .. 
code-block:: python 14 | 15 | import mimetypes 16 | 17 | if '.pkl' not in mimetypes.types_map and '.pickle' not in mimetypes.types_map: 18 | mimetypes.add_type('application/x-python-pickle', '.pkl') 19 | mimetypes.suffix_map['.pickle'] = '.pkl' 20 | 21 | Implementing a New Filetype 22 | --------------------------- 23 | 24 | With the MIME type registered, here is a sketch of how one might define the Pickle filetype: 25 | 26 | .. code-block:: python 27 | 28 | from graphtage import BuildOptions, Filetype, Formatter, TreeNode 29 | 30 | class Pickle(Filetype): 31 | def __init__(self): 32 | super().__init__( 33 | "pickle", # a unique identifier 34 | "application/python-pickle", # the primary MIME type 35 | "application/x-python-pickle" # an optional secondary MIME type 36 | ) 37 | 38 | def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode: 39 | # return the root node of the tree built from the given pickle file 40 | 41 | def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]: 42 | # the same as the build_tree() function, 43 | # but on error return a string containing the error message 44 | # 45 | # for example: 46 | try: 47 | return self.build_tree(path=path, options=options) 48 | except PickleDecodeError as e: 49 | return f"Error deserializing {os.path.basename(path)}: {e!s}" 50 | 51 | def get_default_formatter(self) -> GraphtageFormatter: 52 | # return the formatter associated with this file type 53 | -------------------------------------------------------------------------------- /docs/howitworks.rst: -------------------------------------------------------------------------------- 1 | How Graphtage Works 2 | =================== 3 | 4 | In general, optimally mapping one graph to another 5 | cannot be executed in polynomial time [#]_, and is therefore not 6 | tractable for graphs of any useful size [*]_. This is true even for restricted classes of graphs like DAGs [#]_. 
7 | However, trees and forests are a special case that *can* be mapped in polynomial time, with reasonable constraints on 8 | the types of edits possible. Graphtage exploits this. 9 | 10 | Why Mapping Trees is Complex 11 | ---------------------------- 12 | 13 | Ordered nodes in the tree (*e.g.*, JSON lists) and, in particular, mappings (*e.g.*, JSON dicts) are challenging. Most 14 | extant diffing algorithms and utilities assume that the structures are ordered. Take this JSON as an example: 15 | 16 | .. list-table:: 17 | :class: align-center 18 | 19 | * - Original JSON 20 | - Modified JSON 21 | * - .. code-block:: json 22 | 23 | { 24 | "foo": [1, 2, 3, 4], 25 | "bar": "testing" 26 | } 27 | 28 | - .. code-block:: json 29 | 30 | { 31 | "foo": [2, 3, 4, 5], 32 | "zab": "testing", 33 | "woo": ["foobar"] 34 | } 35 | 36 | Existing tools effectively canonicalize the JSON (*e.g.*, sort dictionary elements by key and format lists with one 37 | item per line), and then perform a traditional diff: 38 | 39 | .. code-block:: console 40 | 41 | $ cat original.json | jq -M --sort-keys > original.canonical.json 42 | $ cat modified.json | jq -M --sort-keys > modified.canonical.json 43 | $ diff -u original.canonical.json modified.canonical.json 44 | 45 | .. code-block:: diff 46 | :linenos: 47 | 48 | { 49 | - "bar": "testing", 50 | "foo": [ 51 | - 1, 52 | 2, 53 | 3, 54 | - 4 55 | - ] 56 | + 4, 57 | + 5 58 | + ], 59 | + "woo": [ 60 | + "foobar" 61 | + ], 62 | + "zab": "testing" 63 | } 64 | 65 | Not entirely useful, particularly if the input files are large. The problem is that changing dict keys breaks the diff: 66 | Since "bar" was changed to "zab", the canonical representation changes and they are considered separate edits (lines 2 67 | and 15 of the diff). 
68 | 69 | Matching Ordered Sequences 70 | -------------------------- 71 | 72 | Graphtage matches ordered sequences like lists using an "online" [#]_, "constructive" [#]_ implementation of the 73 | Levenshtein distance metric [#]_, similar to the Wagner–Fischer algorithm [#]_. The algorithm starts with an 74 | unbounded mapping and iteratively improves it until the bounds converge, at which point the optimal edit sequence is 75 | discovered. This is implemented in the :mod:`graphtage.levenshtein` module. 76 | 77 | Matching Unordered Collections 78 | ------------------------------ 79 | 80 | Dicts are matched by solving the minimum weight matching problem [#]_ on the complete bipartite graph from key/value 81 | pairs in the source dict to key/value pairs in the destination dict. This is implemented in the 82 | :mod:`graphtage.matching` module. 83 | 84 | Footnotes 85 | --------- 86 | 87 | .. [#] https://en.wikipedia.org/wiki/Graph_isomorphism_problem 88 | .. [#] https://en.wikipedia.org/wiki/Directed_acyclic_graph 89 | .. [#] https://en.wikipedia.org/wiki/Online_algorithm 90 | .. [#] https://en.wikipedia.org/wiki/Constructive_proof 91 | .. [#] https://en.wikipedia.org/wiki/Levenshtein_distance 92 | .. [#] https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm 93 | .. [#] https://en.wikipedia.org/wiki/Assignment_problem 94 | .. [*] Unless |pvsnp|_. 95 | .. _pvsnp: 96 | https://en.wikipedia.org/wiki/P_versus_NP_problem 97 | .. |pvsnp| replace:: :math:`P = NP` 98 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Graphtage Documentation 2 | ======================= 3 | 4 | Graphtage is *both* a commandline utility *and* a general purpose library for semantically comparing and merging 5 | tree-like structures, such as JSON, XML, HTML, YAML, and CSV files. 
Its name is a portmanteau of “graph” and 6 | “graftage”—the latter being the practice of joining two trees together such that they grow as one. 7 | 8 | There are several reasons why you might be here… 9 | 10 | .. topic:: You want to learn how to use Graphtage as a command line utility. 11 | 12 | This documentation focuses on Graphtage’s use as a library, specifically how to extend it by implementing new file 13 | formats. For instructions on using Graphtage as a utility, see the documentation in its `GitHub page`_. 14 | 15 | .. topic:: You want to programmatically interact with Graphtage as a library. 16 | 17 | You should start by reading about :doc:`Using Graphtage Programmatically <library>`. 18 | 19 | .. topic:: You want to modify or extend Graphtage. 20 | 21 | For example, you might want to implement support for a new file format or edit type. You should start by reading 22 | the :doc:`Extending Graphtage <extending>` section. 23 | 24 | .. topic:: You are already familiar with Graphtage and just need an API reference. 25 | 26 | The API documentation is :doc:`here <package>`. 27 | 28 | .. topic:: You are curious and want to learn more about how Graphtage works. 29 | 30 | Documentation on how Graphtage works is :doc:`here <howitworks>`. 31 | 32 | .. _GitHub page: https://github.com/trailofbits/graphtage 33 | 34 | .. toctree:: 35 | :maxdepth: 4 36 | :caption: Contents: 37 | 38 | library 39 | extending 40 | howitworks 41 | package 42 | 43 | Indices and tables 44 | ================== 45 | 46 | * :ref:`genindex` 47 | * :ref:`modindex` 48 | * :ref:`search` 49 | -------------------------------------------------------------------------------- /docs/library.rst: -------------------------------------------------------------------------------- 1 | Using Graphtage Programmatically 2 | ================================ 3 | 4 | Graphtage is a command line utility, but it can just as easily be used as a library. This section documents how to 5 | interact with Graphtage directly from Python. 
6 | 7 | The Intermediate Representation 8 | ------------------------------- 9 | 10 | Graphtage's diffing algorithms operate on an 11 | `intermediate representation `__ rather than on the data 12 | structures of the original file format. This allows Graphtage to have generic comparison algorithms that can work on 13 | *any* input file type. The intermediate representation is a tree of :class:`graphtage.TreeNode` objects. 14 | 15 | Therefore, the first step is to convert the files being diffed into Graphtage's intermediate representation. The JSON 16 | filetype has a function to convert arbitrary Python objects (comprised of standard Python types) into Graphtage trees:: 17 | 18 | >>> from graphtage import json 19 | >>> from_tree = json.build_tree({"foo": [1, 2, 3, 4]}) 20 | >>> from_tree 21 | DictNode([KeyValuePairNode(key=StringNode('foo'), value=ListNode((IntegerNode(1), IntegerNode(2), IntegerNode(3), IntegerNode(4))))]) 22 | 23 | Transforming Nodes with Edits 24 | ----------------------------- 25 | 26 | To see the sequence of edits to transform this tree to another, we call :meth:`graphtage.TreeNode.get_all_edits`:: 27 | 28 | >>> to_tree = json.build_tree({"bar": [2, 3, 4]}) 29 | >>> to_tree 30 | DictNode([KeyValuePairNode(key=StringNode('bar'), value=ListNode((IntegerNode(2), IntegerNode(3), IntegerNode(4))))]) 31 | >>> for edit in from_tree.get_all_edits(to_tree): 32 | ... print(edit) 33 | Remove(IntegerNode(1), remove_from=ListNode((IntegerNode(1), IntegerNode(2), IntegerNode(3), IntegerNode(4)))) 34 | StringEdit(from_node=StringNode('foo'), to_node=StringNode('bar')) 35 | 36 | Applying Edits to Nodes 37 | ----------------------- 38 | 39 | Both nodes and edits are immutable. We can perform a diff to apply edits to nodes, producing a new tree constructed of 40 | :class:`graphtage.EditedTreeNode` objects. 
Using some Python magic, the new tree's nodes maintain all of the same 41 | characteristics of the source nodes—including their source node class types—but are *also* instances (per :func:`isinstance`) of 42 | :class:`graphtage.EditedTreeNode`. 43 | 44 | Here is how to diff two nodes:: 45 | 46 | >>> from_node.diff(to_node) 47 | >>> diff = from_tree.diff(to_tree) 48 | >>> diff 49 | EditedDictNode([EditedKeyValuePairNode(key=EditedStringNode('foo'), value=EditedListNode((EditedIntegerNode(1), EditedIntegerNode(2), EditedIntegerNode(3), EditedIntegerNode(4))))]) 50 | 51 | As you can see, the tree was reconstructed with edited versions of each node. Each node will have a new member variable, 52 | :attr:`graphtage.EditedTreeNode.edit`, containing the edit that the node chose to apply to itself (or :const:`None` if the 53 | node did not need to be edited). There are also additional member variables to indicate whether the node has been 54 | removed from its parent container. 55 | 56 | Formatting and Printing Results 57 | ------------------------------- 58 | 59 | There are two components to outputting a tree or diff: a :class:`graphtage.formatter.Formatter`, which is responsible 60 | for the syntax of the output, and a :class:`graphtage.printer.Printer`, which is responsible for rendering that output 61 | to a stream. For example, to print our diff in JSON format to the default printer (STDOUT), we would do:: 62 | 63 | >>> from graphtage import printer 64 | >>> with printer.DEFAULT_PRINTER as p: 65 | ... json.JSONFormatter.DEFAULT_INSTANCE.print(printer.DEFAULT_PRINTER, diff) 66 | ... 67 | { 68 | "++bar++~~foo~~": [ 69 | ~~1~~, 70 | 2, 71 | 3, 72 | 4 73 | ] 74 | } 75 | 76 | Since Graphtage's formatters are independent of the input format, thanks to the intermediate representation, we can 77 | just as easily output the diff in another format, like YAML:: 78 | 79 | >>> from graphtage import yaml 80 | >>> with printer.DEFAULT_PRINTER as p: 81 | ... 
yaml.YAMLFormatter.DEFAULT_INSTANCE.print(printer.DEFAULT_PRINTER, diff) 82 | ... 83 | ++bar++~~foo~~: 84 | - ~~1~~ 85 | - 2 86 | - 3 87 | - 4 88 | 89 | Diffing In-Memory Python Objects 90 | -------------------------------- 91 | 92 | When used as a library, Graphtage has the ability to diff in-memory Python objects. This can be useful when debugging, 93 | for example, to quickly determine the difference between two Python objects that cause a differential.:: 94 | 95 | >>> from graphtage.pydiff import print_diff 96 | >>> with printer.DEFAULT_PRINTER as p: 97 | ... obj1 = [1, 2, {3: "three"}, 4] 98 | ... obj2 = [1, 2, {3: 3}, "four"] 99 | ... print_diff(obj1, obj2, printer=p) 100 | [1,2,{3: "three" -> 3},++"four"++~~4~~] 101 | 102 | Python object diffing also works with custom classes:: 103 | 104 | >>> class Foo: 105 | ... def __init__(self, bar, baz): 106 | ... self.bar = bar 107 | ... self.baz = baz 108 | >>> with printer.DEFAULT_PRINTER as p: 109 | ... print_diff(Foo("bar", "baz"), Foo("bar", "bak"), printer=p) 110 | Foo(bar="bar", baz="ba++k++~~z~~") 111 | -------------------------------------------------------------------------------- /docs/printing.rst: -------------------------------------------------------------------------------- 1 | .. _Printing Protocol: 2 | 3 | Printing Protocol 4 | ================= 5 | 6 | The protocol for delegating how a :class:`graphtage.TreeNode` or :class:`graphtage.Edit` is printed in 7 | :meth:`graphtage.GraphtageFormatter.print` is as follows: 8 | 9 | #. 
Determine the actual object to be printed: 10 | * If ``node_or_edit`` is an :class:`graphtage.Edit`: 11 | * If ``with_edits``, then choose the edit 12 | * Otherwise, choose :attr:`node_or_edit.from_node ` 13 | * If ``node_or_edit`` is a :class:`graphtage.TreeNode`: 14 | * If ``with_edits`` *and* the node is edited and has a non-zero cost, 15 | then choose :attr:`node_or_edit.edit `:: 16 | 17 | node_or_edit.edit is not None and node_or_edit.edit.bounds().lower_bound > 0 18 | 19 | * Otherwise choose ``node_or_edit`` 20 | #. If the chosen object is an edit: 21 | * See if there is a specialized formatter for this edit by calling 22 | :meth:`graphtage.formatter.Formatter.get_formatter` 23 | * If so, delegate to that formatter and return. 24 | * If not, try calling the edit's :func:`graphtage.Edit.print` method. If :exc:`NotImplementedError` is 25 | *not* raised, return. 26 | #. If the chosen object is a node, or if we failed to find a printer for the edit: 27 | * See if there is a specialized formatter for this node by calling 28 | :meth:`graphtage.formatter.Formatter.get_formatter` 29 | * If so, delegate to that formatter and return. 30 | * If not, print a debug warning and delegate to the node's internal print implementation 31 | :meth:`graphtage.TreeNode.print`. 32 | 33 | This is implemented in :meth:`graphtage.GraphtageFormatter.print`. See the :ref:`Formatting Protocol` for how formatters 34 | are chosen. 35 | -------------------------------------------------------------------------------- /graphtage/__init__.py: -------------------------------------------------------------------------------- 1 | from . import graphtage 2 | 3 | from .graphtage import * 4 | from .tree import * 5 | from .edits import * 6 | 7 | from .version import __version__, VERSION_STRING 8 | from . 
import ( 9 | ast, bounds, builder, constraints, dataclasses, edits, expressions, fibonacci, formatter, levenshtein, matching, 10 | object_set, pickle, printer, pydiff, search, sequences, tree, utils 11 | ) 12 | from . import csv, json, xml, yaml, plist 13 | 14 | import inspect 15 | 16 | # All of the classes in SUBMODULES_TO_SUBSUME should really be in the top-level `graphtage` module. 17 | # They are separated into submodules solely for making the Python file sizes more manageable. 18 | # So the following code loops over those submodules and reassigns all of the classes to the top-level module. 19 | SUBMODULES_TO_SUBSUME = (graphtage, tree, edits) 20 | for module_to_subsume in SUBMODULES_TO_SUBSUME: 21 | for name, obj in inspect.getmembers(module_to_subsume): 22 | if hasattr(obj, '__module__') and obj.__module__ == module_to_subsume.__name__: 23 | obj.__module__ = 'graphtage' 24 | del module_to_subsume 25 | 26 | del inspect, SUBMODULES_TO_SUBSUME 27 | -------------------------------------------------------------------------------- /graphtage/ast.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generic node types for representing abstract syntax trees. 3 | """ 4 | from colorama import Fore 5 | 6 | from . 
import KeyValuePairNode, ListNode, Printer, TreeNode, DictNode, StringNode 7 | from .dataclasses import DataClassNode 8 | from .sequences import SequenceFormatter 9 | 10 | 11 | class KeywordArgument(KeyValuePairNode): 12 | pass 13 | 14 | 15 | class Module(ListNode): 16 | def print(self, printer: Printer): 17 | SequenceFormatter('', '', '\n').print(printer, self) 18 | 19 | 20 | class Assignment(DataClassNode): 21 | """A node representing an assignment.""" 22 | 23 | targets: ListNode 24 | value: TreeNode 25 | 26 | def print(self, printer: Printer): 27 | """Prints this node.""" 28 | SequenceFormatter('', '', ', ').print(printer, self.targets) 29 | with printer.bright(): 30 | printer.write(" = ") 31 | self.value.print(printer) 32 | 33 | def __str__(self): 34 | return f"{', '.join(map(str, self.targets.children()))} = {self.value!s}" 35 | 36 | 37 | class CallArguments(ListNode): 38 | pass 39 | 40 | 41 | class CallKeywords(DictNode): 42 | pass 43 | 44 | 45 | class Call(DataClassNode): 46 | """A node representing a function call.""" 47 | 48 | func: TreeNode 49 | args: CallArguments 50 | kwargs: CallKeywords 51 | 52 | def __init__(self, *args, **kwargs): 53 | super().__init__(*args, **kwargs) 54 | if isinstance(self.func, StringNode): 55 | self.func.quoted = False 56 | 57 | def print(self, printer: Printer): 58 | with printer.color(Fore.YELLOW): 59 | self.func.print(printer) 60 | printer.write("(") 61 | SequenceFormatter('', '', ', ').print(printer, self.args) 62 | if self.args and len(self.kwargs) > 0: 63 | printer.write(", ") 64 | for kvp in self.kwargs: 65 | with printer.color(Fore.RED): 66 | kvp.key.print(printer) 67 | with printer.bright(): 68 | printer.write("=") 69 | kvp.value.print(printer) 70 | printer.write(")") 71 | 72 | def __str__(self): 73 | args = ", ".join([str(a) for a in self.args] + [ 74 | f"{kvp.key!s}={kvp.value!s}" 75 | for kvp in self.kwargs 76 | ]) 77 | return f"{self.func!s}({args})" 78 | 79 | 80 | class Subscript(DataClassNode): 81 | """A node 
representing an object subscript (i.e., the `[]` operator)""" 82 | 83 | value: TreeNode 84 | slice: TreeNode 85 | 86 | def print(self, printer: Printer): 87 | self.value.print(printer) 88 | with printer.color(Fore.LIGHTBLUE_EX): 89 | printer.write("[") 90 | self.slice.write(printer) 91 | with printer.color(Fore.LIGHTBLUE_EX): 92 | printer.write("]") 93 | 94 | 95 | class Import(DataClassNode): 96 | names: ListNode 97 | from_name: StringNode 98 | 99 | def __init__(self, names: ListNode, from_name: StringNode): 100 | super().__init__(names=names, from_name=from_name) 101 | self.from_name.quoted = False 102 | for child in self.names: 103 | if isinstance(child, StringNode): 104 | child.quoted = False 105 | 106 | def print(self, printer: Printer): 107 | if self.from_name.object: 108 | with printer.color(Fore.YELLOW): 109 | printer.write("from ") 110 | self.from_name.print(printer) 111 | printer.write(" ") 112 | with printer.color(Fore.YELLOW): 113 | printer.write("import ") 114 | SequenceFormatter('', '', ', ').print(printer, self.names) 115 | -------------------------------------------------------------------------------- /graphtage/builder.py: -------------------------------------------------------------------------------- 1 | """A module intended to simplify building Graphtage IR trees from other tree-like data structures.""" 2 | 3 | from abc import ABC 4 | import logging 5 | from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, TypeVar 6 | 7 | from . 
import ( 8 | BoolNode, BuildOptions, DictNode, FixedKeyDictNode, FloatNode, IntegerNode, LeafNode, ListNode, MultiSetNode, 9 | NullNode, StringNode, TreeNode 10 | ) 11 | from .object_set import IdentityHash 12 | 13 | C = TypeVar("C") 14 | T = TypeVar("T") 15 | 16 | log = logging.getLogger(__name__) 17 | 18 | 19 | class CyclicReference(LeafNode): 20 | def __init__(self, obj): 21 | super().__init__(IdentityHash(obj)) 22 | 23 | def __hash__(self): 24 | return id(self.object) 25 | 26 | def __eq__(self, other): 27 | return isinstance(other, CyclicReference) and other.object is self.object 28 | 29 | 30 | class Builder(ABC): 31 | EXPANDERS: Dict[Type[Any], Callable[["Builder", Any], Optional[Iterable[Any]]]] 32 | BUILDERS: Dict[Type[Any], Callable[["Builder", Any, List[TreeNode]], TreeNode]] 33 | 34 | def __init__(self, options: Optional[BuildOptions] = None): 35 | if options is None: 36 | self.options: BuildOptions = BuildOptions() 37 | else: 38 | self.options = options 39 | 40 | @staticmethod 41 | def expander(node_type: Type[T]): 42 | def wrapper(func: Callable[[C, T], Iterable[Any]]) -> Callable[[C, T], Iterable[Any]]: 43 | if hasattr(func, "_visitor_expander_for_type"): 44 | func._visitor_expander_for_type = func._visitor_expander_for_type + (node_type,) 45 | else: 46 | setattr(func, "_visitor_expander_for_type", (node_type,)) 47 | return func 48 | 49 | return wrapper 50 | 51 | @staticmethod 52 | def builder(node_type: Type[T]): 53 | def wrapper(func: Callable[[C, T, List[TreeNode]], TreeNode]) -> Callable[[C, T, List[TreeNode]], TreeNode]: 54 | if hasattr(func, "_visitor_builder_for_type"): 55 | func._visitor_builder_for_type = func._visitor_builder_for_type + (node_type,) 56 | else: 57 | setattr(func, "_visitor_builder_for_type", (node_type,)) 58 | return func 59 | 60 | return wrapper 61 | 62 | def __init_subclass__(cls, **kwargs): 63 | super().__init_subclass__(**kwargs) 64 | if not hasattr(cls, "EXPANDERS") or cls.EXPANDERS is None: 65 | setattr(cls, "EXPANDERS", 
{}) 66 | else: 67 | setattr(cls, "EXPANDERS", dict(cls.EXPANDERS)) 68 | if not hasattr(cls, "BUILDERS") or cls.BUILDERS is None: 69 | setattr(cls, "BUILDERS", {}) 70 | else: 71 | setattr(cls, "BUILDERS", dict(cls.BUILDERS)) 72 | new_expanders = {} 73 | new_builders = {} 74 | for member_name, member in cls.__dict__.items(): 75 | if hasattr(member, "_visitor_expander_for_type"): 76 | for expander_type in getattr(member, "_visitor_expander_for_type"): 77 | if not isinstance(expander_type, type): 78 | raise TypeError(f"{cls.__name__}.{member_name} was registered as an expander for " 79 | f"{expander_type!r}, which is not a type") 80 | elif expander_type in cls.EXPANDERS: 81 | raise TypeError(f"An expander for type {expander_type.__name__} is already registered to " 82 | f"{cls.EXPANDERS[expander_type]!r} and cannot be re-registered to " 83 | f"{cls.__name__}.{member_name}") 84 | elif expander_type in new_expanders: 85 | raise TypeError(f"An expander for type {expander_type.__name__} is already registered to " 86 | f"{new_expanders[expander_type]!r} and cannot be re-registered to " 87 | f"{cls.__name__}.{member_name}") 88 | new_expanders[expander_type] = member 89 | if hasattr(member, "_visitor_builder_for_type"): 90 | for builder_type in getattr(member, "_visitor_builder_for_type"): 91 | if not isinstance(builder_type, type): 92 | raise TypeError(f"{cls.__name__}.{member_name} was registered as an builder for " 93 | f"{builder_type!r}, which is not a type") 94 | elif builder_type in cls.EXPANDERS: 95 | raise TypeError(f"A builder for type {builder_type.__name__} is already registered to " 96 | f"{cls.BUILDERS[builder_type]!r} and cannot be re-registered to " 97 | f"{cls.__name__}.{builder_type}") 98 | elif builder_type in new_builders: 99 | raise TypeError(f"A builder for type {builder_type.__name__} is already registered to " 100 | f"{new_builders[builder_type]!r} and cannot be re-registered to " 101 | f"{cls.__name__}.{builder_type}") 102 | new_builders[builder_type] 
= member 103 | cls.EXPANDERS.update(new_expanders) 104 | cls.BUILDERS.update(new_builders) 105 | 106 | def default_expander(self, node: Any) -> Iterable[Any]: 107 | return () 108 | 109 | def default_builder(self, node: Any, children: List[TreeNode]) -> TreeNode: 110 | raise NotImplementedError(f"A builder for type {node.__class__.__name__} is not defined for object {node!r}") 111 | 112 | @classmethod 113 | def _resolve(cls, obj_type: Type[Any], choices: Dict[Type[Any], T]) -> Optional[T]: 114 | """Resolves the most specialized expander or builder for `obj_type`""" 115 | for t in obj_type.__mro__: 116 | if t in choices: 117 | return choices[t] 118 | return None 119 | 120 | @classmethod 121 | def resolve_expander(cls, obj_type: Type[Any]) -> Optional[Callable[[Any], Optional[Iterable[Any]]]]: 122 | """Resolves the most specialized expander for `obj_type`""" 123 | return cls._resolve(obj_type, cls.EXPANDERS) 124 | 125 | @classmethod 126 | def resolve_builder(cls, obj_type: Type[Any]) -> Optional[Callable[[Any, List[TreeNode]], TreeNode]]: 127 | """Resolves the most specialized builder for `obj_type`""" 128 | return cls._resolve(obj_type, cls.BUILDERS) 129 | 130 | def expand(self, node: Any) -> Iterable[Any]: 131 | expander = self.resolve_expander(type(node)) 132 | if expander is None: 133 | return self.default_expander(node) 134 | return expander(self, node) 135 | 136 | def build(self, node: Any, children: List[TreeNode]) -> TreeNode: 137 | builder = self.resolve_builder(type(node)) 138 | if builder is None: 139 | result = self.default_builder(node, children) 140 | else: 141 | result = builder(self, node, children) 142 | if not isinstance(result, TreeNode): 143 | if builder is None: 144 | source = f"{self.__class__.__name__}.default_builder" 145 | else: 146 | source = f"{builder!r}" 147 | raise ValueError(f"{source}(node={node!r}, children={children!r}) returned {result!r}; " 148 | f"builders must return a graphtage.TreeNode") 149 | return result 150 | 151 | def 
build_tree(self, root_obj) -> TreeNode: 152 | children = self.expand(root_obj) 153 | work: List[Tuple[Any, List[TreeNode], List[Any]]] = [(root_obj, [], list(reversed(list(children))))] 154 | basic_builder = BasicBuilder(self.options) 155 | with self.options.printer.tqdm( 156 | desc="Walking the Tree", leave=False, delay=2.0, unit=" nodes", total=1 + len(work[-1][-1]) 157 | ) as t: 158 | while work: 159 | node, processed_children, unprocessed_children = work[-1] 160 | 161 | if unprocessed_children: 162 | child = unprocessed_children.pop() 163 | t.update(1) 164 | 165 | grandchildren = list(self.expand(child)) 166 | 167 | if grandchildren and self.options.check_for_cycles: 168 | # first, check if all of our grandchildren are leaves; if so, we don't need to check for a cycle 169 | all_are_leaves = all( 170 | all(False for _ in self.expand(grandchild)) 171 | for grandchild in grandchildren 172 | ) 173 | if not all_are_leaves: 174 | # make sure we aren't already in the process of expanding this child 175 | is_cycle = False 176 | for already_expanding, _, _ in work: 177 | if already_expanding is child: 178 | if self.options.ignore_cycles: 179 | log.debug(f"Detected a cycle in {node!r} at child {child!r}; ignoring…") 180 | processed_children.append(CyclicReference(child)) 181 | is_cycle = True 182 | break 183 | else: 184 | raise ValueError(f"Detected a cycle in {node!r} at child {child!r}") 185 | if is_cycle: 186 | continue 187 | work.append((child, [], list(reversed(grandchildren)))) 188 | t.total = t.total + 1 + len(grandchildren) 189 | t.refresh() 190 | continue 191 | 192 | _ = work.pop() 193 | t.update(1) 194 | 195 | new_node = self.build(node, processed_children) 196 | if not work: 197 | return new_node 198 | work[-1][1].append(new_node) 199 | 200 | return NullNode() 201 | 202 | 203 | class BasicBuilder(Builder): 204 | """A builder for basic Python types""" 205 | 206 | @Builder.builder(int) 207 | def build_int(self, obj: int, _) -> IntegerNode: 208 | return 
IntegerNode(obj) 209 | 210 | @Builder.builder(str) 211 | @Builder.builder(bytes) 212 | def build_str(self, obj: str, _) -> StringNode: 213 | return StringNode(obj) 214 | 215 | @Builder.builder(type(None)) 216 | def build_none(self, obj, _) -> NullNode: 217 | assert obj is None 218 | return NullNode() 219 | 220 | @Builder.builder(float) 221 | def build_float(self, obj: float, _) -> FloatNode: 222 | return FloatNode(obj) 223 | 224 | @Builder.builder(bool) 225 | def build_bool(self, obj: bool, _) -> BoolNode: 226 | return BoolNode(obj) 227 | 228 | @Builder.expander(list) 229 | @Builder.expander(tuple) 230 | @Builder.expander(set) 231 | @Builder.expander(frozenset) 232 | def expand_list(self, obj: list): 233 | yield from obj 234 | 235 | @Builder.builder(list) 236 | @Builder.builder(tuple) 237 | def build_list(self, obj, children: List[TreeNode]) -> ListNode: 238 | return ListNode( 239 | children, 240 | allow_list_edits=self.options.allow_list_edits, 241 | allow_list_edits_when_same_length=self.options.allow_list_edits_when_same_length 242 | ) 243 | 244 | @Builder.builder(set) 245 | @Builder.builder(frozenset) 246 | def build_set(self, obj, children: List[TreeNode]) -> MultiSetNode: 247 | return MultiSetNode(children) 248 | 249 | @Builder.expander(dict) 250 | def expand_dict(self, obj: dict): 251 | yield from obj.keys() 252 | yield from obj.values() 253 | 254 | @Builder.builder(dict) 255 | def build_dict(self, _, children: List[TreeNode]): 256 | n = len(children) // 2 257 | keys = children[:n] 258 | values = children[n:] 259 | dict_items = { 260 | k: v 261 | for k, v in zip(keys, values) 262 | } 263 | if self.options.allow_key_edits: 264 | dict_node = DictNode.from_dict(dict_items) 265 | dict_node.auto_match_keys = self.options.auto_match_keys 266 | return dict_node 267 | else: 268 | return FixedKeyDictNode.from_dict(dict_items) 269 | -------------------------------------------------------------------------------- /graphtage/constraints.py: 
-------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | import logging 3 | from typing import Optional 4 | 5 | from .edits import Edit 6 | from . import expressions 7 | from . import graphtage 8 | 9 | log = logging.getLogger('graphtage') 10 | 11 | 12 | class ConditionalMatcher(metaclass=ABCMeta): 13 | def __init__(self, condition: expressions.Expression): 14 | self.condition: expressions.Expression = condition 15 | 16 | @abstractmethod 17 | def __call__(self, from_node: graphtage.TreeNode, to_node: graphtage.TreeNode) -> Optional[Edit]: 18 | raise NotImplementedError() 19 | 20 | @classmethod 21 | def apply(cls, node: graphtage.TreeNode, condition: expressions.Expression): 22 | node.add_edit_modifier(cls(condition)) 23 | 24 | 25 | class MatchIf(ConditionalMatcher): 26 | def __call__(self, from_node: graphtage.TreeNode, to_node: graphtage.TreeNode) -> Optional[Edit]: 27 | try: 28 | if self.condition.eval(locals={'from': from_node, 'to': to_node}): 29 | return None 30 | except Exception as e: 31 | log.debug(f"{e!s} while evaluating --match-if for nodes {from_node} and {to_node}") 32 | return graphtage.Replace(from_node, to_node) 33 | 34 | 35 | class MatchUnless(ConditionalMatcher): 36 | def __call__(self, from_node: graphtage.TreeNode, to_node: graphtage.TreeNode) -> Optional[Edit]: 37 | try: 38 | if self.condition.eval(locals={'from': from_node.to_obj(), 'to': to_node.to_obj()}): 39 | return graphtage.Replace(from_node, to_node) 40 | except Exception as e: 41 | log.debug(f"{e!s} while evaluating --match-unless for nodes {from_node} and {to_node}") 42 | return None 43 | -------------------------------------------------------------------------------- /graphtage/csv.py: -------------------------------------------------------------------------------- 1 | """A :class:`graphtage.Filetype` for parsing, diffing, and rendering `CSV files`_. 2 | 3 | .. 
_CSV files: 4 | https://en.wikipedia.org/wiki/Comma-separated_values 5 | 6 | """ 7 | 8 | import csv 9 | from io import StringIO 10 | from typing import Optional 11 | 12 | from . import graphtage, json 13 | from .json import JSONFormatter 14 | from .printer import Printer 15 | from .sequences import SequenceFormatter 16 | from .tree import GraphtageFormatter, TreeNode 17 | 18 | 19 | class CSVRow(graphtage.ListNode[TreeNode]): 20 | """A node representing a row of a CSV file.""" 21 | def __bool__(self): 22 | return bool(self._children) 23 | 24 | 25 | class CSVNode(graphtage.ListNode[CSVRow]): 26 | """A node representing zero or more CSV rows.""" 27 | def __bool__(self): 28 | return bool(self._children) and any(self._children) 29 | 30 | def __eq__(self, other: 'CSVNode'): 31 | return self._children == other._children or (not self and not other) 32 | 33 | 34 | def build_tree(path: str, options: Optional[graphtage.BuildOptions] = None, *args, **kwargs) -> CSVNode: 35 | """Constructs a :class:`CSVNode` from a CSV file. 36 | 37 | The file is parsed using Python's :func:`csv.reader`. The elements in each row are constructed by delegating to 38 | :func:`graphtage.json.build_tree`:: 39 | 40 | CSVRow([json.build_tree(i, options=options) for i in row]) 41 | 42 | Args: 43 | path: The path to the file to be parsed. 44 | options: Optional build options to pass on to :meth:`graphtage.json.build_tree`. 45 | *args: Any extra positional arguments are passed on to :func:`csv.reader`. 46 | **kwargs: Any extra keyword arguments are passed on to :func:`csv.reader`. 47 | 48 | Returns: 49 | CSVNode: The resulting CSV node object. 
50 | 51 | """ 52 | csv_data = [] 53 | with open(path) as f: 54 | for row in csv.reader(f, *args, **kwargs): 55 | rowdata = [json.build_tree(i, options=options) for i in row] 56 | for col in rowdata: 57 | if isinstance(col, graphtage.StringNode): 58 | col.quoted = False 59 | csv_data.append(CSVRow(rowdata)) 60 | return CSVNode(csv_data) 61 | 62 | 63 | class CSVRowFormatter(SequenceFormatter): 64 | """A formatter for CSV rows.""" 65 | is_partial = True 66 | 67 | def __init__(self): 68 | """Initializes the formatter. 69 | 70 | Equivalent to:: 71 | 72 | super().__init__('', '', ',') 73 | 74 | """ 75 | super().__init__('', '', ',') 76 | 77 | def print_CSVRow(self, *args, **kwargs): 78 | """Prints a CSV row. 79 | 80 | Equivalent to:: 81 | 82 | super().print_SequenceNode(*args, **kwargs) 83 | 84 | """ 85 | super().print_SequenceNode(*args, **kwargs) 86 | 87 | def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False): 88 | """An empty implementation, since each row should be printed as a single line.""" 89 | pass 90 | 91 | 92 | class CSVRows(SequenceFormatter): 93 | """A sub formatter for printing the sequence of rows in a CSV file.""" 94 | is_partial = True 95 | 96 | sub_format_types = [CSVRowFormatter] 97 | 98 | def __init__(self): 99 | """Initializes the formatter. 100 | 101 | Equivalent to:: 102 | 103 | super().__init__('', '', '') 104 | 105 | """ 106 | super().__init__('', '', '') 107 | 108 | def print_CSVNode(self, *args, **kwargs): 109 | """Prints a CSV node. 
110 | 111 | Equivalent to:: 112 | 113 | super().print_SequenceNode(*args, **kwargs) 114 | 115 | """ 116 | super().print_SequenceNode(*args, **kwargs) 117 | 118 | def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False): 119 | """Prints a newline unless ``is_first`` is set; ``is_last`` is accepted but ignored.""" 120 | if not is_first: 121 | printer.newline() 122 | 123 | def items_indent(self, printer: Printer): 124 | """Returns :obj:`printer` because CSV rows do not need to be indented.""" 125 | return printer 126 | 127 | 128 | class CSVFormatter(GraphtageFormatter): 129 | """Top-level formatter for CSV files.""" 130 | sub_format_types = [CSVRows, JSONFormatter] 131 | 132 | def print_LeafNode(self, printer: Printer, node: graphtage.LeafNode): 133 | """Prints a leaf node, which should always be a column in a CSV row. 134 | 135 | The node is escaped by first writing it to :func:`csv.writer`:: 136 | 137 | csv.writer(...).writerow([node.object]) 138 | 139 | """ 140 | if node.edited and node.edit is not None: 141 | self.sub_formatters[1].print(printer, node.edit) 142 | return 143 | s = StringIO() 144 | writer = csv.writer(s) 145 | writer.writerow([node.object]) 146 | r = s.getvalue() 147 | if r.endswith('\r\n'): 148 | r = r[:-2] 149 | elif r.endswith('\n') or r.endswith('\r'): 150 | r = r[:-1] 151 | printer.write(r) 152 | s.close() 153 | 154 | 155 | class CSV(graphtage.Filetype): 156 | """The CSV filetype.""" 157 | def __init__(self): 158 | """Initializes the CSV filetype. 159 | 160 | CSV identifies itself with the MIME types `csv` and `text/csv`. 
161 | 162 | """ 163 | super().__init__( 164 | 'csv', 165 | 'text/csv' 166 | ) 167 | 168 | def build_tree(self, path: str, options: Optional[graphtage.BuildOptions] = None) -> TreeNode: 169 | """Equivalent to :func:`build_tree`""" 170 | return build_tree(path, options=options) 171 | 172 | def build_tree_handling_errors(self, path: str, options: Optional[graphtage.BuildOptions] = None) -> TreeNode: 173 | return self.build_tree(path=path, options=options) 174 | 175 | def get_default_formatter(self) -> CSVFormatter: 176 | return CSVFormatter.DEFAULT_INSTANCE 177 | -------------------------------------------------------------------------------- /graphtage/dataclasses.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Iterator, List, Tuple, Type 2 | 3 | from . import AbstractCompoundEdit, Edit, Range, Replace 4 | from .printer import Fore, Printer 5 | from .tree import ContainerNode, TreeNode 6 | 7 | 8 | class DataClassEdit(AbstractCompoundEdit): 9 | def __init__(self, from_node: "DataClassNode", to_node: "DataClassNode"): 10 | from_slots = dict(from_node.items()) 11 | to_slots = dict(to_node.items()) 12 | if from_slots.keys() != to_slots.keys(): 13 | raise ValueError(f"Node {from_node!r} cannot be edited to {to_node!r} because they have incompatible slots") 14 | self.slot_edits: List[Edit] = [ 15 | value.edits(to_slots[slot]) 16 | for slot, value in from_slots.items() 17 | ] 18 | super().__init__(from_node, to_node) 19 | 20 | def bounds(self) -> Range: 21 | total = Range(0, 0) 22 | for e in self.slot_edits: 23 | total = total + e.bounds() 24 | return total 25 | 26 | def edits(self) -> Iterator[Edit]: 27 | yield from self.slot_edits 28 | 29 | def tighten_bounds(self) -> bool: 30 | for edit in self.slot_edits: 31 | if edit.tighten_bounds(): 32 | return True 33 | return False 34 | 35 | 36 | class DataClassNode(ContainerNode): 37 | """A container node that can be initialized similar to a Python 
:func:`dataclasses.dataclass`""" 38 | 39 | _SLOTS: Tuple[str, ...] 40 | _SLOT_ANNOTATIONS: Dict[str, Type[TreeNode]] 41 | _DATA_CLASS_ANCESTORS: List[Type["DataClassNode"]] 42 | 43 | def __init__(self, *args, **kwargs): 44 | """Be careful extending __init__; consider using :func:`DataClassNode.post_init` instead.""" 45 | our_kwargs = { 46 | k: v 47 | for k, v in kwargs.items() 48 | if k in self._SLOTS 49 | } 50 | parent_kwargs = { 51 | k: v 52 | for k, v in kwargs.items() 53 | if k not in self._SLOTS 54 | } 55 | required_positional_args = len(self._SLOTS) - len(our_kwargs) 56 | assert required_positional_args >= 0 57 | if required_positional_args > len(args): 58 | raise ValueError(f"Not enough arguments sent to {self.__class__.__name__}.__init__: {args!r} {kwargs!r}; " 59 | f"expected at least {len(self._SLOTS)}") 60 | start_index = len(args) - required_positional_args 61 | parent_args = args[:start_index] 62 | super().__init__(*parent_args, **parent_kwargs) 63 | our_args = list(args[start_index:]) 64 | for s in self._SLOTS: 65 | if s in our_kwargs: 66 | value = our_kwargs[s] 67 | elif not our_args: 68 | raise ValueError(f"Missing argument for {self.__class__.__name__}.{s}") 69 | else: 70 | value = our_args[0] 71 | our_args = our_args[1:] 72 | expected_type = self._SLOT_ANNOTATIONS[s] 73 | if not isinstance(value, expected_type): 74 | raise ValueError(f"Expected a node of type {expected_type.__name__} for argument " 75 | f"{self.__class__.__name__}.{s} but instead got {value!r}") 76 | setattr(self, s, value) 77 | # self.__hash__ gets called so often, we cache the result: 78 | self.__hash = hash(tuple(self)) 79 | for ancestor in self._DATA_CLASS_ANCESTORS: 80 | ancestor.post_init(self) 81 | 82 | def post_init(self): 83 | """Callback called after this class's members have been initialized. 84 | 85 | This callback should not call `super().post_init()`. Each superclass's `post_init()` will be automatically 86 | called in order of the `__mro__`. 
87 | """ 88 | pass 89 | 90 | def __init_subclass__(cls, **kwargs): 91 | super().__init_subclass__(**kwargs) 92 | ancestors = [ 93 | c 94 | for c in cls.__mro__ 95 | if c is not cls and issubclass(c, DataClassNode) and c is not DataClassNode 96 | ] 97 | cls._DATA_CLASS_ANCESTORS = ancestors 98 | ancestor_slot_names = { 99 | name: a 100 | for a in ancestors 101 | for name in a._SLOTS 102 | } 103 | if not hasattr(cls, "_SLOT_ANNOTATIONS") or cls._SLOT_ANNOTATIONS is None: 104 | cls._SLOT_ANNOTATIONS = {} 105 | cls._SLOTS = () 106 | else: 107 | cls._SLOT_ANNOTATIONS = dict(cls._SLOT_ANNOTATIONS) 108 | new_slots = [] 109 | for i, (name, slot_type) in enumerate(cls.__annotations__.items()): 110 | if not isinstance(slot_type, type) or not issubclass(slot_type, TreeNode): 111 | continue 112 | if name in ancestor_slot_names: 113 | raise TypeError(f"Dataclass {cls.__name__} cannot redefine slot {name!r} because it is already " 114 | f"defined in its superclass {ancestor_slot_names[name].__name__}") 115 | new_slots.append(name) 116 | cls._SLOT_ANNOTATIONS[name] = slot_type 117 | cls._SLOTS = cls._SLOTS + tuple(new_slots) 118 | 119 | def __hash__(self): 120 | return self.__hash 121 | 122 | def __iter__(self) -> Iterator[TreeNode]: 123 | for _, value in self.items(): 124 | yield value 125 | 126 | def items(self) -> Iterator[Tuple[str, TreeNode]]: 127 | for slot in self._SLOTS: 128 | yield slot, getattr(self, slot) 129 | 130 | def to_obj(self): 131 | return { 132 | slot: getattr(self, slot).to_obj() 133 | for slot in self._SLOTS 134 | } 135 | 136 | def edits(self, node: TreeNode) -> Edit: 137 | if isinstance(node, DataClassNode): 138 | our_slots = set(self._SLOTS) 139 | their_slots = set(node._SLOTS) 140 | if our_slots == their_slots: 141 | return DataClassEdit(self, node) 142 | return Replace(self, node) 143 | 144 | def calculate_total_size(self) -> int: 145 | return sum(s.calculate_total_size() for s in self) 146 | 147 | def print(self, printer: Printer): 148 | with 
printer.color(Fore.Yellow): 149 | printer.write(self.__class__.__name__) 150 | printer.write("(") 151 | for i, slot in enumerate(self._SLOTS): 152 | if i > 0: 153 | printer.write(", ") 154 | with printer.color(Fore.RED): 155 | printer.write(slot) 156 | with printer.bright(): 157 | printer.write("=") 158 | getattr(self, slot).print(printer) 159 | printer.write(")") 160 | 161 | def __len__(self): 162 | return len(self._SLOTS) 163 | 164 | def __eq__(self, other): 165 | return isinstance(other, DataClassNode) and dict(self.items()) == dict(other.items()) 166 | 167 | def __repr__(self): 168 | attrs = ", ".join( 169 | f"{slot}={value!r}" 170 | for slot, value in self.items() 171 | ) 172 | return f"{self.__class__.__name__}({attrs})" 173 | -------------------------------------------------------------------------------- /graphtage/debug.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities to aid in debugging 3 | """ 4 | 5 | from functools import partial 6 | from inspect import getmembers 7 | 8 | DEBUG_MODE = False 9 | 10 | 11 | if DEBUG_MODE: 12 | class Debuggable: 13 | _DEBUG_PATCHED: bool = False 14 | 15 | def __new__(cls, *args, **kwargs): 16 | instance = super().__new__(cls) 17 | if not instance._DEBUG_PATCHED: 18 | debug_all_member = None 19 | for name, member in getmembers(instance): 20 | if not name.startswith("_debug_"): 21 | continue 22 | name = name[len("_debug_"):] 23 | if name == "__all__": 24 | debug_all_member = member 25 | continue 26 | elif not hasattr(instance, name): 27 | continue 28 | func = getattr(instance, name) 29 | setattr(instance, f"_original_{name}", func) 30 | setattr(instance, name, member) 31 | if debug_all_member is not None: 32 | for name, member in getmembers(instance): 33 | if name.startswith("_") or not callable(member): 34 | continue 35 | 36 | setattr(instance, name, partial(debug_all_member, name, member)) 37 | instance._DEBUG_PATCHED = True 38 | return instance 39 | else: 40 | class 
T = TypeVar('T')
Key = TypeVar('Key')
# Sentinel meaning "no key was supplied"; a dedicated object so that ``None`` can still be used as a real key.
DefaultKey = object()


class HeapNode(Generic[T, Key]):
    """A node in a :class:`FibonacciHeap`.

    Nodes on the same level (the roots of a heap, or the children of one node) are chained into a circular, doubly
    linked list through :attr:`left` and :attr:`right`. A node without siblings links to itself.

    """
    def __init__(self, item: T, key: Key = DefaultKey):
        """Initializes a Fibonacci heap node.

        Args:
            item: The heap item associated with the node.
            key: An optional key to use for the item in sorting. If omitted, the item itself will be used.

        """
        self.item: T = item
        """The item associated with this heap node."""
        # Identity comparison: the sentinel is a unique object, so ``is`` is the idiomatic (and exact) test.
        if key is DefaultKey:
            key = item
        self.key: Key = key
        """The key to be used when sorting this heap node."""
        self.parent: Optional[HeapNode[T, Key]] = None
        """The node's parent."""
        self.child: Optional[HeapNode[T, Key]] = None
        """The node's child."""
        self.left: HeapNode[T, Key] = self
        """The left sibling of this node, or :obj:`self` if it has no left sibling."""
        self.right: HeapNode[T, Key] = self
        """The right sibling of this node, or :obj:`self` if it has no right sibling."""
        self.degree: int = 0
        """The degree of this node (*i.e.*, the number of its children)."""
        self.mark: bool = False
        """The node's marked state."""
        self.deleted: bool = False
        """Whether the node has been deleted.

        This is to prevent nodes from being manipulated after they have been removed from a heap.

        Warning:
            Do not set :attr:`HeapNode.deleted` to :const:`True` unless the node has already been removed from the heap.

        """

    def add_child(self, node):
        """Adds a child to this heap node, incrementing its degree.

        The caller is responsible for setting ``node.parent``. If this node has no children yet, ``node`` becomes
        the sole child and its own sibling links are left untouched, so they should already point at itself.

        """
        assert node != self
        if self.child is None:
            self.child = node
        else:
            # Splice the new node in directly to the right of the current child pointer.
            node.right = self.child.right
            node.left = self.child
            self.child.right.left = node
            self.child.right = node
        self.degree += 1

    def remove_child(self, node):
        """Removes a child from this heap node, decrementing its degree.

        ``node`` is only unlinked from its siblings; its own ``left``/``right``/``parent`` are not reset.

        """
        assert self.child is not None
        if self.child == self.child.right:
            # The only child is being removed.
            self.child = None
        elif self.child == node:
            # The child pointer must move on to a node that stays in the list.
            self.child = node.right
            node.right.parent = self
        node.left.right = node.right
        node.right.left = node.left
        self.degree -= 1

    @property
    def siblings(self) -> Iterator['HeapNode[T, Key]']:
        """Iterates over this node's siblings (every other node in its circular list), moving rightwards."""
        node = self.right
        while node != self:
            yield node
            node = node.right

    @property
    def children(self) -> Iterator['HeapNode[T, Key]']:
        """Iterates over this node's direct children.

        Equivalent to::

            if self.child is not None:
                yield self.child
                yield from self.child.siblings

        """
        if self.child is None:
            # Checked separately so that an inconsistent degree fails the assertion instead of dereferencing None.
            assert self.degree == 0
            return
        assert self.degree == 1 + sum(1 for _ in self.child.siblings)
        yield self.child
        yield from self.child.siblings

    def __iter__(self) -> Iterator['HeapNode[T, Key]']:
        """Iterates over this node, its siblings, and all of their descendants."""
        yield self
        if self.child is not None:
            # Iterating a child also covers that child's siblings and their subtrees.
            yield from iter(self.child)
        node = self.right
        while node != self:
            yield node
            if node.child is not None:
                yield from iter(node.child)
            node = node.right

    def __lt__(self, other):
        # A deleted node compares smaller than any live node; FibonacciHeap.remove relies on this to cut the node
        # away from its parent before extracting it.
        return (self.deleted and not other.deleted) or self.key < other.key

    def __le__(self, other):
        return self < other or self.key == other.key

    def __eq__(self, other):
        # Nodes are compared by identity, never by item or key.
        return self is other

    def __hash__(self):
        return hash(self.item)

    def __repr__(self):
        return f"{self.__class__.__name__}(item={self.item!r}, key={self.key!r})"


class FibonacciHeap(Generic[T, Key]):
    """A Fibonacci Heap."""
    def __init__(self, key: Optional[Callable[[T], Key]] = None):
        """Initializes a Fibonacci heap.

        Args:
            key: An optional function that accepts an item and returns the key to be used for comparing that item.
                If omitted, it is equivalent to::

                    lambda item: item

        """
        if key is None:
            self.key = lambda a: a
            """The function to extract comparison keys from items."""
        else:
            self.key: Callable[[T], Key] = key
        self._min: Optional[HeapNode[T, Key]] = None
        self._root: Optional[HeapNode[T, Key]] = None
        self._n: int = 0

    def clear(self):
        """Removes all items from this heap."""
        self._min = None
        self._root = None
        self._n = 0

    def peek(self) -> T:
        """Returns the smallest element of the heap without removing it.

        Returns:
            T: The smallest element of the heap.

        Raises:
            AttributeError: If the heap is empty (there is no minimum node to read the item from).

        """
        while self._min is not None and self._min.deleted:
            self._extract_min()
        return self._min.item

    def remove(self, node: HeapNode[T, Key]):
        """Removes the given node from this heap.

        Args:
            node: The node to be removed.

        Warning:
            This function assumes that the provided node is actually a member of this heap. It also assumes (but does
            not check) that :attr:`node.deleted <HeapNode.deleted>` is :const:`False`. If either of these assumptions
            is incorrect, it will lead to undefined behavior and corruption of the heap.

        """
        # Marking the node deleted makes it compare smaller than its parent, so it is cut up to the root list
        # and can then be extracted as if it were the minimum.
        node.deleted = True
        y = node.parent
        if y is not None and node < y:
            self._cut(node, y)
            self._cascading_cut(y)
        self._min = node
        self._extract_min()

    @property
    def min_node(self) -> HeapNode[T, Key]:
        """Returns the heap node associated with the smallest item in the heap, without removing it.

        This is :const:`None` when the heap is empty.

        """
        return self._min

    @property
    def _roots(self) -> Iterator[HeapNode[T, Key]]:
        if self._root is not None:
            yield self._root
            yield from self._root.siblings

    def __len__(self):
        return self._n

    def __bool__(self):
        return self._n > 0

    def __iter__(self) -> Iterator[T]:
        """Iterates over every item in the heap, in no particular order."""
        # Go through nodes() so that an empty heap yields nothing; iterating ``self._root`` directly raised
        # TypeError when the root was None.
        for node in self.nodes():
            yield node.item

    def nodes(self) -> Iterator[HeapNode[T, Key]]:
        """Iterates over all of the heap nodes in this heap."""
        if self._root is None:
            return
        yield from iter(self._root)

    def _extract_min(self) -> HeapNode[T, Key]:
        """Unlinks and returns the current minimum node, or returns :const:`None` if the heap is empty."""
        z = self._min
        if z is not None:
            if z.child is not None:
                # Snapshot the children first: appending them to the root list rewrites their sibling links.
                for child in list(z.children):
                    self._append_root(child)
                    child.parent = None
            self._remove_root(z)
            if z == z.right:
                self._min = self._root = None
            else:
                # Any remaining root works as a starting point; _consolidate finds the real minimum.
                self._min = z.right
                self._consolidate()
            self._n -= 1
        return z

    def push(self, item: T) -> HeapNode[T, Key]:
        """Adds a new item to this heap.

        Returns:
            HeapNode[T, Key]: The heap node created to store the new item.

        """
        node = HeapNode(item=item, key=self.key(item))
        node.left = node.right = node
        self._append_root(node)
        if self._min is None or node < self._min:
            self._min = node
        self._n += 1
        return node

    def decrease_key(self, x: HeapNode[T, Key], k: Key):
        """Decreases the key value associated with the given node.

        Args:
            x: The node to modify.
            k: The new key value.

        Raises:
            ValueError: If :attr:`x.key <HeapNode.key>` is less than :obj:`k`.

        """
        if x.key < k:
            raise ValueError(f"The key can only decrease! New key {k!r} > old key {x.key!r}.")
        x.key = k
        y = x.parent
        if y is not None and x < y:
            self._cut(x, y)
            self._cascading_cut(y)
        if x < self._min:
            self._min = x

    def __add__(self, other):
        """Merges two heaps.

        If either heap is empty the other one is returned as-is. Otherwise a new heap using this heap's key
        function is returned.

        Warning:
            The merge splices the two root lists together in place, so neither operand should be used afterwards.

        """
        if not other:
            return self
        elif not self:
            return other
        merged = FibonacciHeap(key=self.key)
        merged._root, merged._min = self._root, self._min
        # Join the two circular root lists: other's roots are inserted just left of merged._root.
        last = other._root.left
        other._root.left = merged._root.left
        merged._root.left.right = other._root
        merged._root.left = last
        merged._root.left.right = merged._root
        if other._min < merged._min:
            merged._min = other._min
        merged._n = self._n + other._n
        return merged

    def _cut(self, x: HeapNode[T, Key], y: HeapNode[T, Key]):
        """Detaches ``x`` from its parent ``y`` and makes it an unmarked root."""
        y.remove_child(x)
        self._append_root(x)
        x.parent = None
        x.mark = False

    def _cascading_cut(self, y: HeapNode[T, Key]):
        """Marks ``y`` after it lost a child, or cuts it (and recurses upwards) if it was already marked."""
        z = y.parent
        if z is not None:
            if y.mark is False:
                y.mark = True
            else:
                self._cut(y, z)
                self._cascading_cut(z)

    def _consolidate(self):
        """Links roots of equal degree until all root degrees are distinct, then refreshes the minimum."""
        # Keyed by degree. A dict only holds the degrees that actually occur, whereas the previous
        # ``[None] * self._n`` table cost O(n) on every single extraction.
        roots_by_degree = {}
        for x in list(self._roots):
            d = x.degree
            while d in roots_by_degree:
                y = roots_by_degree.pop(d)
                if y < x:
                    x, y = y, x
                # y is not smaller than x, so it becomes a child of x.
                self._link(y, x)
                d += 1
            roots_by_degree[d] = x
        # Visit in increasing degree order, matching the order of the table scan this replaces.
        for d in sorted(roots_by_degree):
            if roots_by_degree[d] <= self._min:
                self._min = roots_by_degree[d]

    def _link(self, y: HeapNode[T, Key], x: HeapNode[T, Key]):
        """Removes root ``y`` from the root list and makes it an unmarked child of ``x``."""
        self._remove_root(y)
        y.left = y.right = y
        x.add_child(y)
        y.parent = x
        y.mark = False

    def _append_root(self, node: HeapNode[T, Key]):
        """Inserts ``node`` into the root list, directly to the right of the root pointer."""
        if self._root is None:
            self._root = node
        else:
            node.right = self._root.right
            node.left = self._root
            self._root.right.left = node
            self._root.right = node

    def _remove_root(self, node: HeapNode[T, Key]):
        """Unlinks ``node`` from the root list; the node's own sibling links are left as they were."""
        if node == self._root:
            self._root = node.right
        node.left.right = node.right
        node.right.left = node.left

    def pop(self) -> T:
        """Returns and removes the smallest item from this heap.

        Raises:
            AttributeError: If the heap is empty (there is no node to take the item from).

        """
        while self._min is not None and self._min.deleted:
            self._extract_min()
        return self._extract_min().item


class ReversedComparator(Generic[Key]):
    """A wrapper that reverses the semantics of its comparison operators."""
    def __init__(self, key: Key):
        self.key = key

    def __lt__(self, other):
        return self.key > other.key

    def __le__(self, other):
        return self.key >= other.key

    def __eq__(self, other):
        return self.key == other.key

    def __hash__(self):
        return hash(self.key)


class MaxFibonacciHeap(Generic[T, Key], FibonacciHeap[T, ReversedComparator[Key]]):
    """A Fibonacci Heap that yields items in decreasing order, using a :class:`ReversedComparator`."""
    def __init__(self, key: Optional[Callable[[T], Key]] = None):
        """Initializes the heap.

        Args:
            key: An optional function mapping an item to its comparison key; defaults to the item itself. The
                result is wrapped in a :class:`ReversedComparator` so the largest key is extracted first.

        """
        if key is None:
            def key(n: T):
                return n
        super().__init__(key=lambda n: ReversedComparator(key(n)))
def build_tree(
        python_obj: Union[int, float, bool, str, bytes, list, dict],
        options: Optional[BuildOptions] = None,
        force_leaf_node: bool = False) -> TreeNode:
    """Converts a plain Python object (as produced by a JSON parser) into a Graphtage tree.

    Args:
        python_obj: The object to convert.
        options: Build options; a default :class:`BuildOptions` is created when omitted.
        force_leaf_node: If :const:`True`, :obj:`python_obj` must be a scalar (it is used for dictionary keys).

    Returns:
        TreeNode: The root of the tree representing :obj:`python_obj`.

    Raises:
        ValueError: If :obj:`force_leaf_node` is :const:`True` and :obj:`python_obj` is *not* one of :class:`int`,
            :class:`float`, :class:`bool`, :class:`str`, or :class:`bytes`.
        ValueError: If the object is of an unsupported type.

    """
    opts = BuildOptions() if options is None else options

    # Scalars first. bool has to be tested before int because bool is a subclass of int.
    if isinstance(python_obj, bool):
        return BoolNode(python_obj)
    if isinstance(python_obj, int):
        return IntegerNode(python_obj)
    if isinstance(python_obj, float):
        return FloatNode(python_obj)
    if isinstance(python_obj, str):
        return StringNode(python_obj)
    if isinstance(python_obj, bytes):
        return StringNode(python_obj.decode('utf-8'))

    # Everything below is either a container, None, or unsupported.
    if force_leaf_node:
        raise ValueError(f"{python_obj!r} was expected to be an int or string, but was instead a {type(python_obj)}")

    if isinstance(python_obj, (list, tuple)):
        elements = DEFAULT_PRINTER.tqdm(python_obj, delay=2.0, desc="Loading JSON List", leave=False)
        children = [build_tree(element, options=opts) for element in elements]
        return ListNode(
            children,
            allow_list_edits=opts.allow_list_edits,
            allow_list_edits_when_same_length=opts.allow_list_edits_when_same_length
        )

    if isinstance(python_obj, dict):
        pairs = DEFAULT_PRINTER.tqdm(python_obj.items(), delay=2.0, desc="Loading JSON Dict", leave=False)
        dict_items = {}
        for raw_key, raw_value in pairs:
            key_node = build_tree(raw_key, options=opts, force_leaf_node=True)
            dict_items[key_node] = build_tree(raw_value, options=opts)
        if not opts.allow_key_edits:
            return FixedKeyDictNode.from_dict(dict_items)
        dict_node = DictNode.from_dict(dict_items)
        dict_node.auto_match_keys = opts.auto_match_keys
        return dict_node

    if python_obj is None:
        return NullNode()

    raise ValueError(f"Unsupported Python object {python_obj!r} of type {type(python_obj)}")
def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
    """Starts a new line between list items unless the printer asks for lists to be joined.

    A printer that has no ``join_lists`` attribute is treated the same as one whose ``join_lists`` is false.

    """
    if getattr(printer, 'join_lists', False):
        return
    printer.newline()
class JSONStringFormatter(StringFormatter):
    """Formats string nodes the JSON way: always double-quoted, escaped with :func:`json.dumps`."""
    is_partial = True

    def _write_quote(self, printer: Printer):
        """Records that the string is quoted and emits a single double-quote character."""
        # JSON strings are always quoted
        self.is_quoted = True
        printer.write('"')

    def write_start_quote(self, printer: Printer, _):
        """Prints the opening quote of the string."""
        self._write_quote(printer)

    def write_end_quote(self, printer: Printer, _):
        """Prints the closing quote of the string."""
        self._write_quote(printer)

    def escape(self, c: str) -> str:
        """Escapes a piece of the string for inclusion in a JSON string literal.

        This function is called once for each character in the string.

        Returns:
            str: The escaped version of `c`, or `c` itself if no escaping is required.

        This is equivalent to::

            json.dumps(c)[1:-1]

        """
        quoted = json.dumps(c)
        # json.dumps encloses its result in double quotes; drop them
        return quoted[1:-1]
class JSON(Filetype):
    """The JSON file type."""
    def __init__(self):
        """Registers the JSON file type.

        The type is named "json" and is associated with the "application/json", "application/x-javascript",
        "text/javascript", "text/x-javascript", and "text/x-json" MIME types.

        """
        mime_types = (
            'application/json',
            'application/x-javascript',
            'text/javascript',
            'text/x-javascript',
            'text/x-json',
        )
        super().__init__('json', *mime_types)

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Parses the JSON file at ``path`` and builds a tree from the parsed object."""
        with open(path) as stream:
            parsed = json.load(stream)
            return build_tree(parsed, options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Like :meth:`build_tree`, but returns a description of the problem if the file is not valid JSON."""
        try:
            tree = self.build_tree(path=path, options=options)
        except json.decoder.JSONDecodeError as de:
            name = os.path.basename(path)
            return f'Error parsing {name}: {de.msg}: line {de.lineno}, column {de.colno} (char {de.pos})'
        return tree

    def get_default_formatter(self) -> JSONFormatter:
        """Returns the shared default :class:`JSONFormatter` instance."""
        return JSONFormatter.DEFAULT_INSTANCE
def build_tree_handling_errors(self, path: str, options: 'Optional[BuildOptions]' = None) -> 'Union[str, TreeNode]':
    """Builds the tree for the JSON5 file at ``path``, reporting parse failures as a message.

    Args:
        path: Path of the file to parse.
        options: Optional build options, passed through unchanged to :meth:`build_tree`.

    Returns:
        Whatever :meth:`build_tree` returns on success; otherwise a string naming the file and describing the
        :exc:`ValueError` that was raised.

    """
    try:
        return self.build_tree(path=path, options=options)
    except ValueError as ve:
        # ``!s`` is a conversion flag and is written without a colon. The previous ``{ve:!s}`` handed "!s" to
        # ``format()`` as a format spec, which exceptions reject with TypeError, so the handler itself crashed.
        return f'Error parsing {os.path.basename(path)}: {ve!s}'
def __init__(
        self,
        from_node: SequenceNode,
        to_node: SequenceNode,
        from_set: HashableCounter[TreeNode],
        to_set: HashableCounter[TreeNode],
        auto_match_keys: bool = True
):
    """Initializes the edit.

    Args:
        from_node: Any sequence node from which to match.
        to_node: Any sequence node to which to match.
        from_set: The set of nodes from which to match. These should typically be children of :obj:`from_node`, but
            this is neither checked nor enforced.
        to_set: The set of nodes to which to match. These should typically be children of :obj:`to_node`, but this
            is neither checked nor enforced.
        auto_match_keys: If `True`, any :class:`graphtage.KeyValuePairNode`s in :obj:`from_set` that have keys
            equal to :class:`graphtage.KeyValuePairNode`s in :obj:`to_set` will automatically be matched. Setting
            this to `False` will require a significant amount more computation for larger dictionaries.

    """
    self._matched_kvp_edits: List[Edit] = []
    if auto_match_keys:
        # Work on copies so the caller's counters are not modified.
        to_set = HashableCounter(to_set)
        from_set = HashableCounter(from_set)
        to_remove_from = []
        for f in from_set.keys():
            if not isinstance(f, graphtage.KeyValuePairNode):
                continue
            for t in to_set.keys():
                # This must test ``t``: the check used to repeat the test on ``f``, so a node in ``to_set``
                # that is not a key/value pair fell through to the ``t.key`` access below.
                if not isinstance(t, graphtage.KeyValuePairNode):
                    continue
                if f.key == t.key:
                    num_matched = min(from_set[f], to_set[t])
                    # One edit per matched occurrence of the pair.
                    self._matched_kvp_edits.extend(f.edits(t) for _ in range(num_matched))
                    to_remove_from.append((f, num_matched))
                    to_set[t] -= num_matched
                    # Only the first pair with an equal key is matched against ``f``.
                    break
        # The counts in from_set are lowered only after the loop over its keys has finished.
        for f, num_matched in to_remove_from:
            from_set[f] -= num_matched
    self.to_insert = to_set - from_set
    """The set of nodes in :obj:`to_set` that do not exist in :obj:`from_set`."""
    self.to_remove = from_set - to_set
    """The set of nodes in :obj:`from_set` that do not exist in :obj:`to_set`."""
    to_match = from_set & to_set
    # Nodes present on both sides are matched to themselves at zero cost.
    self._edits: List[Edit] = [Match(n, n, 0) for n in to_match.elements()]
    # Everything left over is paired up by the bipartite matcher.
    self._matcher = WeightedBipartiteMatcher(
        from_nodes=self.to_remove.elements(),
        to_nodes=self.to_insert.elements(),
        get_edge=lambda f, t: f.edits(t)
    )
    super().__init__(
        from_node=from_node,
        to_node=to_node
    )
class IdentityHash:
    """Wraps an object so that hashing and equality are based purely on the object's identity.

    The wrapper keeps a reference to the object, so the object stays alive for as long as the wrapper does.

    """
    def __init__(self, obj):
        self.obj = obj

    def __hash__(self):
        return id(self.obj)

    def __eq__(self, other):
        if not isinstance(other, IdentityHash):
            return False
        return self.obj is other.obj


class ObjectSet(MutableSet):
    """A set that can hold unhashable Python objects

    Uniqueness is determined based upon identity.

    """
    def __init__(self, initial_objs: Iterable[Any] = ()):
        """Initializes the set, adding each object in :obj:`initial_objs`."""
        self.objs: Set[IdentityHash] = set()
        for obj in initial_objs:
            self.add(obj)

    def add(self, value):
        """Adds :obj:`value` to the set; adding the same object again has no effect."""
        self.objs.add(IdentityHash(value))

    def discard(self, value):
        """Removes :obj:`value` from the set if it is present.

        As :class:`collections.abc.MutableSet` requires, a missing value is silently ignored; use :meth:`remove`
        to get a :exc:`KeyError` instead.

        """
        # set.discard, not set.remove: the latter raises KeyError for a value that is not in the set.
        self.objs.discard(IdentityHash(value))

    def __contains__(self, x):
        return IdentityHash(x) in self.objs

    def __len__(self):
        return len(self.objs)

    def __iter__(self):
        for wrapper in self.objs:
            yield wrapper.obj

    def __str__(self):
        # Iterate over ``self`` so the stored objects are rendered rather than their IdentityHash wrappers.
        return f"{{{', '.join(map(str, self))}}}"
class Pickle(Filetype):
    """The Python Pickle file type."""
    def __init__(self):
        """Registers the Pickle file type.

        The type is named "pickle" and is associated with the "application/python-pickle" and
        "application/x-python-pickle" MIME types.

        """
        mime_types = ('application/python-pickle', 'application/x-python-pickle')
        super().__init__('pickle', *mime_types)

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Loads the pickle at ``path`` with fickling and converts the resulting AST into a tree."""
        with open(path, "rb") as stream:
            syntax_tree = Interpreter(Pickled.load(stream)).to_ast()
            return ast_to_tree(syntax_tree, options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Like :meth:`build_tree`, but returns an error message if the pickle cannot be decoded."""
        try:
            tree = self.build_tree(path=path, options=options)
        except PickleDecodeError as e:
            filename = os.path.basename(path)
            return f'Error deserializing {filename}: {e!s}'
        return tree

    def get_default_formatter(self) -> PyDiffFormatter:
        """Returns the shared default :class:`PyDiffFormatter` instance."""
        return PyDiffFormatter.DEFAULT_INSTANCE
class PLISTNode(ContainerNode):
    """Container node that wraps the single root node of a parsed plist document."""

    def __init__(self, root: TreeNode):
        self.root: TreeNode = root

    def to_obj(self):
        """Return the Python object represented by the wrapped root."""
        return self.root.to_obj()

    def edits(self, node: 'TreeNode') -> Edit:
        """Return the edit turning this node into ``node``.

        Against another :class:`PLISTNode` the result is a collection holding
        a zero-cost match of the two wrappers followed by the edit between
        their roots; against anything else the root is compared directly.
        """
        if not isinstance(node, PLISTNode):
            return self.root.edits(node)
        wrapper_match = Match(self, node, 0)
        root_edit = self.root.edits(node.root)
        return EditCollection(
            from_node=self,
            to_node=node,
            edits=iter((wrapper_match, root_edit)),
            collection=list,
            add_to_collection=list.append,
            explode_edits=False
        )

    def calculate_total_size(self) -> int:
        """Delegate the size calculation to the wrapped root."""
        return self.root.calculate_total_size()

    def print(self, printer: Printer):
        """Write the plist header, the root node, then the plist footer."""
        printer.write(PLIST_HEADER)
        self.root.print(printer)
        printer.write(PLIST_FOOTER)

    def __iter__(self):
        yield from (self.root,)

    def __len__(self) -> int:
        # The wrapper always has exactly one child: the root.
        return 1


def build_tree(path: str, options: Optional[BuildOptions] = None, *args, **kwargs) -> PLISTNode:
    """Constructs a PLIST tree from an PLIST file."""
    with open(path, "rb") as plist_file:
        parsed = load(plist_file)
    root = json.build_tree(parsed, *args, options=options, **kwargs)
    return PLISTNode(root)
def _plist_header_footer() -> Tuple[str, str]:
    """Return the text ``plistlib`` emits before and after a top-level element.

    A known probe string is serialized and the output is split around the
    ``<string>`` element that contains it. The element's own tags are part of
    the probe so that they end up in neither half; callers slice serialized
    output by ``len(header)`` / ``len(footer)`` to recover the bare element.

    Returns:
        A ``(header, footer)`` tuple.

    Raises:
        ValueError: If the probe element is not found after a non-empty
            header in the serialized output.
    """
    string = "1234567890"
    encoded = dumps(string).decode("utf-8")
    expected = f"<string>{string}</string>"
    body_offset = encoded.find(expected)
    if body_offset <= 0:
        # -1: element not found; 0: no header at all. Both are unexpected.
        raise ValueError("Unexpected plist encoding!")
    return encoded[:body_offset], encoded[body_offset + len(expected):]


PLIST_HEADER: str
PLIST_FOOTER: str
PLIST_HEADER, PLIST_FOOTER = _plist_header_footer()
class StatusWriter(IO[str]):
    """A writer compatible with the :class:`graphtage.printer.Writer` protocol that can print status.

    See :meth:`StatusWriter.tqdm` and :meth:`StatusWriter.trange`. If :attr:`StatusWriter.status_stream` is either
    :attr:`sys.stdout` or :attr:`sys.stderr`, then text printed to this writer will be buffered. For each full line
    buffered, a call to :func:`tqdm.write` will be made.

    A status writer whose lifetime is not controlled by instantiation in a ``with`` block must be manually flushed
    with :meth:`StatusWriter.flush(final=True)` after its final write, or else the last line
    written may be lost.

    """
    def __init__(self, out_stream: Optional[TextIO] = None, quiet: bool = False):
        """Initializes a status writer.

        Args:
            out_stream: An optional stream to which to write. If omitted this defaults to :attr:`sys.stdout`.
            quiet: Whether or not :mod:`tqdm` status messages and progress should be suppressed.

        """
        self.quiet = quiet
        """Whether or not :mod:`tqdm` status messages and progress should be suppressed."""
        self._reentries: int = 0
        if out_stream is None:
            out_stream = sys.stdout
        self.status_stream: TextIO = out_stream
        """The status stream to which to print."""
        self._buffer: List[str] = []
        try:
            self.write_raw = self.quiet or (
                out_stream.fileno() != sys.stderr.fileno() and out_stream.fileno() != sys.stdout.fileno()
            )
            """If :const:`True`, this writer *will not* buffer output and use :func:`tqdm.write`.

            This defaults to::

                self.write_raw = self.quiet or (
                    out_stream.fileno() != sys.stderr.fileno() and out_stream.fileno() != sys.stdout.fileno()
                )

            """
        except (io.UnsupportedOperation, AttributeError):
            # In-memory streams raise UnsupportedOperation from fileno(), and
            # duck-typed writers may not have fileno() at all; neither can be
            # the console, so write to them directly.
            self.write_raw = True

    def tqdm(self, *args, **kwargs) -> tqdm:
        """Returns a :class:`tqdm.tqdm` object."""
        # When quiet, always disable; otherwise only supply a default.
        if self.quiet or 'disable' not in kwargs:
            kwargs['disable'] = self.quiet
        return tqdm(*args, **kwargs)

    def trange(self, *args, **kwargs) -> trange:
        """Returns a :class:`tqdm.trange` object."""
        if self.quiet or 'disable' not in kwargs:
            kwargs['disable'] = self.quiet
        return trange(*args, **kwargs)

    def flush(self, final=False):
        """Flushes this writer.

        If :obj:`final` is :const:`True`, any extra bytes will be flushed along with a final newline.

        """
        if final and self._buffer and not self._buffer[-1].endswith('\n'):
            self._buffer.append('\n')
        while self._buffer:
            if '\n' in self._buffer[0]:
                trailing_newline = self._buffer[0].endswith('\n')
                lines = self._buffer[0].split('\n')
                if not trailing_newline:
                    # Carry the unterminated remainder over to the next chunk.
                    if len(self._buffer) == 1:
                        self._buffer.append(lines[-1])
                    else:
                        self._buffer[1] = f"{lines[-1]}{self._buffer[1]}"
                for line in lines[:-1]:
                    tqdm.write(line, file=self.status_stream)
                self._buffer = self._buffer[1:]
            elif len(self._buffer) == 1:
                # A single chunk without a newline: keep it until more arrives.
                break
            else:
                self._buffer = [''.join(self._buffer)]
        return self.status_stream.flush()

    def write(self, text: str) -> int:
        """Write ``text``, either directly (raw mode) or through the line buffer."""
        if self.write_raw:
            return self.status_stream.write(text)
        self._buffer.append(text)
        if '\n' in text:
            self.flush()
        return len(text)

    def close(self) -> None:
        """Flush any buffered text (with a final newline) and close the underlying stream."""
        self.flush(final=True)
        return self.status_stream.close()

    def fileno(self) -> int:
        return self.status_stream.fileno()

    def isatty(self) -> bool:
        return self.status_stream.isatty()

    # The defaults of the delegating methods below mirror those of the ``io``
    # stream classes so that calling them without arguments works.

    def read(self, n: int = -1) -> AnyStr:
        return self.status_stream.read(n)

    def readable(self) -> bool:
        return self.status_stream.readable()

    def readline(self, limit: int = -1) -> AnyStr:
        return self.status_stream.readline(limit)

    def readlines(self, hint: int = -1) -> List[AnyStr]:
        return self.status_stream.readlines(hint)

    def seek(self, offset: int, whence: int = 0) -> int:
        return self.status_stream.seek(offset, whence)

    def seekable(self) -> bool:
        return self.status_stream.seekable()

    def tell(self) -> int:
        return self.status_stream.tell()

    def truncate(self, size: Optional[int] = None) -> int:
        return self.status_stream.truncate(size)

    def writable(self) -> bool:
        return self.status_stream.writable()

    def writelines(self, lines: Iterable[AnyStr]) -> None:
        """Write each item of ``lines`` through :meth:`write`.

        Going through :meth:`write` keeps the output ordered with respect to
        text that is still sitting in the line buffer.
        """
        for line in lines:
            self.write(line)

    @property
    def closed(self) -> bool:
        return self.status_stream.closed

    @property
    def mode(self) -> str:
        return self.status_stream.mode

    @property
    def name(self) -> str:
        return self.status_stream.name

    def __next__(self) -> AnyStr:
        return next(self.status_stream)

    def __iter__(self) -> Iterator[AnyStr]:
        return iter(self.status_stream)

    def __enter__(self) -> IO[AnyStr]:
        self._reentries += 1
        return self

    def __exit__(self, t: Optional[Type[BaseException]], value: Optional[BaseException],
                 traceback: Optional[TracebackType]) -> Optional[bool]:
        # Only the outermost ``with`` block performs the final flush.
        self._reentries -= 1
        if self._reentries == 0:
            self.flush(final=True)

    def __delete__(self, instance):
        # NOTE(review): ``__delete__`` is the descriptor-protocol hook, not the
        # finalizer (``__del__``); confirm which one was intended. Kept as-is
        # so object finalization behavior does not change.
        self.flush(final=True)
# Type of the items being searched over; each must expose bounds()/tighten_bounds().
B = TypeVar('B', bound=Bounded)


class IterativeTighteningSearch(Bounded, Generic[B]):
    """Implementation of iterative tightening search on a given sequence of :class:`graphtage.bounds.Bounded` objects.

    The search class itself is :class:`graphtage.bounds.Bounded`, with bounds on the value of the optimal solution.
    Each call to :meth:`IterativeTighteningSearch.tighten_bounds` will improve these bounds, if possible.

    """
    def __init__(self,
                 possibilities: Iterator[B],
                 initial_bounds: Optional[Range] = None):
        """Initializes the search.

        Args:
            possibilities: An iterator yielding :class:`graphtage.bounded.Bounded` objects over which to search.
            initial_bounds: Bounds on the optimal solution, if known. Having good initial bounds can greatly speed up
                the search. However, if the initial bounds are incorrect (*i.e.*, if the true optimal solution lies
                outside of :obj:`initial_bounds`), then the resulting solution may be incorrect.

        """

        def get_range(bounded: Bounded) -> Range:
            # Heap key: the item's current bounds.
            return bounded.bounds()

        # Candidates not yet pulled from the iterator; set to None once exhausted
        # (or once the search decides no further candidate can be better).
        self._unprocessed: Iterator[B] = possibilities

        # Heap to track the ranges with the lowest upper bound
        self._untightened: FibonacciHeap[B, Range] = FibonacciHeap(key=get_range)

        # Fully tightened (`definitive`) ranges, sorted by increasing bound
        self._tightened: FibonacciHeap[B, Range] = FibonacciHeap(key=get_range)

        if initial_bounds is None:
            self.initial_bounds = Range(NEGATIVE_INFINITY, POSITIVE_INFINITY)
        else:
            self.initial_bounds = initial_bounds

    def __bool__(self):
        """Returns whether the search still has work to do.

        This is :const:`True` while there are unprocessed possibilities, or while either heap is non-empty and
        this search's bounds are not yet :meth:`definitive`.

        """
        return bool(self._unprocessed or ((self._untightened or self._tightened) and not self.bounds().definitive()))

    @property
    def best_match(self) -> Optional[B]:
        """Returns the best solution the search has thus found.

        Returns:
            Optional[B]: The best solution the search has thus found, or :const:`None` if the possibilities
            iterator has not yet been exhausted or both heaps are empty.

        """
        if self._unprocessed is not None or not (self._untightened or self._tightened):
            return None
        elif self._tightened and self._untightened:
            # Both heaps have a minimum: pick whichever has the smaller bounds.
            if self._untightened.peek().bounds() < self._tightened.peek().bounds():
                return self._untightened.peek()
            else:
                return self._tightened.peek()
        elif self._tightened:
            return self._tightened.peek()
        else:
            return self._untightened.peek()

    def remove_best(self) -> Optional[B]:
        """Removes and returns the current best solution found by the search, if one exists.

        This enables one to iteratively sort the input sequence. However, this function is only guaranteed to return
        the globally optimal item if :meth:`IterativeTighteningSearch.goal_test` returns :const:`True`. Therefore,
        to generate a total ordering over the input sequence, you should tighten bounds until the goal is reached before
        each call to this function::

            while search.tighten_bounds():
                while not search.goal_test() and search.tighten_bounds():
                    pass
                if search.goal_test():
                    yield search.remove_best()
            while search.goal_test():
                yield search.remove_best()

        However, if your goal is to produce a total ordering, :func:`graphtage.bounds.sort` is more efficient.

        """
        # Same selection logic as best_match, but the winner is popped from its heap.
        if self._unprocessed is not None or not (self._untightened or self._tightened):
            return None
        elif self._tightened and self._untightened:
            if self._untightened.peek().bounds() < self._tightened.peek().bounds():
                heap = self._untightened
            else:
                heap = self._tightened
        elif self._tightened:
            heap = self._tightened
        else:
            heap = self._untightened
        return heap.pop()

    def search(self) -> B:
        """Finds and returns the smallest item, fully tightened.

        This is equivalent to::

            while self.tighten_bounds():
                pass
            return self.best_match

        """
        while self.tighten_bounds():
            pass
        return self.best_match

    def _nodes(self) -> Iterator[HeapNode[B, Range]]:
        """Yields the heap nodes of the untightened heap followed by those of the tightened heap."""
        yield from self._untightened.nodes()
        yield from self._tightened.nodes()

    def bounds(self) -> Range:
        """Returns bounds on the optimal solution.

        While there is no :attr:`best_match`, this is :attr:`initial_bounds`. Otherwise the upper bound is the upper
        bound of the best match, and the lower bound is derived from the smallest lower-bound key among the
        non-deleted heap nodes (never below the initial lower bound, never above the upper bound).

        """
        if self.best_match is None:
            return self.initial_bounds
        else:
            if self._unprocessed is None and (self._untightened or self._tightened):
                lb = POSITIVE_INFINITY
                for node in self._nodes():
                    if not node.deleted:
                        lb = min(node.key.lower_bound, lb)
                # No live node found, or the heaps report something below the
                # caller-supplied floor: fall back to the initial lower bound.
                if lb == POSITIVE_INFINITY or lb < self.initial_bounds.lower_bound:
                    lb = self.initial_bounds.lower_bound
            else:
                lb = self.initial_bounds.lower_bound
            return Range(min(lb, self.best_match.bounds().upper_bound), self.best_match.bounds().upper_bound)

    def _delete_node(self, node: HeapNode[B, Range]):
        """Removes ``node`` from the untightened heap and marks it as deleted.

        The node's key is decreased to negative infinity and the heap is then popped; presumably this makes
        ``node`` the heap minimum so that the pop removes exactly it.

        """
        self._untightened.decrease_key(node, Range(NEGATIVE_INFINITY, NEGATIVE_INFINITY))
        self._untightened.pop()
        node.deleted = True

    def _update_bounds(self, node: HeapNode[B, Range]):
        """Re-files ``node`` after its item's bounds were tightened.

        The node is dropped if the current best match or the initial bounds dominate it, moved to the tightened
        heap if its bounds became definitive, or re-inserted if its lower bound grew.

        """
        if self.best_match is not None \
                and self.best_match != node.item \
                and self.best_match.bounds().dominates(node.item.bounds()):
            self._delete_node(node)
            return
        elif self.initial_bounds.dominates(node.item.bounds()):
            self._delete_node(node)
            return
        bounds: Range = node.item.bounds()
        if bounds.definitive():
            self._delete_node(node)
            self._tightened.push(node.item)
        elif bounds.lower_bound > node.key.lower_bound:
            # The lower bound increased, so we need to remove and re-add the node
            # because the Fibonacci heap only permits making keys smaller
            self._untightened.decrease_key(node, Range(NEGATIVE_INFINITY, NEGATIVE_INFINITY))
            self._untightened.pop()
            self._untightened.push(node.item)

    def goal_test(self) -> bool:
        """Returns whether :meth:`best_match` is the optimal solution."""
        if self._unprocessed is not None:
            # Unseen candidates might still be better.
            return False
        best = self.best_match
        return best is not None and best.bounds().dominates(self.bounds())

    def tighten_bounds(self) -> bool:
        """Performs one unit of search work.

        Returns:
            bool: :const:`True` if this search's bounds changed (or a decisive candidate was found), and
            :const:`False` once all possibilities are consumed and nothing could be tightened further.

        """
        # NOTE(review): confirm the nesting of the branches below against version control.
        starting_bounds = self.bounds()
        while True:
            if self._unprocessed is not None:
                try:
                    next_best: B = next(self._unprocessed)
                    if self.initial_bounds.lower_bound > NEGATIVE_INFINITY and \
                            self.initial_bounds.lower_bound >= next_best.bounds().upper_bound:
                        # We can't do any better than this choice!
                        self._unprocessed = None
                        self._untightened.clear()
                        self._tightened.clear()
                        if next_best.bounds().definitive():
                            self._tightened.push(next_best)
                        else:
                            self._untightened.push(next_best)
                        return True
                    if starting_bounds.dominates(next_best.bounds()) or \
                            (self.best_match is not None
                             and self.best_match.bounds().dominates(next_best.bounds())) or \
                            self.initial_bounds.dominates(next_best.bounds()):
                        # No need to add this new edit if it is strictly worse than the current best!
                        # NOTE(review): this branch only passes, so the candidate is still pushed below.
                        pass
                    if next_best.bounds().definitive():
                        self._tightened.push(next_best)
                    else:
                        self._untightened.push(next_best)
                except StopIteration:
                    # All possibilities have been consumed.
                    self._unprocessed = None
            tightened = False
            if self._untightened:
                if self._unprocessed is None:
                    if len(self._untightened) == 1:
                        # Only one open candidate left: tighten it directly.
                        untightened = self._untightened.peek()
                        if untightened.tighten_bounds() and untightened.bounds().definitive():
                            self._untightened.clear()
                            self._tightened.push(untightened)
                    if self.goal_test():
                        # The winner is known; discard everything else and keep tightening only it.
                        best = self.best_match
                        self._untightened.clear()
                        self._tightened.clear()
                        ret = best.tighten_bounds()
                        if best.bounds().definitive():
                            self._tightened.push(best)
                        else:
                            self._untightened.push(best)
                        assert self.best_match == best
                        return ret
                # Tighten the first live node that can still make progress.
                for node in list(self._untightened.min_node):
                    if node.deleted:
                        continue
                    tightened = node.item.tighten_bounds()
                    if tightened:
                        self._update_bounds(node)
                        break
            if starting_bounds.lower_bound < self.bounds().lower_bound \
                    or starting_bounds.upper_bound > self.bounds().upper_bound:
                return True
            elif self._unprocessed is None and not tightened:
                return False
def git_branch() -> Optional[str]:
    """Returns the git branch for the codebase, or :const:`None` if it could not be determined.

    The git branch is determined by running

    .. code-block:: console

        $ git symbolic-ref -q HEAD

    """
    try:
        raw_ref = subprocess.check_output(
            ['git', 'symbolic-ref', '-q', 'HEAD'],
            cwd=os.path.dirname(os.path.realpath(__file__)),
            stderr=subprocess.DEVNULL
        )
        # e.g. b"refs/heads/master\n" -> "master"
        return raw_ref.decode('utf-8').strip().rsplit('/', 1)[-1]
    except Exception:
        # Best effort: any failure (no git, not a repository, detached HEAD, ...)
        # simply means the branch is unknown.
        return None


DEV_BUILD = False
"""Sets whether this build is a development build.

This should only be set to :const:`False` to coincide with a release. It should *always* be :const:`False` before
deploying to PyPI.

If :const:`True`, the git branch will be included in the version string.

"""


__version__: Tuple[Union[int, str], ...] = (0, 3, 1)

if DEV_BUILD:
    branch_name = git_branch()
    __version__ += ('git',) if branch_name is None else ('git', branch_name)


def _render_version(version: Tuple[Union[int, str], ...]) -> str:
    """Join the version elements: ints are prefixed with "." and everything else with "-"."""
    rendered = ''
    for element in version:
        if not rendered:
            # Nothing accumulated yet, so no delimiter is needed.
            rendered = str(element)
            continue
        delimiter = '.' if isinstance(element, int) else '-'
        rendered = f'{rendered}{delimiter}{element!s}'
    return rendered


VERSION_STRING = _render_version(__version__)


if __name__ == '__main__':
    print(VERSION_STRING)
def build_tree(path: str, options: Optional[BuildOptions] = None, *args, **kwargs) -> TreeNode:
    """Constructs a YAML tree from a YAML file.

    Every document in the file is parsed. The object handed to :func:`graphtage.json.build_tree` is:

    * :const:`None` if the file contains no documents;
    * the document itself if the file contains exactly one document; or
    * a list of all of the documents if the file contains more than one.

    Args:
        path: Path to the YAML file.
        options: Build options, forwarded unchanged to :func:`graphtage.json.build_tree`.
        *args: Extra positional arguments forwarded to :func:`graphtage.json.build_tree`.
        **kwargs: Extra keyword arguments forwarded to :func:`graphtage.json.build_tree`.

    Returns:
        The tree produced by :func:`graphtage.json.build_tree`.

    Raises:
        yaml.YAMLError: If PyYAML fails to parse the file.

    """
    with open(path, 'rb') as stream:
        # SECURITY NOTE(review): ``Loader`` here is PyYAML's full ``Loader``/``CLoader``, which PyYAML documents
        # as unsafe for untrusted input because language-specific tags can construct arbitrary Python objects.
        # It is kept to preserve existing behavior; confirm whether ``SafeLoader``/``CSafeLoader`` can be
        # substituted before diffing files from untrusted sources.
        #
        # ``load_all`` is lazy, so the documents must be materialized while the file is still open.
        documents = list(load_all(stream, Loader=Loader))

    if not documents:
        root = None
    elif len(documents) == 1:
        root = documents[0]
    else:
        root = documents
    return json.build_tree(root, options=options, *args, **kwargs)
class YAMLStringFormatter(StringFormatter):
    """Partial formatter that renders strings for YAML, using ``|`` block style for multi-line strings."""
    is_partial = True
    # Remembers, between write_start_quote() and write_end_quote(), whether a "|" block was opened
    has_newline = False

    def write_start_quote(self, printer: Printer, edit: StringEdit):
        """Opens a ``|`` block (and indents) if any matched or inserted sub-edit contains a newline."""
        self.has_newline = any(
            isinstance(sub_edit, (Match, Insert)) and '\n' in sub_edit.from_node.object
            for sub_edit in edit.edit_distance.edits()
        )
        if not self.has_newline:
            return
        printer.write('|')
        printer.indents += 1
        printer.newline()

    def context(self, printer: Printer):
        """Returns the printer unchanged."""
        return printer

    def write_end_quote(self, printer: Printer, edit: StringEdit):
        """Undoes the indent added by :meth:`write_start_quote`, if one was added."""
        if self.has_newline:
            printer.indents -= 1

    def print_StringNode(self, printer: Printer, node: 'StringNode'):
        """Prints a string node; strings containing a newline are written as an indented ``|`` block."""
        text = node.object
        if '\n' not in text:
            self.parent.write_obj(printer, text)
            return

        # Only apply a color if the surrounding context has not already chosen a foreground color
        context = printer.color(Fore.CYAN) if printer.context().fore is None else printer
        with context as c:
            c.write('|')
            with c.indent():
                lines = text.split('\n')
                if lines[-1] == '':
                    # Remove trailing newline
                    lines.pop()
                for line in lines:
                    c.newline()
                    self.parent.write_obj(c, line)

    def write_char(self, printer: Printer, c: str, index: int, num_edits: int, removed=False, inserted=False):
        """Writes one character of an edited string, with special handling for newlines."""
        if c != '\n':
            super().write_char(printer, c, index, num_edits, removed, inserted)
            return
        if removed or inserted:
            # Make an added or removed newline visible with a return symbol
            super().write_char(printer, '\u23CE', index, num_edits, removed, inserted)
        if not removed and index < num_edits - 1:
            # Do not print a trailing newline
            printer.newline()
class YAML(Filetype):
    """The YAML filetype."""
    def __init__(self):
        """Initializes the YAML file type.

        By default, YAML associates itself with the "yaml", "application/x-yaml", "application/yaml", "text/yaml",
        "text/x-yaml", and "text/vnd.yaml" MIME types.

        """
        super().__init__(
            'yaml',
            'application/x-yaml',
            'application/yaml',
            'text/yaml',
            'text/x-yaml',
            'text/vnd.yaml'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Builds a tree from the YAML file at ``path`` and marks every string node in it as unquoted."""
        tree = build_tree(path=path, options=options)
        for node in tree.dfs():
            if isinstance(node, StringNode):
                node.quoted = False
        return tree

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Same as :meth:`build_tree`, but a YAML parse error is returned as a message instead of raised.

        Returns:
            The tree on success, or a string describing the error if a :exc:`yaml.YAMLError` was raised.

        """
        try:
            return self.build_tree(path=path, options=options)
        except YAMLError as ye:
            # The message previously ended with a stray, unbalanced ")"
            return f'Error parsing {os.path.basename(path)}: {ye}'

    def get_default_formatter(self) -> YAMLFormatter:
        """Returns the shared default :class:`YAMLFormatter` instance."""
        return YAMLFormatter.DEFAULT_INSTANCE
def get_version_string():
    """Returns the ``VERSION_STRING`` defined by the module at ``VERSION_MODULE_PATH``.

    The module's source is executed in a fresh namespace rather than imported
    (presumably so that building does not require importing the package itself — confirm).
    The file is read as UTF-8, matching :func:`get_readme`, so the result does not depend on the locale.

    Raises:
        OSError: If the version module cannot be read.
        KeyError: If the executed module does not define ``VERSION_STRING``.

    """
    version = {}
    with open(VERSION_MODULE_PATH, encoding='utf-8') as f:
        exec(f.read(), version)
    return version['VERSION_STRING']
-------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trailofbits/graphtage/23654acf488eb803a60ce27ac515ee0755feb1a7/test/__init__.py -------------------------------------------------------------------------------- /test/test_bounds.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import Optional 3 | from unittest import TestCase 4 | 5 | from tqdm import trange 6 | 7 | from graphtage.bounds import Bounded, make_distinct, Range, sort 8 | 9 | 10 | class RandomDecreasingRange(Bounded): 11 | def __init__(self, fixed_lb: int = 0, fixed_ub: int = 2000000, final_value: Optional[int] = None): 12 | if final_value is None: 13 | self.final_value = random.randint(fixed_lb, fixed_lb + (fixed_ub - fixed_lb) // 2) 14 | elif final_value < fixed_lb: 15 | raise ValueError(f"final_value of {final_value} < fixed lower bound of {fixed_lb}") 16 | elif final_value > fixed_ub: 17 | raise ValueError(f"final_value of {final_value} > fixed upper bound of {fixed_ub}") 18 | else: 19 | self.final_value = final_value 20 | self._lb = random.randint(fixed_lb, self.final_value) 21 | self._ub = random.randint(self.final_value, fixed_ub) 22 | self.tightenings: int = 0 23 | 24 | def bounds(self) -> Range: 25 | return Range(self._lb, self._ub) 26 | 27 | def tighten_bounds(self) -> bool: 28 | bounds_before = self.bounds() 29 | lb_diff = self.final_value - self._lb 30 | ub_diff = self._ub - self.final_value 31 | if lb_diff == ub_diff == 0: 32 | return False 33 | if lb_diff <= 1: 34 | self._lb = self.final_value 35 | else: 36 | self._lb += random.randint(max(int(0.5 * lb_diff), 1), lb_diff) 37 | if ub_diff <= 1: 38 | self._ub = self.final_value 39 | else: 40 | self._ub -= random.randint(max(int(0.5 * ub_diff), 1), ub_diff) 41 | if bounds_before.lower_bound < self._lb or 
class TestBounds(TestCase):
    """Tests for :func:`graphtage.bounds.sort` and :func:`graphtage.bounds.make_distinct`."""

    def test_random_decreasing_range(self):
        """Sanity-checks the test fixture: each successful tightening must strictly shrink the range."""
        for _ in range(1000):
            r = RandomDecreasingRange()
            last_range = r.bounds()
            while r.tighten_bounds():
                next_range = r.bounds()
                self.assertTrue(next_range.lower_bound >= last_range.lower_bound
                                and next_range.upper_bound <= last_range.upper_bound
                                and (
                                    next_range.lower_bound > last_range.lower_bound or
                                    next_range.upper_bound < last_range.upper_bound
                                ))
                last_range = next_range

    def test_sort(self):
        """``sort`` must yield the ranges in the order of their final values."""
        for _ in trange(100):
            ranges = [RandomDecreasingRange() for _ in range(100)]
            sorted_ranges = sorted(ranges, key=lambda r: r.final_value)
            for expected, actual in zip(sorted_ranges, sort(ranges)):
                self.assertEqual(expected.final_value, actual.final_value)

    def test_make_distinct(self):
        """After ``make_distinct``, consecutive sorted ranges must not overlap unless both are definitive and equal."""
        speedups = 0
        tests = 0
        try:
            with trange(0, 100) as t:
                for i in t:
                    ranges = [RandomDecreasingRange() for _ in range(i)]
                    make_distinct(*ranges)
                    last_range = None
                    for r in sort(ranges):
                        rbounds = r.bounds()
                        if last_range is not None:
                            self.assertTrue((last_range.definitive() and rbounds.definitive() and last_range == rbounds) or
                                            last_range.upper_bound < rbounds.lower_bound,
                                            f"{last_range!r} was followed by {rbounds!r}")
                        last_range = rbounds
                    tightenings = sum(r.tightenings for r in ranges)
                    if tightenings > 0:
                        # Count how many more tightenings would have been needed to fully resolve every range
                        untightened = 0
                        for r in ranges:
                            t_before = r.tightenings
                            while r.tighten_bounds():
                                pass
                            untightened += r.tightenings - t_before
                        t.desc = f"{(untightened + tightenings) / tightenings:.01f}x Speedup"
                        speedups += (untightened + tightenings) / tightenings
                        tests += 1
        finally:
            # ``tests`` is still zero if an assertion failed before any speedup was recorded; dividing
            # unconditionally would raise ZeroDivisionError here and hide the real failure.
            if tests > 0:
                print(f"Average speedup: {speedups / tests:.01f}x")
class TestDataclasses(TestCase):
    """Tests for :class:`graphtage.dataclasses.DataClassNode` subclasses."""

    def test_inheritance(self):
        """Checks slot inheritance, construction, equality, and diffing of a two-level dataclass hierarchy."""
        class Foo(DataClassNode):
            foo: IntegerNode
            initialized = False

            def post_init(self):
                self.initialized = True

        class Bar(Foo):
            bar: StringNode
            initialized = False

            def post_init(self):
                self.initialized = True

        # The subclass's slots are its ancestor's slots followed by its own
        self.assertEqual(("foo",), Foo._SLOTS)
        self.assertEqual(0, len(Foo._DATA_CLASS_ANCESTORS))
        self.assertEqual(("foo", "bar",), Bar._SLOTS)
        self.assertEqual(1, len(Bar._DATA_CLASS_ANCESTORS))

        b = Bar(foo=IntegerNode(10), bar=StringNode("bar"))
        self.assertEqual(10, b.foo.object)
        self.assertEqual("bar", b.bar.object)
        # post_init() must have been called during construction
        self.assertTrue(b.initialized)

        # now test a mixture of positional and keyword arguments
        b = Bar(StringNode("bar"), foo=IntegerNode(10))
        self.assertEqual(10, b.foo.object)
        self.assertEqual("bar", b.bar.object)
        self.assertTrue(b.initialized)

        # test equality
        self.assertEqual(Bar(IntegerNode(10), StringNode("bar")), b)
        self.assertNotEqual(Bar(IntegerNode(11), StringNode("bar")), b)

        # test diffing of different dataclasses
        f = Foo(IntegerNode(10))
        # Diffing instances of different dataclasses is expected to produce a Replace ...
        edit = f.edits(b)
        self.assertIsInstance(edit, Replace)
        # ... whereas diffing two instances of the same dataclass is expected to produce a DataClassEdit
        c = Foo(IntegerNode(12))
        edit = f.edits(c)
        self.assertIsInstance(edit, DataClassEdit)

    def test_inheritance_with_duplicate(self):
        """Redeclaring an inherited field in a subclass must raise :exc:`TypeError` when the class is defined."""
        def define_duplicate():
            class BaseFoo(DataClassNode):
                foo: StringNode

            class DuplicateFoo(BaseFoo):
                bar: IntegerNode
                foo: IntegerNode

        self.assertRaises(TypeError, define_duplicate)

    def test_runtime_type_checking(self):
        """Constructing a dataclass with a node of the wrong type must raise :exc:`ValueError`."""
        class Foo(DataClassNode):
            foo: IntegerNode

        def try_wrong_type():
            return Foo(StringNode("foo"))

        self.assertRaises(ValueError, try_wrong_type)
42 | self.assertEqual(65, parse('(sampling_factors & -0xf0) >> 4').eval(assignments)) 43 | self.assertEqual(105, parse('thumbnail_x * thumbnail_y * 3').eval(assignments)) 44 | 45 | def test_functions(self): 46 | self.assertEqual(sum([1, 2, 3, 4]), parse('sum([1, 2, 3, 4])').eval()) 47 | self.assertEqual('a, b, c, d', parse('", ".join(["a", "b", "c", "d"])').eval()) 48 | 49 | def test_member_access(self): 50 | class Foo: 51 | def __init__(self, bar): 52 | self.bar = bar 53 | 54 | assignments = { 55 | 'foo': Foo(1234) 56 | } 57 | 58 | self.assertEqual(1234, parse('foo.bar').eval(assignments)) 59 | with self.assertRaises(ParseError): 60 | parse('foo.__dict__').eval(assignments) 61 | 62 | def test_containers(self): 63 | self.assertEqual([[1, (3,)]], parse('[[1, (3,)]]').eval()) 64 | self.assertEqual([1, 2, 3, 4], parse('[1, 2, 3, 4]').eval()) 65 | self.assertEqual((1, 2, 3, 4), parse('(1, 2, 3, 4)').eval()) 66 | self.assertEqual([[1, 2, [3], 4]], parse('[[1, 2, [3], 4]]').eval()) 67 | self.assertEqual((1,), parse('(1,)').eval()) 68 | self.assertEqual([1], parse('[1]').eval()) 69 | with self.assertRaises(ParseError): 70 | self.assertEqual([1], parse('[1,]').eval()) 71 | -------------------------------------------------------------------------------- /test/test_fibonacci.py: -------------------------------------------------------------------------------- 1 | import random 2 | from collections import defaultdict 3 | from typing import Callable, Dict, List, Optional, Set 4 | from unittest import TestCase 5 | 6 | from tqdm import tqdm, trange 7 | 8 | from graphtage.fibonacci import FibonacciHeap, HeapNode, MaxFibonacciHeap 9 | 10 | 11 | class TestFibonacciHeap(TestCase): 12 | @classmethod 13 | def setUpClass(cls): 14 | cls.random_list: List[int] = [random.randint(0, 10000) for _ in range(10000)] 15 | cls.sorted_list: List[int] = sorted(cls.random_list) 16 | 17 | def test_duplicate_items(self): 18 | heap = FibonacciHeap() 19 | test_list = [2, 1, 2] 20 | for i in test_list: 
21 | heap.push(i) 22 | heap_sorted = [heap.pop() for _ in range(len(test_list))] 23 | self.assertEqual(sorted(test_list), heap_sorted) 24 | 25 | def random_heap(self) -> FibonacciHeap[int, int]: 26 | heap: FibonacciHeap[int, int] = FibonacciHeap() 27 | for rand_int in self.random_list: 28 | heap.push(rand_int) 29 | return heap 30 | 31 | def random_max_heap(self, key: Optional[Callable[[int], int]] = None) -> MaxFibonacciHeap[int, int]: 32 | heap: FibonacciHeap[int, int] = MaxFibonacciHeap(key=key) 33 | for rand_int in self.random_list: 34 | heap.push(rand_int) 35 | return heap 36 | 37 | def test_fibonacci_heap(self): 38 | heap = self.random_heap() 39 | heap_sorted = [heap.pop() for _ in range(len(self.random_list))] 40 | self.assertEqual(self.sorted_list, heap_sorted) 41 | 42 | def test_max_fibonacci_heap(self): 43 | heap = self.random_max_heap() 44 | heap_sorted = [heap.pop() for _ in range(len(self.random_list))] 45 | self.assertEqual(list(reversed(self.sorted_list)), heap_sorted) 46 | 47 | def test_max_fibonacci_heap_with_key(self): 48 | heap = self.random_max_heap(key=lambda i: -i) 49 | heap_sorted = [heap.pop() for _ in range(len(self.random_list))] 50 | self.assertEqual(self.sorted_list, heap_sorted) 51 | 52 | def test_node_traversal(self): 53 | heap = self.random_heap() 54 | self.assertEqual(sum(1 for _ in heap.nodes()), len(heap)) 55 | 56 | def test_manual_node_deletion(self): 57 | heap = self.random_heap() 58 | for i in trange(len(self.random_list)//20): 59 | random_node: HeapNode[int, int] = random.choice(list(heap.nodes())) 60 | heap.decrease_key(random_node, -1) 61 | heap.pop() 62 | random_node.deleted = True 63 | self.assertEqual(len(heap), len(self.random_list) - i - 1) 64 | 65 | def test_node_deletion(self): 66 | heap = self.random_heap() 67 | for i in trange(len(self.random_list)//20): 68 | random_node: HeapNode[int, int] = random.choice(list(heap.nodes())) 69 | heap.remove(random_node) 70 | self.assertEqual(len(heap), len(self.random_list) - i - 1) 
# Characters from which random test strings are drawn: printable ASCII plus newline, tab and carriage return
STR_BYTES: FrozenSet[str] = frozenset([
    chr(i) for i in range(32, 127)
] + ['\n', '\t', '\r'])
# All 52 ASCII letters, lowercase first. ``range`` excludes its stop value, so the stop has to be one past
# 'z' / 'Z'; previously those two letters were silently left out.
LETTERS: Tuple[str, ...] = tuple(
    chr(i) for i in range(ord('a'), ord('z') + 1)
) + tuple(
    chr(i) for i in range(ord('A'), ord('Z') + 1)
)

# A @filetype_test function must be named f"{FILETYPE_TEST_PREFIX}<filetype name>{FILETYPE_TEST_SUFFIX}"
FILETYPE_TEST_PREFIX = 'test_'
FILETYPE_TEST_SUFFIX = '_formatting'
test_equality: 68 | self.assertEqual(tree, new_obj) 69 | 70 | return wrapper 71 | 72 | 73 | class TestFormatting(TestCase): 74 | @staticmethod 75 | def make_random_int() -> int: 76 | return random.randint(-1000000, 1000000) 77 | 78 | @staticmethod 79 | def make_random_float() -> float: 80 | return random.random() 81 | 82 | @staticmethod 83 | def make_random_bool() -> bool: 84 | return random.choice([True, False]) 85 | 86 | @staticmethod 87 | def make_random_str(exclude_bytes: FrozenSet[str] = frozenset(), allow_empty_strings: bool = True) -> str: 88 | if allow_empty_strings: 89 | min_length = 0 90 | else: 91 | min_length = 1 92 | return ''.join(random.choices(list(STR_BYTES - exclude_bytes), k=random.randint(min_length, 128))) 93 | 94 | @staticmethod 95 | def make_random_non_container(exclude_bytes: FrozenSet[str] = frozenset(), allow_empty_strings: bool = True): 96 | return random.choice([ 97 | TestFormatting.make_random_int, 98 | TestFormatting.make_random_bool, 99 | TestFormatting.make_random_float, 100 | partial( 101 | TestFormatting.make_random_str, exclude_bytes=exclude_bytes, allow_empty_strings=allow_empty_strings 102 | ) 103 | ])() 104 | 105 | @staticmethod 106 | def _make_random_obj(obj_stack, force_container_type: Optional[Type[Union[dict, list]]] = None, *args, **kwargs): 107 | r = random.random() 108 | NON_CONTAINER_PROB = 0.1 109 | CONTAINER_PROB = (1.0 - NON_CONTAINER_PROB) / 2.0 110 | if r <= NON_CONTAINER_PROB: 111 | ret = TestFormatting.make_random_non_container(*args, **kwargs) 112 | elif r <= NON_CONTAINER_PROB + CONTAINER_PROB: 113 | if force_container_type is not None: 114 | ret = force_container_type() 115 | else: 116 | ret = [] 117 | obj_stack.append(ret) 118 | else: 119 | if force_container_type is not None: 120 | ret = force_container_type() 121 | else: 122 | ret = {} 123 | obj_stack.append(ret) 124 | return ret 125 | 126 | @staticmethod 127 | def make_random_obj( 128 | force_string_keys: bool = False, 129 | allow_empty_containers: bool = 
True, 130 | alternate_containers: bool = False, 131 | *args, **kwargs): 132 | obj_stack = [] 133 | ret = TestFormatting._make_random_obj(obj_stack, *args, **kwargs) 134 | 135 | while obj_stack: 136 | expanding = obj_stack.pop() 137 | size = int(random.betavariate(0.75, 5) * 10) 138 | if isinstance(expanding, dict): 139 | if size == 0 and not allow_empty_containers: 140 | if force_string_keys: 141 | expanding[TestFormatting.make_random_str(*args, **kwargs)] = \ 142 | TestFormatting.make_random_non_container(*args, **kwargs) 143 | else: 144 | expanding[TestFormatting.make_random_non_container(*args, **kwargs)] = \ 145 | TestFormatting.make_random_non_container(*args, **kwargs) 146 | else: 147 | if alternate_containers: 148 | force_container_type = list 149 | else: 150 | force_container_type = None 151 | for _ in range(size): 152 | if force_string_keys: 153 | expanding[TestFormatting.make_random_str(*args, **kwargs)] = \ 154 | TestFormatting._make_random_obj( 155 | obj_stack, force_container_type=force_container_type, *args, **kwargs 156 | ) 157 | else: 158 | expanding[TestFormatting.make_random_non_container(*args, **kwargs)] = \ 159 | TestFormatting._make_random_obj( 160 | obj_stack, force_container_type=force_container_type, *args, **kwargs 161 | ) 162 | else: 163 | if size == 0 and not allow_empty_containers: 164 | expanding.append(TestFormatting.make_random_non_container(*args, **kwargs)) 165 | else: 166 | if alternate_containers: 167 | force_container_type = dict 168 | else: 169 | force_container_type = None 170 | for _ in range(size): 171 | expanding.append(TestFormatting._make_random_obj( 172 | obj_stack, force_container_type=force_container_type, *args, **kwargs 173 | )) 174 | return ret 175 | 176 | def test_formatter_coverage(self): 177 | for name in graphtage.FILETYPES_BY_TYPENAME.keys(): 178 | if not hasattr(self, f'test_{name}_formatting'): 179 | self.fail(f"Filetype {name} is missing a `test_{name}_formatting` test function") 180 | 181 | @filetype_test 
182 | def test_json_formatting(self): 183 | orig_obj = TestFormatting.make_random_obj(force_string_keys=True) 184 | return orig_obj, json.dumps(orig_obj) 185 | 186 | @filetype_test 187 | def test_csv_formatting(self): 188 | orig_obj = [ 189 | [TestFormatting.make_random_non_container( 190 | exclude_bytes=frozenset('\n\r\t,"\'') 191 | ) for _ in range(random.randint(0, 10))] 192 | for _ in range(random.randint(0, 10)) 193 | ] 194 | s = StringIO() 195 | writer = csv.writer(s) 196 | for row in orig_obj: 197 | writer.writerow(row) 198 | return orig_obj, s.getvalue() 199 | 200 | @staticmethod 201 | def make_random_xml() -> xml.XMLElementObj: 202 | ret = xml.XMLElementObj('', {}) 203 | elem_stack = [ret] 204 | while elem_stack: 205 | elem = elem_stack.pop() 206 | elem.tag = ''.join(random.choices(LETTERS, k=random.randint(1, 20))) 207 | elem.attrib = { 208 | ''.join(random.choices(LETTERS, k=random.randint(1, 10))): TestFormatting.make_random_str() 209 | for _ in range(int(random.betavariate(0.75, 5) * 10)) 210 | } 211 | if random.random() <= 0.5: 212 | elem.text = TestFormatting.make_random_str() 213 | elem.children = [xml.XMLElementObj('', {}) for _ in range(int(random.betavariate(0.75, 5) * 10))] 214 | elem_stack.extend(elem.children) 215 | return ret 216 | 217 | # Do not test equality for XML because the XMLFormatter auto-indents and thereby adds extra spaces to element text 218 | @filetype_test(test_equality=False, iterations=250) 219 | def test_xml_formatting(self): 220 | orig_obj = self.make_random_xml() 221 | return orig_obj, str(orig_obj) 222 | 223 | def test_html_formatting(self): 224 | # For now, HTML support is implemented through XML, so we don't need a separate test. 225 | # However, test_formatter_coverage will complain unless this function is here! 226 | pass 227 | 228 | def test_json5_formatting(self): 229 | # For now, JSON5 support is implemented using the regular JSON formatter, so we don't need a separate test. 
230 | # However, test_formatter_coverage will complain unless this function is here! 231 | pass 232 | 233 | def test_pickle_formatting(self): 234 | # test_formatter_coverage will complain unless this function is here! 235 | # TODO: Implement a Pickle formatting test 236 | pass 237 | 238 | @filetype_test 239 | def test_yaml_formatting(self): 240 | orig_obj = TestFormatting.make_random_obj( 241 | allow_empty_containers=False, 242 | # The YAML formatter doesn't properly handle certain special characters 243 | # TODO: Relax the excluded bytes in the following argument once the formatter properly handles special chars 244 | exclude_bytes=frozenset('\t \\\'"\r:[]{}&\n()`|+%<>#*^%$@!~_+-=.,;\n?/'), 245 | # The YAML formatter doesn't properly handle nested lists yet 246 | # TODO: Remove the next argument once the formatter properly formats nested lists 247 | alternate_containers=True, 248 | # The YAML formatter also doesn't properly handle empty strings that are dict keys: 249 | # TODO: Remove the next argument once the formatter properly formats empty strings as dict keys 250 | allow_empty_strings=False 251 | ) 252 | 253 | s = StringIO() 254 | yaml.dump(orig_obj, s, Dumper=graphtage.yaml.Dumper) 255 | return orig_obj, s.getvalue() 256 | 257 | @filetype_test(test_equality=False) 258 | def test_plist_formatting(self): 259 | orig_obj = TestFormatting.make_random_obj(force_string_keys=True, exclude_bytes=frozenset('<>/\n&?|@{}[]')) 260 | return orig_obj, plistlib.dumps(orig_obj) 261 | -------------------------------------------------------------------------------- /test/test_graphtage.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | from unittest import TestCase 3 | 4 | import graphtage 5 | import graphtage.json 6 | import graphtage.multiset 7 | import graphtage.tree 8 | 9 | from graphtage.printer import Printer 10 | 11 | 12 | class TestGraphtage(TestCase): 13 | @classmethod 14 | def setUpClass(cls): 15 | 
cls.small_from = graphtage.json.build_tree({ 16 | "test": "foo", 17 | "baz": 1 18 | }) 19 | cls.small_to = graphtage.json.build_tree({ 20 | "test": "bar", 21 | "baz": 2 22 | }) 23 | cls.list_from = graphtage.json.build_tree([0, 1, 2, 3, 4, 5]) 24 | cls.list_to = graphtage.json.build_tree([1, 2, 3, 4, 5]) 25 | 26 | def test_string_diff_printing(self): 27 | s1 = graphtage.StringNode("abcdef") 28 | s2 = graphtage.StringNode("azced") 29 | diff = s1.diff(s2) 30 | out_stream = StringIO() 31 | p = Printer(ansi_color=True, out_stream=out_stream) 32 | diff.print(p) 33 | self.assertEqual(diff.edited_cost(), 5) 34 | self.assertEqual('\x1b[32m"\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32ma\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mz̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1mb̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mc\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1md̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32me\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1md̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1mf̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m"\x1b[39m', out_stream.getvalue()) 35 | 36 | def test_string_diff_remove_insert_reordering(self): 37 | s1 = graphtage.StringNode('abcdefg') 38 | s2 = graphtage.StringNode('abhijfg') 39 | diff = s1.diff(s2) 40 | out_stream = StringIO() 41 | p = Printer(ansi_color=True, out_stream=out_stream) 42 | diff.print(p) 43 | 
self.assertEqual('\x1b[32m"\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32ma\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mb\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mh̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mi̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mj̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1mc̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1md̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1me̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mf\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mg\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m"\x1b[39m', out_stream.getvalue()) 44 | 45 | def test_small_diff(self): 46 | diff = self.small_from.diff(self.small_to) 47 | self.assertIsInstance(diff, graphtage.DictNode) 48 | self.assertIsInstance(diff, graphtage.tree.EditedTreeNode) 49 | self.assertEqual(1, len(diff.edit_list)) 50 | self.assertIsInstance(diff.edit_list[0], graphtage.multiset.MultiSetEdit) 51 | has_test_match = False 52 | has_baz_match = False 53 | for edit in diff.edit_list[0].edits(): 54 | if edit.bounds().upper_bound > 0: 55 | self.assertIsInstance(edit, graphtage.KeyValuePairEdit) 56 | key_edit = edit.key_edit 57 | value_edit = edit.value_edit 58 | if isinstance(value_edit.from_node, graphtage.StringNode): 59 | self.assertIsInstance(key_edit.to_node, graphtage.StringNode) 60 | self.assertEqual(key_edit.from_node.object, 'test') 61 | self.assertEqual(value_edit.from_node.object, 'foo') 62 | self.assertEqual(value_edit.to_node.object, 'bar') 63 | 
self.assertEqual(edit.bounds().upper_bound, 6) 64 | self.assertFalse(has_test_match) 65 | has_test_match = True 66 | elif isinstance(value_edit.from_node, graphtage.IntegerNode): 67 | self.assertIsInstance(value_edit.to_node, graphtage.IntegerNode) 68 | self.assertEqual(value_edit.from_node.object, 1) 69 | self.assertEqual(value_edit.to_node.object, 2) 70 | self.assertEqual(value_edit.bounds().upper_bound, 1) 71 | self.assertFalse(has_baz_match) 72 | has_baz_match = True 73 | else: 74 | self.fail() 75 | self.assertTrue(has_test_match) 76 | self.assertTrue(has_baz_match) 77 | 78 | def test_list_diff(self): 79 | diff = self.list_from.diff(self.list_to) 80 | self.assertIsInstance(diff, graphtage.ListNode) 81 | self.assertIsInstance(diff, graphtage.tree.EditedTreeNode) 82 | self.assertEqual(1, len(diff.edit_list)) 83 | self.assertIsInstance(diff.edit_list[0], graphtage.EditDistance) 84 | for edit in diff.edit_list[0].edits(): 85 | if edit.bounds().upper_bound > 0: 86 | self.assertIsInstance(edit, graphtage.Remove) 87 | self.assertIsInstance(edit.from_node, graphtage.IntegerNode) 88 | self.assertEqual(edit.from_node.object, 0) 89 | self.assertIsInstance(edit.to_node, graphtage.ListNode) 90 | self.assertEqual(edit.to_node, self.list_from) 91 | else: 92 | self.assertIsInstance(edit, graphtage.Match) 93 | 94 | def test_single_element_list(self): 95 | diff = graphtage.json.build_tree([1]).diff(graphtage.json.build_tree([2])) 96 | self.assertIsInstance(diff, graphtage.ListNode) 97 | self.assertIsInstance(diff, graphtage.tree.EditedTreeNode) 98 | self.assertEqual(1, len(diff.edit_list)) 99 | self.assertIsInstance(diff.edit_list[0], graphtage.FixedLengthSequenceEdit) 100 | 101 | def test_empty_list(self): 102 | diff = graphtage.ListNode(()).diff(graphtage.ListNode(())) 103 | self.assertEqual(1, len(diff.edit_list)) 104 | self.assertIsInstance(diff.edit_list[0], graphtage.Match) 105 | self.assertEqual(0, diff.edit_list[0].bounds().upper_bound) 106 | 107 | def 
test_null_json(self): 108 | diff = graphtage.json.build_tree([None]).diff(graphtage.json.build_tree([1])) 109 | self.assertIsInstance(diff, graphtage.ListNode) 110 | self.assertIsInstance(diff, graphtage.tree.EditedTreeNode) 111 | self.assertEqual(1, len(diff.edit_list)) 112 | self.assertIsInstance(diff.edit_list[0], graphtage.FixedLengthSequenceEdit) 113 | -------------------------------------------------------------------------------- /test/test_levenshtein.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import List 3 | from unittest import TestCase 4 | 5 | from tqdm import trange 6 | 7 | from graphtage.edits import Edit, Insert, Match, Remove 8 | from graphtage import EditDistance, string_edit_distance 9 | 10 | 11 | LETTERS: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' 12 | 13 | 14 | class TestEditDistance(TestCase): 15 | def test_string_edit_distance_reconstruction(self): 16 | for _ in trange(200): 17 | str1_len = random.randint(10, 30) 18 | str2_len = random.randint(10, 30) 19 | str_from = ''.join(random.choices(LETTERS, k=str1_len)) 20 | str_to = ''.join(random.choices(LETTERS, k=str2_len)) 21 | distance: EditDistance = string_edit_distance(str_from, str_to) 22 | edits: List[Edit] = list(distance.edits()) 23 | reconstructed_from = '' 24 | reconstructed_to = '' 25 | for edit in edits: 26 | if isinstance(edit, Match): 27 | reconstructed_from += edit.from_node.object 28 | reconstructed_to += edit.to_node.object 29 | elif isinstance(edit, Remove): 30 | reconstructed_from += edit.from_node.object 31 | elif isinstance(edit, Insert): 32 | reconstructed_to += edit.from_node.object 33 | else: 34 | self.fail() 35 | self.assertEqual(str_from, reconstructed_from) 36 | self.assertEqual(str_to, reconstructed_to) 37 | 38 | def test_string_edit_distance_optimality(self): 39 | for _ in trange(200): 40 | str_len = random.randint(10, 30) 41 | str_from = ''.join(random.choices(LETTERS, 
k=str_len)) 42 | num_ground_truth_edits: int = 0 43 | str_to = '' 44 | for i in range(str_len): 45 | while random.random() < 0.2: 46 | # 20% chance of inserting a new character 47 | str_to += random.choice(LETTERS) 48 | num_ground_truth_edits += 1 49 | num_ground_truth_edits += 1 50 | if random.random() < 0.2: 51 | # 20% chance of removing the original character 52 | pass 53 | else: 54 | str_to += str_from[i] 55 | distance: EditDistance = string_edit_distance(str_from, str_to) 56 | edits: List[Edit] = list(distance.edits()) 57 | num_edits = len(edits) 58 | if num_ground_truth_edits < num_edits: 59 | print() 60 | print('\n'.join([e.__class__.__name__ for e in edits])) 61 | print(str_from, str_to) 62 | self.assertGreaterEqual(num_ground_truth_edits, num_edits) 63 | 64 | def test_empty_string_edit_distance(self): 65 | with self.assertRaises(StopIteration): 66 | next(string_edit_distance('', '').edits()) 67 | self.assertEqual( 68 | 3, 69 | sum(1 for _ in string_edit_distance('foo', '').edits()) 70 | ) 71 | self.assertEqual( 72 | 3, 73 | sum(1 for _ in string_edit_distance('', 'foo').edits()) 74 | ) 75 | -------------------------------------------------------------------------------- /test/test_matching.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import random 3 | from unittest import TestCase 4 | 5 | import numpy as np 6 | from tqdm import tqdm, trange 7 | 8 | from graphtage.matching import get_dtype, min_weight_bipartite_matching, WeightedBipartiteMatcher 9 | 10 | from .test_bounds import RandomDecreasingRange 11 | 12 | 13 | class TestWeightedBipartiteMatcher(TestCase): 14 | def test_weighted_bipartite_matching(self): 15 | for n in trange(1, 25, 3): 16 | from_nodes = list(range(n)) 17 | to_nodes = list(range(n)) 18 | edges = [ 19 | [RandomDecreasingRange() for _ in range(len(to_nodes))] for _ in range(len(from_nodes)) 20 | ] 21 | for i in range(min(len(from_nodes), len(to_nodes))): 22 | edges[i][i] = 
RandomDecreasingRange(fixed_lb=0, fixed_ub=100000, final_value=0) 23 | matcher = WeightedBipartiteMatcher( 24 | from_nodes=from_nodes, 25 | to_nodes=to_nodes, 26 | get_edge=lambda n1, n2: edges[n1][n2] 27 | ) 28 | initial_bounds = matcher.bounds() 29 | prev_diff = initial_bounds.upper_bound - initial_bounds.lower_bound 30 | with tqdm(leave=False, total=prev_diff) as t: 31 | t.update(0) 32 | while matcher.tighten_bounds(): 33 | new_bounds = matcher.bounds() 34 | new_diff = new_bounds.upper_bound - new_bounds.lower_bound 35 | self.assertLess(new_diff, prev_diff) 36 | t.update(prev_diff - new_diff) 37 | prev_diff = new_diff 38 | self.assertTrue(matcher.bounds().definitive()) 39 | self.assertEqual(0, matcher.bounds().upper_bound) 40 | 41 | def test_min_weight_bipartite_matching(self): 42 | for _ in trange(50): 43 | num_from = random.randint(1, 500) 44 | num_to = random.randint(1, 500) 45 | from_nodes = [f'f{i}' for i in range(num_from)] 46 | to_nodes = [f't{i}' for i in range(num_to)] 47 | # Force an optimal, zero-value matching: 48 | expected_matching = { 49 | i: (i, 0) for i in range(min(num_from, num_to)) 50 | } 51 | edges = { 52 | (from_nodes[i], to_nodes[i]): 0 for i in range(min(num_from, num_to)) 53 | } 54 | edge_probability = 0.9 55 | edges.update({ 56 | (i, j): random.randint(1, 2**16) for i, j in itertools.product(from_nodes, to_nodes) 57 | if (i, j) not in edges and random.random() < edge_probability 58 | }) 59 | 60 | def get_edge(f, t): 61 | if (f, t) in edges: 62 | return edges[(f, t)] 63 | else: 64 | return None 65 | 66 | matching = min_weight_bipartite_matching(from_nodes=from_nodes, to_nodes=to_nodes, get_edges=get_edge) 67 | 68 | self.assertEqual(expected_matching, matching) 69 | 70 | def test_get_dtype(self): 71 | for min_range, max_range, expected in ( 72 | (0, 255, np.uint8), 73 | (-1, 127, np.int8), 74 | (-128, 255, np.int16), 75 | (0, 2**64 - 1, np.uint64), 76 | (0, 2**64, int) 77 | ): 78 | actual = get_dtype(min_range, max_range) 79 | 
self.assertEqual(np.dtype(expected), actual) 80 | -------------------------------------------------------------------------------- /test/test_object_set.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from graphtage.object_set import ObjectSet 4 | 5 | 6 | class UnhashableWithBrokenEquality: 7 | def __init__(self, value): 8 | self.value = value 9 | 10 | def __eq__(self, other): 11 | raise ValueError() 12 | 13 | 14 | class Unhashable(UnhashableWithBrokenEquality): 15 | def __eq__(self, other): 16 | return isinstance(other, Unhashable) and self.value == other.value 17 | 18 | 19 | class TestObjectSet(TestCase): 20 | def test_unhashability(self): 21 | self.assertRaises(TypeError, lambda: hash(Unhashable(10))) 22 | 23 | def test_object_set(self): 24 | u = Unhashable(10) 25 | u2 = Unhashable(11) 26 | objs = ObjectSet((10, u, u2)) 27 | self.assertIn(10, objs) 28 | self.assertIn(u, objs) 29 | self.assertIn(u2, objs) 30 | self.assertEqual(3, len(objs)) 31 | objs.remove(u) 32 | self.assertIn(10, objs) 33 | self.assertNotIn(u, objs) 34 | self.assertIn(u2, objs) 35 | self.assertEqual(2, len(objs)) 36 | 37 | def test_broken_equality(self): 38 | u = UnhashableWithBrokenEquality(10) 39 | u2 = UnhashableWithBrokenEquality(10) 40 | # this will default to uniqueness by identity 41 | objs = ObjectSet((10, u, u2)) 42 | self.assertIn(10, objs) 43 | self.assertIn(u, objs) 44 | self.assertIn(u2, objs) 45 | self.assertEqual(3, len(objs)) 46 | -------------------------------------------------------------------------------- /test/test_pydiff.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | from unittest import TestCase 3 | 4 | import graphtage 5 | from graphtage.pydiff import build_tree, print_diff, PyDiffFormatter 6 | 7 | from .timing import run_with_time_limit 8 | 9 | 10 | class TestPyDiff(TestCase): 11 | def test_build_tree(self): 12 | 
self.assertIsInstance(build_tree([1, 2, 3, 4]), graphtage.ListNode) 13 | self.assertIsInstance(build_tree({1: 2, 'a': 'b'}), graphtage.DictNode) 14 | 15 | def test_diff(self): 16 | t1 = [1, 2, {3: "three"}, 4] 17 | t2 = [1, 2, {3: 3}, "four"] 18 | printer = graphtage.printer.Printer(ansi_color=True) 19 | print_diff(t1, t2, printer=printer) 20 | 21 | def test_custom_class(self): 22 | class Foo: 23 | def __init__(self, bar, baz): 24 | self.bar = bar 25 | self.baz = baz 26 | 27 | printer = graphtage.printer.Printer(ansi_color=True) 28 | print_diff(Foo("bar", "baz"), Foo("bar", "bak"), printer=printer) 29 | 30 | def test_nested_tuple_diff(self): 31 | tree = build_tree({"a": (1, 2)}) 32 | self.assertIsInstance(tree, graphtage.DictNode) 33 | children = tree.children() 34 | self.assertEqual(1, len(children)) 35 | kvp = children[0] 36 | self.assertIsInstance(kvp, graphtage.KeyValuePairNode) 37 | self.assertIsInstance(kvp.key, graphtage.StringNode) 38 | self.assertIsInstance(kvp.value, graphtage.ListNode) 39 | 40 | def test_infinite_loop(self): 41 | """Reproduces https://github.com/trailofbits/graphtage/issues/82""" 42 | 43 | @dataclasses.dataclass 44 | class Thing: 45 | foo: str 46 | 47 | with run_with_time_limit(60): 48 | _ = graphtage.pydiff.diff([Thing("ok")], [Thing("bad")]) 49 | -------------------------------------------------------------------------------- /test/test_search.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from tqdm import trange 4 | 5 | from graphtage.search import IterativeTighteningSearch 6 | from .test_bounds import RandomDecreasingRange 7 | 8 | 9 | class TestIterativeTighteningSearch(TestCase): 10 | def test_iterative_tightening_search(self): 11 | speedups = 0 12 | tests = 0 13 | try: 14 | t = trange(100) 15 | for _ in t: 16 | ranges = [RandomDecreasingRange() for _ in range(100)] 17 | best_range: RandomDecreasingRange = None 18 | for r in ranges: 19 | if best_range is 
None or r.final_value < best_range.final_value: 20 | best_range = r 21 | search = IterativeTighteningSearch(iter(ranges)) 22 | while search.tighten_bounds(): 23 | pass 24 | result = search.best_match 25 | tightenings = sum(r.tightenings for r in ranges) 26 | untightened = 0 27 | for r in ranges: 28 | t_before = r.tightenings 29 | while r.tighten_bounds(): 30 | pass 31 | untightened += r.tightenings - t_before 32 | t.desc = f"{(untightened + tightenings) / tightenings:.01f}x Speedup" 33 | speedups += (untightened + tightenings) / tightenings 34 | tests += 1 35 | self.assertEqual(best_range.final_value, result.final_value) 36 | finally: 37 | print(f"Average speedup: {speedups / tests:.01f}x") 38 | -------------------------------------------------------------------------------- /test/test_timing.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from .timing import run_with_time_limit 4 | 5 | 6 | def infinite_loop(): 7 | while True: 8 | pass 9 | 10 | 11 | def limited_infinite_loop(): 12 | with run_with_time_limit(seconds=1): 13 | infinite_loop() 14 | 15 | 16 | class TestTiming(TestCase): 17 | def test_time_limit(self): 18 | self.assertRaises(TimeoutError, limited_infinite_loop) 19 | 20 | def test_non_infinite_loop(self): 21 | with run_with_time_limit(seconds=60): 22 | _ = 10 23 | -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from unittest import TestCase 3 | 4 | from graphtage.utils import largest, smallest, SparseMatrix 5 | 6 | 7 | class TestSparseMatrix(TestCase): 8 | def test_matrix_bounds(self): 9 | matrix: SparseMatrix[int] = SparseMatrix(num_rows=10, num_cols=10, default_value=None) 10 | with self.assertRaises(IndexError): 11 | _ = matrix[matrix.num_rows] 12 | with self.assertRaises(IndexError): 13 | _ = matrix[0][matrix.num_cols] 14 
| 15 | def test_matrix_default_value(self): 16 | matrix: SparseMatrix[int] = SparseMatrix(default_value=10) 17 | self.assertEqual(matrix[0][0], 10) 18 | matrix[0][0] = 11 19 | self.assertEqual(matrix[0][0], 11) 20 | 21 | def test_matrix_getsizeof(self): 22 | matrix: SparseMatrix[int] = SparseMatrix() 23 | size_before = matrix.getsizeof() 24 | dim = 1000 25 | int_sizes = 0 26 | for i in range(dim): 27 | for j in range(dim): 28 | matrix[i][j] = i * dim + j 29 | int_sizes += sys.getsizeof(matrix[i][j]) 30 | size_after = matrix.getsizeof() 31 | self.assertGreaterEqual(size_after - size_before, int_sizes) 32 | 33 | def test_matrix_shape(self): 34 | matrix: SparseMatrix[int] = SparseMatrix() 35 | self.assertEqual((0, 0), matrix.shape()) 36 | matrix[10][20] = 1 37 | self.assertEqual((11, 21), matrix.shape()) 38 | matrix = SparseMatrix(num_rows=10, num_cols=10) 39 | self.assertEqual((10, 10), matrix.shape()) 40 | 41 | def test_smallest(self): 42 | for i in smallest(range(1000), n=10): 43 | self.assertGreater(10, i) 44 | for i in smallest(*list(range(1000)), n=10): 45 | self.assertGreater(10, i) 46 | 47 | def test_largest(self): 48 | for i in largest(range(1000), n=10): 49 | self.assertLess(1000 - 11, i) 50 | for i in largest(*list(range(1000)), n=10): 51 | self.assertLess(1000 - 11, i) 52 | -------------------------------------------------------------------------------- /test/test_xml.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | from graphtage.utils import Tempfile 5 | from graphtage.xml import XML 6 | 7 | 8 | class TestXML(unittest.TestCase): 9 | def test_infinite_loop(self): 10 | """Reproduces https://github.com/trailofbits/graphtage/issues/32""" 11 | xml = XML.default_instance 12 | one_xml = b""" 13 | 14 | 15 | child1 16 | child2 17 | 18 | 19 | """ 20 | two_xml = b""" 21 | 22 | 23 | child1 24 | child2 25 | 26 | 27 | """ 28 | with Tempfile(one_xml) as one, Tempfile(two_xml) as two: 29 | t1 = 
xml.build_tree(one) 30 | t2 = xml.build_tree(two) 31 | for edit in t1.get_all_edits(t2): 32 | print(edit) 33 | -------------------------------------------------------------------------------- /test/timing.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import _thread 3 | from contextlib import contextmanager 4 | 5 | 6 | @contextmanager 7 | def run_with_time_limit(seconds: int): 8 | timer = threading.Timer(seconds, _thread.interrupt_main) 9 | timer.start() 10 | 11 | try: 12 | yield 13 | return 14 | except: 15 | pass 16 | finally: 17 | timer.cancel() 18 | raise TimeoutError(f"timeout after {seconds} seconds") 19 | --------------------------------------------------------------------------------