├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── artifacts.yml
    │   ├── check_version.yml
    │   ├── pip-audit.yml
    │   ├── publish_docs.yml
    │   ├── pythonpackage.yml
    │   └── pythonpublish.yml
├── .gitignore
├── CITATION.cff
├── CODEOWNERS
├── CODE_OF_CONDUCT.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── bindist
    ├── .gitignore
    ├── Makefile
    └── graphtage_bin.py
├── docs
    ├── .gitignore
    ├── Makefile
    ├── _static
    │   └── localtoc.js
    ├── _templates
    │   ├── layout.html
    │   └── searchbox.html
    ├── build_api.py
    ├── builders.rst
    ├── conf.py
    ├── example.png
    ├── extending.rst
    ├── filetypes.rst
    ├── howitworks.rst
    ├── index.rst
    ├── library.rst
    └── printing.rst
├── graphtage
    ├── __init__.py
    ├── __main__.py
    ├── ast.py
    ├── bounds.py
    ├── builder.py
    ├── constraints.py
    ├── csv.py
    ├── dataclasses.py
    ├── debug.py
    ├── edits.py
    ├── expressions.py
    ├── fibonacci.py
    ├── formatter.py
    ├── graphtage.py
    ├── json.py
    ├── levenshtein.py
    ├── matching.py
    ├── multiset.py
    ├── object_set.py
    ├── pickle.py
    ├── plist.py
    ├── printer.py
    ├── progress.py
    ├── pydiff.py
    ├── search.py
    ├── sequences.py
    ├── tree.py
    ├── utils.py
    ├── version.py
    ├── xml.py
    └── yaml.py
├── setup.py
└── test
    ├── __init__.py
    ├── test_bounds.py
    ├── test_builder.py
    ├── test_constraints.py
    ├── test_dataclasses.py
    ├── test_expressions.py
    ├── test_fibonacci.py
    ├── test_formatting.py
    ├── test_graphtage.py
    ├── test_levenshtein.py
    ├── test_matching.py
    ├── test_object_set.py
    ├── test_pydiff.py
    ├── test_search.py
    ├── test_timing.py
    ├── test_utils.py
    ├── test_xml.py
    └── timing.py


/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # Dependabot configuration: check the GitHub Actions dependencies for
2 | # updates once a day.
3 | version: 2
4 | updates:
5 |   - package-ecosystem: "github-actions"
6 |     directory: "/"
7 |     schedule:
8 |       interval: "daily"
9 | 


--------------------------------------------------------------------------------
/.github/workflows/artifacts.yml:
--------------------------------------------------------------------------------
 1 | # Builds the Graphtage binary distribution for every pushed "v*" tag and
 2 | # hands the resulting file to softprops/action-gh-release.
 3 | name: Build binary artifacts
 4 | 
 5 | on:
 6 |   push:
 7 |     tags: ["v*"]
 8 | 
 9 | jobs:
10 |   binaries:
11 |     runs-on: ${{ matrix.os }}
12 | 
13 |     strategy:
14 |       matrix:
15 |         # windows-latest is not currently part of the matrix.
16 |         os:
17 |           - ubuntu-latest
18 |           - macos-latest
19 | 
20 |     permissions:
21 |       # NOTE: Needed to save artifacts to the repository.
22 |       contents: write
23 | 
24 |     steps:
25 |       - uses: actions/checkout@v4
26 |         with:
27 |           submodules: recursive
28 |       - name: Set up Python
29 |         uses: actions/setup-python@v5
30 |         with:
31 |           python-version: "3.x"
32 |       - name: Install dependencies
33 |         run: |
34 |           python -m pip install --upgrade pip
35 |           pip install setuptools pyinstaller
36 |           pip install .
37 |       # The name printed by `make dist-name` (newlines stripped) is stored
38 |       # as DIST_FILE so that the release step can locate the built file.
39 |       - name: Build the binary
40 |         run: |
41 |           make -C bindist
42 |           cd bindist && echo "DIST_FILE=$(make dist-name | tr -d '\n')" >> $GITHUB_ENV
43 |       - name: Release binary artifacts
44 |         uses: softprops/action-gh-release@v0.1.15
45 |         with:
46 |           files: bindist/${{ env.DIST_FILE }}
47 | 


--------------------------------------------------------------------------------
/.github/workflows/check_version.yml:
--------------------------------------------------------------------------------
 1 | # On release events, installs the package and inspects the version string
 2 | # printed by `graphtage -dumpversion`.
 3 | name: Check Release Version
 4 | 
 5 | on:
 6 |   release:
 7 |     types:
 8 |       - created
 9 |       - edited
10 |       - published
11 | 
12 | jobs:
13 |   versioncheck:
14 |     runs-on: ubuntu-latest
15 | 
16 |     steps:
17 |       - uses: actions/checkout@v4
18 |       - name: Set up Python
19 |         uses: actions/setup-python@v5
20 |         with:
21 |           python-version: "3.x"
22 |       - name: Install dependencies
23 |         run: |
24 |           python -m pip install --upgrade pip
25 |           pip install setuptools
26 |           pip install .
27 |       # `grep -qv git` succeeds only if some output line does not contain
28 |       # "git"; presumably development builds include "git" in their version
29 |       # string (verify against graphtage/version.py).
30 |       - name: Ensure graphtage.version.DEV_BUILD == False
31 |         run: graphtage -dumpversion | grep -qv git
32 | 


--------------------------------------------------------------------------------
/.github/workflows/pip-audit.yml:
--------------------------------------------------------------------------------
 1 | # IMPORTANT: Read and understand this template fully before applying it.
 2 | 
 3 | name: Scan dependencies for vulnerabilities with pip-audit
 4 | 
 5 | on:
 6 |   push:
 7 |     branches:
 8 |       - master
 9 |   pull_request:
10 |     branches:
11 |       - master
12 |   schedule:
13 |     # Runs once a day at 12:00 (GitHub evaluates cron schedules in UTC).
14 |     - cron: "0 12 * * *"
15 | 
16 | jobs:
17 |   pip-audit:
18 |     runs-on: ubuntu-latest
19 | 
20 |     steps:
21 |       - name: Checkout repository
22 |         uses: actions/checkout@v4
23 | 
24 |       - name: Install Python
25 |         uses: actions/setup-python@v5
26 |         with:
27 |           # IMPORTANT: You may need a more specific version here.
28 |           python-version: "3.x"
29 | 
30 |       # Install the project into a dedicated virtual environment; the same
31 |       # path is handed to pip-audit in the next step.
32 |       - name: Install project
33 |         run: |
34 |           python -m venv /tmp/pip-audit-env
35 |           source /tmp/pip-audit-env/bin/activate
36 | 
37 |           python -m pip install --upgrade pip setuptools wheel
38 |           python -m pip install .
39 | 
40 |       - name: Run pip-audit
41 |         uses: pypa/gh-action-pip-audit@v1.0.8
42 |         with:
43 |           virtual-environment: /tmp/pip-audit-env
44 | 


--------------------------------------------------------------------------------
/.github/workflows/publish_docs.yml:
--------------------------------------------------------------------------------
 1 | # Builds the HTML documentation and commits it to the gh-pages branch in a
 2 | # directory named after the pushed branch or tag. Pushes to master also
 3 | # repoint the `latest` symlink at that directory.
 4 | name: Publish documentation
 5 | 
 6 | on:
 7 |   push:
 8 |     branches:
 9 |       - master
10 |     tags:
11 |       - "v*"
12 | 
13 | jobs:
14 |   deploydocs:
15 |     runs-on: ubuntu-latest
16 |     permissions:
17 |       # NOTE: Needed to push to the repository.
18 |       contents: write
19 |     steps:
20 |       - uses: actions/checkout@v4
21 |         with:
22 |           path: graphtage
23 |       - name: Get the version
24 |         id: get_version
25 |         # VERSION is the pushed ref with its leading "refs/<type>/" removed
26 |         # (e.g. "master" or a tag name). It is exported through the
27 |         # GITHUB_ENV file rather than the deprecated `::set-env` workflow
28 |         # command, so no insecure-command opt-in is required.
29 |         run: echo "VERSION=${GITHUB_REF#refs/*/}" >> "$GITHUB_ENV"
30 |       - name: Set up Python 3.8
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: "3.8"
34 |       - name: Install dependencies
35 |         run: |
36 |           cd graphtage
37 |           python -m pip install --upgrade pip
38 |           pip install setuptools
39 |           pip install .[dev]
40 |       - name: Build documentation
41 |         run: |
42 |           cd graphtage/docs
43 |           make html
44 |       - name: Checkout gh-pages branch
45 |         uses: actions/checkout@v4
46 |         with:
47 |           ref: gh-pages
48 |           path: gh-pages
49 |           fetch-depth: 0
50 |       - name: Commit documentation changes
51 |         run: |
52 |           cd gh-pages
53 |           git pull
54 |           rm -rf ${VERSION}
55 |           mkdir ${VERSION}
56 |           cp -r ../graphtage/docs/_build/html/* ${VERSION}/
57 |           cd ${VERSION}
58 |           git config --local user.email "action@github.com"
59 |           git config --local user.name "GitHub Action"
60 |           git add .
61 |           if [ "$GITHUB_REF" == "refs/heads/master" ]; then
62 |             cd ..
63 |             # This is not a tag, so it is the latest:
64 |             rm -f latest
65 |             ln -s ${VERSION} latest
66 |             git add latest
67 |           fi
68 |           git commit -m "Update documentation for ${GITHUB_REF}" -a || true
69 |           # The above command will fail if no changes were present, so we ignore
70 |           # the return code.
71 |       - name: Push changes
72 |         # NOTE(review): this action is referenced by its mutable `master`
73 |         # branch; consider pinning it to a release tag or commit SHA.
74 |         uses: ad-m/github-push-action@master
75 |         with:
76 |           branch: gh-pages
77 |           directory: gh-pages
78 |           github_token: ${{ secrets.GITHUB_TOKEN }}
79 | 


--------------------------------------------------------------------------------
/.github/workflows/pythonpackage.yml:
--------------------------------------------------------------------------------
 1 | # Continuous integration: installs the package with its dev extras, lints it
 2 | # with flake8, builds the documentation, and runs pytest on each Python
 3 | # version in the matrix.
 4 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 5 | 
 6 | name: Python package
 7 | 
 8 | on:
 9 |   push:
10 |     branches:
11 |       - master
12 |   pull_request:
13 |     branches:
14 |       - master
15 | 
16 | jobs:
17 |   build:
18 |     runs-on: ubuntu-latest
19 | 
20 |     strategy:
21 |       matrix:
22 |         # Quoted so that YAML cannot read a version such as 3.10 as the float 3.1.
23 |         python-version: ["3.8", "3.9", "3.10", "3.11"]
24 | 
25 |     steps:
26 |       - uses: actions/checkout@v4
27 |       - name: Set up Python ${{ matrix.python-version }}
28 |         uses: actions/setup-python@v5
29 |         with:
30 |           python-version: ${{ matrix.python-version }}
31 |       - name: Install dependencies
32 |         run: |
33 |           python -m pip install --upgrade pip
34 |           pip install setuptools
35 |           pip install .[dev]
36 |       - name: Lint with flake8
37 |         run: |
38 |           pip install flake8
39 |           # stop the build if there are Python syntax errors or undefined names
40 |           flake8 graphtage test --count --select=E9,F63,F7,F82 --show-source --statistics
41 |           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
42 |           flake8 graphtage test --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
43 |       - name: Test building documentation
44 |         run: |
45 |           cd docs
46 |           make html
47 |       - name: Test with pytest
48 |         run: |
49 |           pip install pytest
50 |           pytest
51 | 


--------------------------------------------------------------------------------
/.github/workflows/pythonpublish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is published
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | name: Upload Python Package
 5 | 
 6 | on:
 7 |   release:
 8 |     types:
 9 |       - published
10 | 
11 | jobs:
12 |   deploy:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |       - name: Set up Python
18 |         uses: actions/setup-python@v5
19 |         with:
20 |           python-version: "3.x"
21 |       - name: Install dependencies
22 |         run: |
23 |           python -m pip install --upgrade pip
24 |           pip install setuptools wheel twine
25 |       - name: Build and publish
26 |         run: |
27 |           python setup.py sdist bdist_wheel
28 |           twine upload dist/*
29 |         env:
30 |           # Twine takes its upload credentials from these two variables.
31 |           TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
32 |           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
33 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .cache
3 | .python_history
4 | *.pyc
5 | build/
6 | dist/
7 | graphtage.egg-info
8 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | # This CITATION.cff file was generated with cffinit.
 2 | # Visit https://bit.ly/cffinit to generate yours today!
 3 | 
 4 | cff-version: 1.2.0
 5 | title: Graphtage
 6 | message: >-
 7 |   Graphtage is a command-line utility and underlying library
 8 |   for semantically comparing and merging tree-like
 9 |   structures, such as JSON, XML, HTML, YAML, plist, and CSS
10 |   files.
11 | type: software
12 | authors:
13 |   - given-names: Evan
14 |     family-names: Sultanik
15 |     email: evan.sultanik@trailofbits.com
16 |     affiliation: Trail of Bits
17 |     orcid: 'https://orcid.org/0000-0002-6246-1422'
18 | repository-code: 'https://github.com/trailofbits/graphtage'
19 | url: 'https://trailofbits.github.io/graphtage/'
20 | abstract: >-
21 |   Graphtage is a command-line utility and underlying library
22 |   for semantically comparing and merging tree-like
23 |   structures, such as JSON, XML, HTML, YAML, plist, and CSS
24 |   files. Its name is a portmanteau of “graph” and
25 |   “graftage”—the latter being the horticultural practice of
26 |   joining two trees together such that they grow as one.
27 | keywords:
28 |   - diffing
29 |   - graph isomorphism
30 |   - edit distance
31 | license: LGPL-3.0
32 | 


--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @ESultanik
2 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |  advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |  address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |  professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at jean.bisutti@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 | 
73 | [homepage]: https://www.contributor-covenant.org
74 | 
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                    GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 |   0. Additional Definitions.
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 |   1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 |   2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 |    a) under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 |    b) under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 |   3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 |    a) Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 |    b) Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 |   4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 |    a) Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 |    c) For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 |    d) Do one of the following:
100 | 
101 |        0) Convey the Minimal Corresponding Source under the terms of this
102 |        License, and the Corresponding Application Code in a form
103 |        suitable for, and under terms that permit, the user to
104 |        recombine or relink the Application with a modified version of
105 |        the Linked Version to produce a modified Combined Work, in the
106 |        manner specified by section 6 of the GNU GPL for conveying
107 |        Corresponding Source.
108 | 
109 |        1) Use a suitable shared library mechanism for linking with the
110 |        Library.  A suitable mechanism is one that (a) uses at run time
111 |        a copy of the Library already present on the user's computer
112 |        system, and (b) will operate properly with a modified version
113 |        of the Library that is interface-compatible with the Linked
114 |        Version.
115 | 
116 |    e) Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 |   5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 |    a) Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 |    b) Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 |   6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | recursive-include test *.*
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Graphtage
  2 | 
  3 | [![PyPI version](https://badge.fury.io/py/graphtage.svg)](https://badge.fury.io/py/graphtage)
  4 | [![Tests](https://github.com/trailofbits/graphtage/workflows/Python%20package/badge.svg)](https://github.com/trailofbits/graphtage/actions)
  5 | [![Slack Status](https://slack.empirehacking.nyc/badge.svg)](https://slack.empirehacking.nyc)
  6 | 
  7 | Graphtage is a command-line utility and [underlying library](https://trailofbits.github.io/graphtage/latest/library.html)
  8 | for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, plist, and CSS files. Its name is a
  9 | portmanteau of “graph” and “graftage”—the latter being the horticultural practice of joining two trees together such
 10 | that they grow as one.
 11 | 
 12 | ```console
 13 | $ echo Original: && cat original.json && echo Modified: && cat modified.json
 14 | ```
 15 | ```json
 16 | Original:
 17 | {
 18 |     "foo": [1, 2, 3, 4],
 19 |     "bar": "testing"
 20 | }
 21 | Modified:
 22 | {
 23 |     "foo": [2, 3, 4, 5],
 24 |     "zab": "testing",
 25 |     "woo": ["foobar"]
 26 | }
 27 | ```
 28 | ```console
 29 | $ graphtage original.json modified.json
 30 | ```
 31 | ```json
 32 | {
 33 |     "z̟b̶ab̟r̶": "testing",
 34 |     "foo": [
 35 |         1̶,̶
 36 |         2,
 37 |         3,
 38 |         4,̟
 39 |         5̟
 40 |     ],̟
 41 |     "̟w̟o̟o̟"̟:̟ ̟[̟
 42 |         "̟f̟o̟o̟b̟a̟r̟"̟
 43 |     ]̟
 44 | }
 45 | ```
 46 | 
 47 | ## Installation
 48 | 
 49 | ```console
 50 | $ pip3 install graphtage
 51 | ```
 52 | 
 53 | ## Command Line Usage
 54 | 
 55 | ### Output Formatting
 56 | Graphtage performs an analysis on an intermediate representation of the trees that is divorced from the filetypes of the
 57 | input files. This means, for example, that you can diff a JSON file against a YAML file. Also, the output format can be
 58 | different from the input format(s). By default, Graphtage will format the output diff in the same file format as the
 59 | first input file. But one could, for example, diff two JSON files and format the output in YAML. There are several
 60 | command-line arguments to specify these transformations, such as `--format`; please check the `--help` output for more
 61 | information.
 62 | 
 63 | By default, Graphtage pretty-prints its output with as many line breaks and indents as possible.
 64 | ```json
 65 | {
 66 |     "foo": [
 67 |         1,
 68 |         2,
 69 |         3
 70 |     ],
 71 |     "bar": "baz"
 72 | }
 73 | ```
 74 | Use the `--join-lists` or `-jl` option to suppress linebreaks after list items:
 75 | ```json
 76 | {
 77 |     "foo": [1, 2, 3],
 78 |     "bar": "baz"
 79 | }
 80 | ```
 81 | Likewise, use the `--join-dict-items` or `-jd` option to suppress linebreaks after key/value pairs in a dict:
 82 | ```json
 83 | {"foo": [
 84 |     1,
 85 |     2,
 86 |     3
 87 | ], "bar":  "baz"}
 88 | ```
 89 | Use `--condensed` or `-j` to apply both of these options:
 90 | ```json
 91 | {"foo": [1, 2, 3], "bar": "baz"}
 92 | ```
 93 | 
 94 | The `--only-edits` or `-e` option will print out a list of edits rather than applying them to the input file in place.
 95 | 
 96 | The `--edit-digest` or `-d` option is like `--only-edits` but prints a more concise context for each edit that is more
 97 | human-readable.
 98 | 
 99 | ### Matching Options
100 | By default, Graphtage tries to match all possible pairs of elements in a dictionary.
101 | 
102 | Matching two dictionaries with each other is hard. Although computationally tractable, this can sometimes be onerous for 
103 | input files with huge dictionaries. Graphtage has three different strategies for matching dictionaries:
104 | 1. `--dict-strategy match` (the most computationally expensive) tries to match all pairs of keys and values between the
105 |    two dictionaries, resulting in a match of minimum edit distance;
106 | 2. `--dict-strategy none` (the least computationally expensive) will not attempt to match any key/value pairs unless
107 |    they have the exact same key; and
108 | 3. `--dict-strategy auto` (the default) will automatically match the values of any key-value pairs that have identical
109 |    keys and then use the `match` strategy for the remainder of key/value pairs.
110 | 
111 | See [Pull Request #51](https://github.com/trailofbits/graphtage/pull/51) for some examples of how these strategies
112 | affect output.
113 | 
114 | The `--no-list-edits` or `-l` option will not consider interstitial insertions and removals when comparing two lists.
115 | The `--no-list-edits-when-same-length` or `-ll` option is a less drastic version of `-l` that will behave normally for
116 | lists that are of different lengths but behave like `-l` for lists that are of the same length.
117 | 
118 | ### ANSI Color
119 | By default, Graphtage will only use ANSI color in its output if it is run from a TTY. If, for example, you would like
120 | to have Graphtage emit colorized output from a script or pipe, use the `--color` or `-c` argument. To disable color even
121 | when running on a TTY, use `--no-color`.
122 | 
123 | ### HTML Output
124 | Graphtage can optionally emit the diff in HTML with the `--html` option.
125 | ```console
126 | $ graphtage --html original.json modified.json > diff.html
127 | ```
128 | 
129 | ### Status and Logging
130 | By default, Graphtage prints status messages and a progress bar to STDERR. To suppress this, use the `--no-status`
131 | option. To additionally suppress all but critical log messages, use `--quiet`. Fine-grained control of log messages is
132 | available via the `--log-level` option.
133 | 
134 | ## Why does Graphtage exist?
135 | 
136 | Diffing tree-like structures with unordered elements is tough. Say you want to compare two JSON files.
137 | There are [limited tools available](https://github.com/zgrossbart/jdd), which are effectively equivalent to
138 | canonicalizing the JSON (_e.g._, sorting dictionary elements by key) and performing a standard diff. This is not always
139 | sufficient. For example, if a key in a dictionary is changed but its value is not, a traditional diff
140 | will conclude that the entire key/value pair was replaced by the new one, even though the only change was the key
141 | itself. See [our documentation](https://trailofbits.github.io/graphtage/latest/howitworks.html) for more information.
142 | 
143 | ## Using Graphtage as a Library
144 | 
145 | Graphtage has a complete API for programmatically operating its diffing capabilities.
146 | When using Graphtage as a library, it is also capable of diffing in-memory Python objects.
147 | This can be useful for debugging Python code, for example, to determine a differential between two objects.
148 | See [our documentation](https://trailofbits.github.io/graphtage/latest/library.html) for more information.
149 | 
150 | ## Extending Graphtage
151 | 
152 | Graphtage is designed to be extensible: New filetypes can easily be defined, as well as new node types, edit types,
153 | formatters, and printers. See [our documentation](https://trailofbits.github.io/graphtage/latest/extending.html) for
154 | more information.
155 | 
156 | Complete API documentation is available [here](https://trailofbits.github.io/graphtage/latest/package.html).
157 | 
158 | ## License and Acknowledgements
159 | 
160 | This research was developed by [Trail of Bits](https://www.trailofbits.com/) with partial funding from the Defense
161 | Advanced Research Projects Agency (DARPA) under the SafeDocs program as a subcontractor to [Galois](https://galois.com).
162 | It is licensed under the [GNU Lesser General Public License v3.0](LICENSE).
163 | [Contact us](mailto:opensource@trailofbits.com) if you're looking for an exception to the terms.
164 | © 2020–2023, Trail of Bits.
165 | 


--------------------------------------------------------------------------------
/bindist/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | dist
3 | graphtage.spec
4 | graphtage-*.tgz


--------------------------------------------------------------------------------
/bindist/Makefile:
--------------------------------------------------------------------------------
 1 | GRAPHTAGE_VERSION=$(shell graphtage --version 2>&1 | sed "s/Graphtage version //")
 2 | DIST_VERSION=$(shell uname | tr '[:upper:]' '[:lower:]')-$(shell uname -m | tr '[:upper:]' '[:lower:]')
 3 | DIST_NAME=graphtage-$(GRAPHTAGE_VERSION)-$(DIST_VERSION)
 4 | DIST_FILE=$(DIST_NAME).zip
 5 | 
 6 | .PHONY: $(DIST_FILE)
 7 | $(DIST_FILE):
 8 | 	pyinstaller -F -y --name graphtage graphtage_bin.py
 9 | 	@rm -rf $(DIST_NAME)
10 | 	mkdir $(DIST_NAME)
11 | 	cp dist/graphtage $(DIST_NAME)/
12 | 	cp -p ../README.md $(DIST_NAME)/
13 | 	cp -p ../LICENSE $(DIST_NAME)/
14 | 	zip -r $(DIST_FILE) $(DIST_NAME)
15 | 	rm -rf $(DIST_NAME)
16 | 
17 | .PHONY: dist-name
18 | dist-name:
19 | 	@echo $(DIST_FILE)
20 | 
21 | .PHONY: clean
22 | clean:
23 | 	rm -rf graphtage.spec dist build $(DIST_FILE) $(DIST_NAME)
24 | 


--------------------------------------------------------------------------------
/bindist/graphtage_bin.py:
--------------------------------------------------------------------------------
1 | from graphtage.__main__ import main
2 | 
3 | if __name__ == "__main__":
4 |     import sys
5 |     sys.exit(main())
6 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | graphtage*.rst
3 | package.rst
4 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile graphtage.rst package.rst
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 
22 | .PHONY: graphtage.rst
23 | graphtage.rst package.rst:
24 | # sphinx-apidoc wasn't configurable enough, so I wrote my own version:
25 | 	python3 build_api.py
26 | # graphtage.py, edits.py, and tree.py are all merged into the main graphtage module by __init__.py,
27 | # so we should not generate separate submodules for them:
28 | #sphinx-apidoc -f -e -M -T -o . ../graphtage ../graphtage/graphtage.py ../graphtage/edits.py ../graphtage/tree.py
29 | 
30 | .PHONY: clean
31 | clean:
32 | 	rm -rf _build graphtage*.rst package.rst
33 | 


--------------------------------------------------------------------------------
/docs/_static/localtoc.js:
--------------------------------------------------------------------------------
 1 | $( document ).ready(function (){
 2 | 
 3 | var createList = function(selector){
 4 | 
 5 |     var ul = $('<ul class="current"></ul>');
 6 |     var selected = $(selector);
 7 | 
 8 |     if (selected.length === 0){
 9 |         return;
10 |     }
11 | 
12 |     selected.clone().each(function (i,e){
13 | 
14 |         var p = $(e).children('.descclassname');
15 |         var n = $(e).children('.descname');
16 |         var l = $(e).children('.headerlink');
17 | 
18 |         var a = $('<a>');
19 |         a.attr('href',l.attr('href')).attr('title', 'Link to this definition');
20 | 
21 |         a.append(p).append(n);
22 | 
23 |         var entry = $('<li class="toctree-l4">').append(a);
24 |         ul.append(entry);
25 |     });
26 |     return ul;
27 | }
28 | 
29 | if($('dl.class > dt').length || $('dl.function > dt').length || $('dl.data > dt').length) {
30 |     /* collapse any open menus */
31 |     var menu = $('.wy-menu ul:first');
32 |     menu.find('.current').removeClass("current");
33 | 
34 |     var pagename = $("h1")[0].innerText;
35 | 
36 |     if(pagename === "graphtage package") {
37 |         pagename = "graphtage module";
38 |     }
39 | 
40 |     var header = $('<li class="toctree-l2 current"><a class="reference internal" href="#">' + pagename + '</a></li>')
41 |     var ul = $('<ul class="current"></ul>');
42 |     header.append(ul);
43 | 
44 |     menu.find('ul:first').prepend(header);
45 | 
46 |     var x = [];
47 |     x.push(['Classes','dl.class > dt']);
48 |     x.push(['Functions','dl.function > dt']);
49 |     x.push(['Variables','dl.data > dt']);
50 | 
51 |     var first = true;
52 | 
53 |     x.forEach(function (e) {
54 |         var l = createList(e[1]);
55 |         if (l) {
56 |             var li = $('<li class="toctree-l3"><a class="reference internal" href="#">' + e[0] + '</a></li>')
57 |             if(first) {
58 |                 li.addClass("current");
59 |                 first = false;
60 |             }
61 |             li.append(l);
62 |             ul.append(li);
63 |         }
64 |     });
65 | }
66 | 
67 | });
68 | 


--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
 1 | {%- extends "!layout.html" %}
 2 | 
 3 | {% block footer %}
 4 |   {% if not READTHEDOCS %}
 5 |     <div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
 6 |       <span class="rst-current-version" data-toggle="rst-current-version">
 7 |         <span class="fa fa-book"> Graphtage Documentation</span>
 8 |         {{ version }}
 9 |         <span class="fa fa-caret-down"></span>
10 |       </span>
11 |       <div class="rst-other-versions">
12 |         <dl>
13 |           <dt>{{ _('Versions') }}</dt>
14 |           {% if test_versions %}
15 |             {% for version in test_versions %}
16 |             <dd><a href="#">{{ version }}</a></dd>
17 |             {% endfor %}
18 |           {% else %}
19 |             <dd><a href="/graphtage/latest">latest</a></dd>
20 |             <dd><a href="/graphtage/v0.3.1">0.3.1</a></dd>
21 |             <dd><a href="/graphtage/v0.3.0">0.3.0</a></dd>
22 |             <dd><a href="/graphtage/v0.2.9">0.2.9</a></dd>
23 |             <dd><a href="/graphtage/v0.2.8">0.2.8</a></dd>
24 |             <dd><a href="/graphtage/v0.2.7">0.2.7</a></dd>
25 |             <dd><a href="/graphtage/v0.2.6">0.2.6</a></dd>
26 |             <dd><a href="/graphtage/v0.2.5">0.2.5</a></dd>
27 |             <dd><a href="/graphtage/v0.2.4">0.2.4</a></dd>
28 |             <dd><a href="/graphtage/v0.2.3">0.2.3</a></dd>
29 |             <dd><a href="/graphtage/v0.2.2">0.2.2</a></dd>
30 |             <dd><a href="/graphtage/v0.2.1">0.2.1</a></dd>
31 |             <dd><a href="/graphtage/v0.2.0">0.2.0</a></dd>
32 |             <dd><a href="/graphtage/v0.1.1">0.1.1</a></dd>
33 |             <dd><a href="/graphtage/v0.1.0">0.1.0</a></dd>
34 |           {% endif %}
35 |         </dl>
36 |         <dl>
37 |           <dt>{{ _('Source Code') }}</dt>
38 |             <dd>
39 |               <a href="https://github.com/trailofbits/graphtage">{{ _('GitHub Page') }}</a>
40 |             </dd>
41 |         </dl>
42 |       </div>
43 |     </div>
44 |   {% endif %}
45 | {% endblock %}


--------------------------------------------------------------------------------
/docs/_templates/searchbox.html:
--------------------------------------------------------------------------------
 1 | {%- if builder != 'singlehtml' %}
 2 | <a href="{{ github_url }}"><div class="version">{{ version }}</div></a>
 3 | <div role="search">
 4 |   <form id="rtd-search-form" class="wy-form" action="{{ pathto('search') }}" method="get">
 5 |     <input type="text" name="q" placeholder="{{ _('Search docs') }}" />
 6 |     <input type="hidden" name="check_keywords" value="yes" />
 7 |     <input type="hidden" name="area" value="default" />
 8 |   </form>
 9 | </div>
10 | {%- endif %}
11 | 


--------------------------------------------------------------------------------
/docs/build_api.py:
--------------------------------------------------------------------------------
  1 | import inspect
  2 | import os
  3 | import sys
  4 | from pathlib import Path
  5 | 
  6 | 
  7 | DOCS_PATH = os.path.dirname(os.path.realpath(__file__))
  8 | ROOT_PATH = Path(DOCS_PATH).parents[0]
  9 | 
 10 | sys.path = [ROOT_PATH] + sys.path
 11 | 
 12 | import graphtage
 13 | 
 14 | MODULES = []
 15 | 
 16 | 
 17 | def process_module(module):  # Write "<module name>.rst" into DOCS_PATH: title, automodule directive, then public classes and functions
 18 |     shortname = module.__name__.split('.')[-1]  # last dotted component, used in the section headings below
 19 |     with open(os.path.join(DOCS_PATH, f"{module.__name__}.rst"), 'w') as f:
 20 |         f.write(f"{module.__name__}\n")
 21 |         f.write(f"{'=' * len(module.__name__)}\n")  # RST title underline, same length as the title
 22 |         f.write(f"""
 23 | .. automodule:: {module.__name__}
 24 | """)
 25 |         classes = []
 26 |         for name, c in inspect.getmembers(module, inspect.isclass):
 27 |             if hasattr(c, '__module__') and c.__module__ == module.__name__ and not name.startswith('_'):  # only public classes whose __module__ is this module
 28 |                 classes.append(c)
 29 |         if classes:
 30 |             f.write(f"""
 31 | {shortname} classes
 32 | {'-' * len(shortname)}--------
 33 | """)
 34 |             for cls in sorted(classes, key=lambda c: c.__name__):  # one subsection per class, ordered by class name
 35 |                 f.write(f"""
 36 | {cls.__name__}
 37 | {'*' * len(cls.__name__)}
 38 | 
 39 | .. autoclass:: {cls.__name__}
 40 |    :members:
 41 |    :undoc-members:
 42 |    :inherited-members:
 43 |    :show-inheritance:
 44 | """)
 45 | 
 46 |         functions = []
 47 |         for name, func in inspect.getmembers(module, inspect.isfunction):
 48 |             if hasattr(func, '__module__') and func.__module__ == module.__name__ and not name.startswith('_'):  # only public functions whose __module__ is this module
 49 |                 functions.append(func)
 50 |         if functions:
 51 |             f.write(f"""
 52 | {shortname} functions
 53 | {'-' * len(shortname)}----------
 54 | """)
 55 |             for func in sorted(functions, key=lambda o: o.__name__):  # one subsection per function, ordered by function name
 56 |                 f.write(f"""
 57 | {func.__name__}
 58 | {'*' * len(func.__name__)}
 59 | 
 60 | .. autofunction:: {func.__name__}
 61 | """)
 62 | 
 63 | #         attrs = []
 64 | #         for name in dir(module):
 65 | #             if name.startswith('_'):
 66 | #                 continue
 67 | #             attr = getattr(module, name)
 68 | #             if not inspect.isfunction(attr) and not inspect.isclass(attr) and not inspect.ismodule(attr) and (
 69 | #                 not hasattr(attr, '__module__') or attr.__module__ == module.__name__
 70 | #             ) and inspect.getattr_static(attr, '__doc__') is not None:
 71 | #                 attrs.append(name)
 72 | #         if attrs:
 73 | #             f.write(f"""
 74 | # {shortname} attributes
 75 | # {'-' * len(shortname)}-----------
 76 | # """)
 77 | #             for name in sorted(attrs):
 78 | #                 f.write(f"""
 79 | # {name}
 80 | # {'*' * len(name)}
 81 | #
 82 | # .. autoattribute:: {name}
 83 | # """)
 84 | 
 85 | 
 86 | 
 87 | for name, obj in inspect.getmembers(graphtage, inspect.ismodule):  # collect the submodules exposed as attributes of the graphtage package
 88 |     if obj.__name__.startswith('graphtage') and name not in ('graphtage', 'tree', 'edits'):  # these three get no page of their own (the docs Makefile notes they are merged into the main module)
 89 |         MODULES.append(obj)
 90 | 
 91 | MODULES = [graphtage] + sorted(MODULES, key=lambda m: m.__name__)  # top-level package first, then submodules by name
 92 | 
 93 | for m in MODULES:
 94 |     process_module(m)  # writes one .rst file per module
 95 | 
 96 | with open(os.path.join(DOCS_PATH, "package.rst"), 'w') as f:  # index page whose toctree lists every generated module page
 97 |     f.write("""Graphtage API
 98 | -------------
 99 | 
100 | .. toctree::
101 |    :maxdepth: 4
102 | 
103 | """)
104 |     f.write('\n'.join(f'   {m.__name__}' for m in MODULES))  # one indented toctree entry per module
105 | 


--------------------------------------------------------------------------------
/docs/builders.rst:
--------------------------------------------------------------------------------
  1 | .. _Builders:
  2 | 
  3 | Constructing Graphtage Trees
  4 | ============================
  5 | 
  6 | Graphtage operates on trees represented by the :class:`graphtage.TreeNode` base class.
  7 | There are various predefined specializations of tree nodes, such as :class:`graphtage.IntegerNode` for integers, :class:`graphtage.ListNode` for lists, and :class:`graphtage.DictNode` for dictionaries. :class:`graphtage.TreeNode` has an optional :attr:`parent <graphtage.TreeNode.parent>` and a potentially empty set of :func:`children <graphtage.TreeNode.children>`.
  8 | 
  9 | Graphtage provides a :class:`graphtage.builder.Builder` class for conveniently converting arbitrary objects into a tree of :class:`TreeNode <graphtage.TreeNode>` objects. It uses Python magic to define the conversions.
 10 | 
 11 | .. code-block:: python
 12 | 
 13 |     from graphtage import IntegerNode, TreeNode
 14 |     from graphtage.builder import Builder
 15 | 
 16 |     class CustomBuilder(Builder):
 17 |         @Builder.builder(int)
 18 |         def build_int(self, node: int, children: list[TreeNode]):
 19 |             return IntegerNode(node)
 20 | 
 21 | >>> CustomBuilder().build_tree(10)
 22 | IntegerNode(10)
 23 | 
 24 | The :func:`@Builder.builder(int) <graphtage.Builder.builder>` decorator specifies that the function is able to build a Graphtage `TreeNode` object from inputs that are instances of the type `int` (as determined by :func:`isinstance`). If there are multiple builder functions that match a given object, the function associated with the most specialized type is chosen. For example:
 25 | 
 26 | .. code-block:: python
 27 | 
 28 |     class Foo:
 29 |         pass
 30 | 
 31 | 
 32 |     class Bar(Foo):
 33 |         pass
 34 | 
 35 | 
 36 |     class CustomBuilder(Builder):
 37 |         @Builder.builder(Foo)
 38 |         def build_foo(self, node: Foo, children: list[TreeNode]):
 39 |             return StringNode("foo")
 40 | 
 41 |         @Builder.builder(Bar)
 42 |         def build_bar(self, node: Bar, children: list[TreeNode]):
 43 |             return StringNode("bar")
 44 | 
 45 | >>> CustomBuilder().build_tree(Foo())
 46 | StringNode("foo")
 47 | >>> CustomBuilder().build_tree(Bar())
 48 | StringNode("bar")
 49 | 
 50 | Expanding Children
 51 | ------------------
 52 | 
 53 | So far we have only given examples of the production of leaf nodes, like integers and strings.
 54 | What if a node has children, like a list? We can handle this using the :func:`@Builder.expander <graphtage.Builder.expander>` decorator. Here is an example of how a list can be built:
 55 | 
 56 | .. code-block:: python
 57 | 
 58 |     class CustomBuilder(Builder):
 59 |         ...
 60 | 
 61 |         @Builder.expander(list)
 62 |         def expand_list(self, node: list):
 63 |             """Returns an iterable over the node's children"""
 64 |             yield from node
 65 | 
 66 |         @Builder.builder(list)
 67 |         def build_list(self, node: list, children: list[TreeNode]):
 68 |             return ListNode(children)
 69 | 
 70 | >>> CustomBuilder().build_tree([1, 2, 3, 4])
 71 | ListNode([IntegerNode(1), IntegerNode(2), IntegerNode(3), IntegerNode(4)])
 72 | 
 73 | If an expander is not defined for a type, it is assumed that the type is a leaf with no children.
 74 | 
 75 | If the root node or one of its descendants is of a type that has no associated builder function, a :exc:`NotImplementedError` is raised.
 76 | 
 77 | Graphtage has a subclassed builder :class:`graphtage.builder.BasicBuilder` that has builders and expanders for the Python basic types like :class:`int`, :class:`float`, :class:`str`, :class:`bytes`, :class:`list`, :class:`dict`, :class:`set`, and :class:`tuple`. You can extend :class:`graphtage.builder.BasicBuilder` to implement support for additional types.
 78 | 
 79 | Custom Nodes
 80 | ------------
 81 | 
 82 | Graphtage provides abstract classes like :class:`graphtage.ContainerNode` and :class:`graphtage.SequenceNode` to aid in the implementation of custom node types. But the easiest way to define a custom node type is to extend off of :class:`graphtage.dataclasses.DataClass`.
 83 | 
 84 | 
 85 | .. code-block:: python
 86 | 
 87 |     from graphtage import IntegerNode, ListNode, StringNode
 88 |     from graphtage.dataclasses import DataClass
 89 | 
 90 |     class CustomNode(DataClass):
 91 |         name: StringNode
 92 |         value: IntegerNode
 93 |         attributes: ListNode
 94 | 
 95 | This will automatically build a node type that has three children: a string, an integer, and a list.
 96 | 
 97 | >>> CustomNode(name=StringNode("the name"), value=IntegerNode(1337), attributes=ListNode((IntegerNode(1), IntegerNode(2), IntegerNode(3))))
 98 | 
 99 | Let's say you have another, non-graphtage class that corresponds to :class:`CustomNode`:
100 | 
101 | .. code-block:: python
102 | 
103 |     class NonGraphtageClass:
104 |         name: str
105 |         value: int
106 |         attributes: list[int]
107 | 
108 | You can add support for building Graphtage nodes from this custom class as follows:
109 | 
110 | .. code-block:: python
111 | 
112 |     class CustomBuilder(BasicBuilder):
113 |         @Builder.expander(NonGraphtageClass)
114 |         def expand_non_graphtage_class(self, node: NonGraphtageClass):
115 |             yield node.name
116 |             yield node.value
117 |             yield node.attributes
118 | 
119 |         @Builder.builder(NonGraphtageClass)
120 |         def build_non_graphtage_class(self, node: NonGraphtageClass, children: list[TreeNode]) -> CustomNode:
121 |             return CustomNode(*children)
122 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # Configuration file for the Sphinx documentation builder.
  2 | #
  3 | # This file only contains a selection of the most common options. For a full
  4 | # list see the documentation:
  5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
  6 | 
  7 | # -- Path setup --------------------------------------------------------------
  8 | 
  9 | # If extensions (or modules to document with autodoc) are in another directory,
 10 | # add these directories to sys.path here. If the directory is relative to the
 11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 12 | 
 13 | import os
 14 | from pathlib import Path
 15 | 
 16 | VERSION_MODULE_PATH = os.path.join(Path(os.path.dirname(__file__)).parents[0], "graphtage", "version.py")
 17 | 
 18 | 
 19 | def get_version_string():  # Return the version label for the docs, read from graphtage/version.py
 20 |     attrs = {}
 21 |     with open(VERSION_MODULE_PATH) as f:
 22 |         exec(f.read(), attrs)  # run version.py in a scratch namespace instead of importing the graphtage package
 23 |     vstring = attrs['VERSION_STRING']
 24 |     if 'git' in vstring:  # version strings containing "git" are returned unchanged
 25 |         return vstring
 26 |     else:
 27 |         return f"v{vstring}"  # all other versions get a "v" prefix
 28 | 
 29 | 
 30 | # -- Project information -----------------------------------------------------
 31 | 
 32 | project = 'Graphtage'
 33 | copyright = '2020, Trail of Bits'
 34 | author = 'Evan Sultanik'
 35 | 
 36 | # The full version, including alpha/beta/rc tags
 37 | release = get_version_string()
 38 | version = release
 39 | github_url = 'https://github.com/trailofbits/graphtage/'
 40 | if 'git' not in version:
 41 |     github_url = f"{github_url}releases/tag/{ version }"
 42 | 
 43 | 
 44 | # -- General configuration ---------------------------------------------------
 45 | 
 46 | # Add any Sphinx extension module names here, as strings. They can be
 47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 48 | # ones.
 49 | extensions = [
 50 |     'sphinx.ext.autodoc',
 51 |     'sphinx.ext.napoleon',
 52 |     'sphinx.ext.intersphinx',
 53 |     'sphinx.ext.todo',
 54 |     'sphinx.ext.autosectionlabel',
 55 |     'sphinx_rtd_theme',
 56 |     #'sphinxcontrib.fulltoc'
 57 | ]
 58 | 
 59 | # Add any paths that contain templates here, relative to this directory.
 60 | templates_path = ['_templates']
 61 | 
 62 | # List of patterns, relative to source directory, that match files and
 63 | # directories to ignore when looking for source files.
 64 | # This pattern also affects html_static_path and html_extra_path.
 65 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 66 | 
 67 | 
 68 | # -- Options for HTML output -------------------------------------------------
 69 | 
 70 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 71 | # a list of builtin themes.
 72 | #
 73 | #html_theme = 'classic'
 74 | html_theme = 'sphinx_rtd_theme'
 75 | 
 76 | html_theme_options = {
 77 |     'canonical_url': f'https://trailofbits.github.io/graphtage/latest/',
 78 |     'logo_only': False,
 79 |     'display_version': False,   # This is manually configured in our custom templates
 80 |     'prev_next_buttons_location': 'bottom',
 81 |     'style_external_links': True,
 82 |     #'vcs_pageview_mode': '',
 83 |     #'style_nav_header_background': 'white',
 84 |     # Toc options
 85 |     'collapse_navigation': True,
 86 |     'sticky_navigation': True,
 87 |     'navigation_depth': 4,
 88 |     'includehidden': True,
 89 |     'titles_only': False
 90 | }
 91 | 
 92 | html_context = {
 93 |     'github_url': github_url
 94 | }
 95 | 
 96 | # Add any paths that contain custom static files (such as style sheets) here,
 97 | # relative to this directory. They are copied after the builtin static files,
 98 | # so a file named "default.css" will overwrite the builtin "default.css".
 99 | html_static_path = ['_static']
100 | 
101 | #html_js_files = [
102 | #    'localtoc.js',
103 | #]
104 | 
105 | 
106 | def skip(app, what, name, obj, would_skip, options):  # handler connected to the "autodoc-skip-member" event in setup()
107 |     if name == "__init__":
108 |         return False  # never skip __init__, so constructors are documented
109 |     return would_skip  # otherwise keep the decision that was passed in
110 | 
111 | 
112 | def docstring_callback(app, what, name, obj, options, lines: list):  # edits `lines` in place; its connect() call in setup() is commented out
113 |     if what == 'class' or what == 'function':
114 |         if lines and lines[0].strip():  # only when the docstring starts with a non-blank line
115 |             lines.insert(1, '')
116 |             lines.insert(2, name)  # insert the object's name as a heading after the first line
117 |             lines.insert(3, '*' * len(name))  # '*' underline matching the heading length
118 |             if len(lines) == 4:
119 |                 lines.append('')  # the docstring had a single line: end with a blank line after the underline
120 | 
121 | 
122 | def setup(app):  # registers this configuration's event handlers on the Sphinx application
123 |     app.connect("autodoc-skip-member", skip)  # lets skip() decide which members autodoc omits
124 |     #app.connect('autodoc-process-docstring', docstring_callback)
125 | 
126 | 
127 | add_package_names = False
128 | # prefix each section label with the name of the document it is in, followed by a colon
129 | autosectionlabel_prefix_document = True
130 | intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
131 | napoleon_include_private_with_doc = True
132 | napoleon_include_special_with_doc = True
133 | todo_include_todos = True
134 | 
135 | #autodoc_default_options = {
136 | #    'inherited-members': True
137 | #}
138 | 


--------------------------------------------------------------------------------
/docs/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trailofbits/graphtage/23654acf488eb803a60ce27ac515ee0755feb1a7/docs/example.png


--------------------------------------------------------------------------------
/docs/extending.rst:
--------------------------------------------------------------------------------
 1 | Extending Graphtage
 2 | ===================
 3 | 
 4 | Graphtage is designed to be extensible; new filetypes can easily be defined, as well as new node types, edit types,
 5 | formatters, and printers. This section will give some examples on how to implement each.
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 4
 9 | 
10 |    builders
11 |    filetypes
12 |    printing
13 | 


--------------------------------------------------------------------------------
/docs/filetypes.rst:
--------------------------------------------------------------------------------
 1 | .. _Filetypes:
 2 | 
 3 | Defining New Filetypes
 4 | ======================
 5 | 
 6 | Implementing support for a new Graphtage filetype entails extending the :class:`graphtage.Filetype` class. Subclassing :class:`graphtage.Filetype` automatically registers it with Graphtage.
 7 | 
 8 | Filetype Matching
 9 | -----------------
10 | 
11 | Input files are matched to an associated :class:`graphtage.Filetype` using MIME types. Each :class:`graphtage.Filetype` registers one or more MIME types for which it will be responsible. Input file MIME types are classified using the :mod:`mimetypes` module. Sometimes a filetype does not have a standardized MIME type or is not properly classified by the :mod:`mimetypes` module. For example, Graphtage's :class:`graphtage.pickle.Pickle` filetype has neither. You can add support for such a filetype as follows:
12 | 
13 | .. code-block:: python
14 | 
15 |     import mimetypes
16 | 
17 |     if '.pkl' not in mimetypes.types_map and '.pickle' not in mimetypes.types_map:
18 |         mimetypes.add_type('application/x-python-pickle', '.pkl')
19 |         mimetypes.suffix_map['.pickle'] = '.pkl'
20 | 
21 | Implementing a New Filetype
22 | ---------------------------
23 | 
24 | With the MIME type registered, here is a sketch of how one might define the Pickle filetype:
25 | 
26 | .. code-block:: python
27 | 
28 |     from graphtage import BuildOptions, Filetype, Formatter, TreeNode
29 | 
30 |     class Pickle(Filetype):
31 |         def __init__(self):
32 |             super().__init__(
33 |                 "pickle",                      # a unique identifier
34 |                 "application/python-pickle",   # the primary MIME type
35 |                 "application/x-python-pickle"  # an optional secondary MIME type
36 |             )
37 | 
38 |         def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
39 |             # return the root node of the tree built from the given pickle file
40 | 
41 |         def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
42 |             # the same as the build_tree() function,
43 |             # but on error return a string containing the error message
44 |             #
45 |             # for example:
46 |             try:
47 |                 return self.build_tree(path=path, options=options)
48 |             except PickleDecodeError as e:
49 |                 return f"Error deserializing {os.path.basename(path)}: {e!s}"
50 | 
51 |         def get_default_formatter(self) -> GraphtageFormatter:
52 |             # return the formatter associated with this file type
53 | 


--------------------------------------------------------------------------------
/docs/howitworks.rst:
--------------------------------------------------------------------------------
 1 | How Graphtage Works
 2 | ===================
 3 | 
 4 | In general, optimally mapping one graph to another
 5 | cannot be executed in polynomial time [#]_, and is therefore not
 6 | tractable for graphs of any useful size [*]_. This is true even for restricted classes of graphs like DAGs [#]_.
 7 | However, trees and forests are a special case that *can* be mapped in polynomial time, with reasonable constraints on
 8 | the types of edits possible. Graphtage exploits this.
 9 | 
10 | Why Mapping Trees is Complex
11 | ----------------------------
12 | 
13 | Ordered nodes in the tree (*e.g.*, JSON lists) and, in particular, mappings (*e.g.*, JSON dicts) are challenging. Most
14 | extant diffing algorithms and utilities assume that the structures are ordered. Take this JSON as an example:
15 | 
16 | .. list-table::
17 |     :class: align-center
18 | 
19 |     * - Original JSON
20 |       - Modified JSON
21 |     * - .. code-block:: json
22 | 
23 |             {
24 |                 "foo": [1, 2, 3, 4],
25 |                 "bar": "testing"
26 |             }
27 | 
28 |       - .. code-block:: json
29 | 
30 |             {
31 |                 "foo": [2, 3, 4, 5],
32 |                 "zab": "testing",
33 |                 "woo": ["foobar"]
34 |             }
35 | 
36 | Existing tools effectively canonicalize the JSON (*e.g.*, sort dictionary elements by key and format lists with one
37 | item per line), and then perform a traditional diff:
38 | 
39 | .. code-block:: console
40 | 
41 |     $ cat original.json | jq -M --sort-keys > original.canonical.json
42 |     $ cat modified.json | jq -M --sort-keys > modified.canonical.json
43 |     $ diff -u original.canonical.json modified.canonical.json
44 | 
45 | .. code-block:: diff
46 |     :linenos:
47 | 
48 |     {
49 |     -  "bar": "testing",
50 |        "foo": [
51 |     -    1,
52 |          2,
53 |          3,
54 |     -    4
55 |     -  ]
56 |     +    4,
57 |     +    5
58 |     +  ],
59 |     +  "woo": [
60 |     +    "foobar"
61 |     +  ],
62 |     +  "zab": "testing"
63 |     }
64 | 
65 | Not entirely useful, particularly if the input files are large. The problem is that changing dict keys breaks the diff:
66 | Since "bar" was changed to "zab", the canonical representation changes and they are considered separate edits (lines 2
67 | and 15 of the diff).
68 | 
69 | Matching Ordered Sequences
70 | --------------------------
71 | 
72 | Graphtage matches ordered sequences like lists using an "online" [#]_, "constructive" [#]_ implementation of the
73 | Levenshtein distance metric [#]_, similar to the Wagner–Fischer algorithm [#]_. The algorithm starts with an
74 | unbounded mapping and iteratively improves it until the bounds converge, at which point the optimal edit sequence is
75 | discovered. This is implemented in the :mod:`graphtage.levenshtein` module.
76 | 
77 | Matching Unordered Collections
78 | ------------------------------
79 | 
80 | Dicts are matched by solving the minimum weight matching problem [#]_ on the complete bipartite graph from key/value
81 | pairs in the source dict to key/value pairs in the destination dict. This is implemented in the
82 | :mod:`graphtage.matching` module.
83 | 
84 | Footnotes
85 | ---------
86 | 
87 | .. [#] https://en.wikipedia.org/wiki/Graph_isomorphism_problem
88 | .. [#] https://en.wikipedia.org/wiki/Directed_acyclic_graph
89 | .. [#] https://en.wikipedia.org/wiki/Online_algorithm
90 | .. [#] https://en.wikipedia.org/wiki/Constructive_proof
91 | .. [#] https://en.wikipedia.org/wiki/Levenshtein_distance
92 | .. [#] https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
93 | .. [#] https://en.wikipedia.org/wiki/Assignment_problem
94 | .. [*] Unless |pvsnp|_.
95 | .. _pvsnp:
96 |     https://en.wikipedia.org/wiki/P_versus_NP_problem
97 | .. |pvsnp| replace:: :math:`P = NP`
98 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | Graphtage Documentation
 2 | =======================
 3 | 
 4 | Graphtage is *both* a commandline utility *and* a general purpose library for semantically comparing and merging
 5 | tree-like structures, such as JSON, XML, HTML, YAML, and CSV files. Its name is a portmanteau of “graph” and
 6 | “graftage”—the latter being the practice of joining two trees together such that they grow as one.
 7 | 
 8 | There are several reasons why you might be here…
 9 | 
10 | .. topic:: You want to learn how to use Graphtage as a command line utility.
11 | 
12 |     This documentation focuses on Graphtage’s use as a library, specifically how to extend it by implementing new file
13 |     formats. For instructions on using Graphtage as a utility, see the documentation in its `GitHub page`_.
14 | 
15 | .. topic:: You want to programmatically interact with Graphtage as a library.
16 | 
17 |     You should start by reading about :doc:`Using Graphtage Programmatically <library>`.
18 | 
19 | .. topic:: You want to modify or extend Graphtage.
20 | 
21 |     For example, you might want to implement support for a new file format or edit type. You should start by reading
22 |     the :doc:`Extending Graphtage <extending>` section.
23 | 
24 | .. topic:: You are already familiar with Graphtage and just need an API reference.
25 | 
26 |     The API documentation is :doc:`here <package>`.
27 | 
28 | .. topic:: You are curious and want to learn more about how Graphtage works.
29 | 
30 |     Documentation on how Graphtage works is :doc:`here <howitworks>`.
31 | 
32 | .. _GitHub page: https://github.com/trailofbits/graphtage
33 | 
34 | .. toctree::
35 |    :maxdepth: 4
36 |    :caption: Contents:
37 | 
38 |    library
39 |    extending
40 |    howitworks
41 |    package
42 | 
43 | Indices and tables
44 | ==================
45 | 
46 | * :ref:`genindex`
47 | * :ref:`modindex`
48 | * :ref:`search`
49 | 


--------------------------------------------------------------------------------
/docs/library.rst:
--------------------------------------------------------------------------------
  1 | Using Graphtage Programmatically
  2 | ================================
  3 | 
  4 | Graphtage is a command line utility, but it can just as easily be used as a library. This section documents how to
  5 | interact with Graphtage directly from Python.
  6 | 
  7 | The Intermediate Representation
  8 | -------------------------------
  9 | 
 10 | Graphtage's diffing algorithms operate on an
 11 | `intermediate representation <https://en.wikipedia.org/wiki/Intermediate_representation>`__ rather than on the data
 12 | structures of the original file format. This allows Graphtage to have generic comparison algorithms that can work on
 13 | *any* input file type. The intermediate representation is a tree of :class:`graphtage.TreeNode` objects.
 14 | 
 15 | Therefore, the first step is to convert the files being diffed into Graphtage's intermediate representation. The JSON
 16 | filetype has a function to convert arbitrary Python objects (comprised of standard Python types) into Graphtage trees::
 17 | 
 18 |     >>> from graphtage import json
 19 |     >>> from_tree = json.build_tree({"foo": [1, 2, 3, 4]})
 20 |     >>> from_tree
 21 |     DictNode([KeyValuePairNode(key=StringNode('foo'), value=ListNode((IntegerNode(1), IntegerNode(2), IntegerNode(3), IntegerNode(4))))])
 22 | 
 23 | Transforming Nodes with Edits
 24 | -----------------------------
 25 | 
 26 | To see the sequence of edits to transform this tree to another, we call :meth:`graphtage.TreeNode.get_all_edits`::
 27 | 
 28 |     >>> to_tree = json.build_tree({"bar": [2, 3, 4]})
 29 |     >>> to_tree
 30 |     DictNode([KeyValuePairNode(key=StringNode('bar'), value=ListNode((IntegerNode(2), IntegerNode(3), IntegerNode(4))))])
 31 |     >>> for edit in from_tree.get_all_edits(to_tree):
 32 |     ...     print(edit)
 33 |     Remove(IntegerNode(1), remove_from=ListNode((IntegerNode(1), IntegerNode(2), IntegerNode(3), IntegerNode(4))))
 34 |     StringEdit(from_node=StringNode('foo'), to_node=StringNode('bar'))
 35 | 
 36 | Applying Edits to Nodes
 37 | -----------------------
 38 | 
 39 | Both nodes and edits are immutable. We can perform a diff to apply edits to nodes, producing a new tree constructed of
 40 | :class:`graphtage.EditedTreeNode` objects. Using some Python magic, the new tree's nodes maintain all of the same
 41 | characteristics of the source nodes—including their source node class types—but are *also* instances of
 42 | :class:`graphtage.EditedTreeNode`.
 43 | 
 44 | Here is how to diff two nodes::
 45 | 
 46 |     >>> from_node.diff(to_node)
 47 |     >>> diff = from_tree.diff(to_tree)
 48 |     >>> diff
 49 |     EditedDictNode([EditedKeyValuePairNode(key=EditedStringNode('foo'), value=EditedListNode((EditedIntegerNode(1), EditedIntegerNode(2), EditedIntegerNode(3), EditedIntegerNode(4))))])
 50 | 
 51 | As you can see, the tree was reconstructed with edited versions of each node. Each node will have a new member variable,
 52 | :attr:`graphtage.EditedTreeNode.edit`, containing the edit that the node chose to apply to itself (or :const:`None` if the
 53 | node did not need to be edited). There are also additional member variables to indicate whether the node has been
 54 | removed from its parent container.
 55 | 
 56 | Formatting and Printing Results
 57 | -------------------------------
 58 | 
 59 | There are two components to outputting a tree or diff: a :class:`graphtage.formatter.Formatter`, which is responsible
 60 | for the syntax of the output, and a :class:`graphtage.printer.Printer`, which is responsible for rendering that output
 61 | to a stream. For example, to print our diff in JSON format to the default printer (STDOUT), we would do::
 62 | 
 63 |     >>> from graphtage import printer
 64 |     >>> with printer.DEFAULT_PRINTER as p:
 65 |     ...     json.JSONFormatter.DEFAULT_INSTANCE.print(printer.DEFAULT_PRINTER, diff)
 66 |     ...
 67 |     {
 68 |         "++bar++~~foo~~": [
 69 |             ~~1~~,
 70 |             2,
 71 |             3,
 72 |             4
 73 |         ]
 74 |     }
 75 | 
 76 | Since Graphtage's formatters are independent of the input format, thanks to the intermediate representation, we can
 77 | just as easily output the diff in another format, like YAML::
 78 | 
 79 |     >>> from graphtage import yaml
 80 |     >>> with printer.DEFAULT_PRINTER as p:
 81 |     ...     yaml.YAMLFormatter.DEFAULT_INSTANCE.print(printer.DEFAULT_PRINTER, diff)
 82 |     ...
 83 |     ++bar++~~foo~~:
 84 |     - ~~1~~
 85 |     - 2
 86 |     - 3
 87 |     - 4
 88 | 
 89 | Diffing In-Memory Python Objects
 90 | --------------------------------
 91 | 
 92 | When used as a library, Graphtage has the ability to diff in-memory Python objects. This can be useful when debugging,
 93 | for example, to quickly determine the difference between two Python objects that cause a differential::
 94 | 
 95 |     >>> from graphtage.pydiff import print_diff
 96 |     >>> with printer.DEFAULT_PRINTER as p:
 97 |     ...     obj1 = [1, 2, {3: "three"}, 4]
 98 |     ...     obj2 = [1, 2, {3: 3}, "four"]
 99 |     ...     print_diff(obj1, obj2, printer=p)
100 |     [1,2,{3: "three" -> 3},++"four"++~~4~~]
101 | 
102 | Python object diffing also works with custom classes::
103 | 
104 |     >>> class Foo:
105 |     ...     def __init__(self, bar, baz):
106 |     ...         self.bar = bar
107 |     ...         self.baz = baz
108 |     >>> with printer.DEFAULT_PRINTER as p:
109 |     ...     print_diff(Foo("bar", "baz"), Foo("bar", "bak"), printer=p)
110 |     Foo(bar="bar", baz="ba++k++~~z~~")
111 | 


--------------------------------------------------------------------------------
/docs/printing.rst:
--------------------------------------------------------------------------------
 1 | .. _Printing Protocol:
 2 | 
 3 | Printing Protocol
 4 | =================
 5 | 
 6 | The protocol for delegating how a :class:`graphtage.TreeNode` or :class:`graphtage.Edit` is printed in
 7 | :meth:`graphtage.GraphtageFormatter.print` is as follows:
 8 | 
 9 | #. Determine the actual object to be printed:
10 |     * If ``node_or_edit`` is an :class:`graphtage.Edit`:
11 |         * If ``with_edits``, then choose the edit
12 |         * Otherwise, choose :attr:`node_or_edit.from_node <graphtage.Edit.from_node>`
13 |     * If ``node_or_edit`` is a :class:`graphtage.TreeNode`:
14 |         * If ``with_edits`` *and* the node is edited and has a non-zero cost,
15 |             then choose :attr:`node_or_edit.edit <graphtage.EditedTreeNode.edit>`::
16 | 
17 |                 node_or_edit.edit is not None and node_or_edit.edit.bounds().lower_bound > 0
18 | 
19 |         * Otherwise choose ``node_or_edit``
20 | #. If the chosen object is an edit:
21 |     * See if there is a specialized formatter for this edit by calling
22 |       :meth:`graphtage.formatter.Formatter.get_formatter`
23 |     * If so, delegate to that formatter and return.
24 |     * If not, try calling the edit's :func:`graphtage.Edit.print` method. If :exc:`NotImplementedError` is
25 |       *not* raised, return.
26 | #. If the chosen object is a node, or if we failed to find a printer for the edit:
27 |     * See if there is a specialized formatter for this node by calling
28 |       :meth:`graphtage.formatter.Formatter.get_formatter`
29 |     * If so, delegate to that formatter and return.
30 |     * If not, print a debug warning and delegate to the node's internal print implementation
31 |       :meth:`graphtage.TreeNode.print`.
32 | 
33 | This is implemented in :meth:`graphtage.GraphtageFormatter.print`. See the :ref:`Formatting Protocol` for how formatters
34 | are chosen.
35 | 


--------------------------------------------------------------------------------
/graphtage/__init__.py:
--------------------------------------------------------------------------------
 1 | from . import graphtage
 2 | 
 3 | from .graphtage import *
 4 | from .tree import *
 5 | from .edits import *
 6 | 
 7 | from .version import __version__, VERSION_STRING
 8 | from . import (
 9 |     ast, bounds, builder, constraints, dataclasses, edits, expressions, fibonacci, formatter, levenshtein, matching,
10 |     object_set, pickle, printer, pydiff, search, sequences, tree, utils
11 | )
12 | from . import csv, json, xml, yaml, plist
13 | 
14 | import inspect
15 | 
# All of the classes in SUBMODULES_TO_SUBSUME should really be in the top-level `graphtage` module.
# They are separated into submodules solely for making the Python file sizes more manageable.
# So the following code loops over those submodules and reassigns all of the classes to the top-level module.
SUBMODULES_TO_SUBSUME = (graphtage, tree, edits)
for module_to_subsume in SUBMODULES_TO_SUBSUME:
    for name, obj in inspect.getmembers(module_to_subsume):
        # Only touch members that report this submodule as their origin; members whose `__module__` names a
        # different module (or that have no `__module__` at all) are left untouched.
        if hasattr(obj, '__module__') and obj.__module__ == module_to_subsume.__name__:
            obj.__module__ = 'graphtage'
    # Unbind the loop variable so it does not linger as a package attribute after the final iteration.
    del module_to_subsume

# Likewise remove the helpers that were only needed for the reassignment above.
del inspect, SUBMODULES_TO_SUBSUME
27 | 


--------------------------------------------------------------------------------
/graphtage/ast.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Generic node types for representing abstract syntax trees.
  3 | """
  4 | from colorama import Fore
  5 | 
  6 | from . import KeyValuePairNode, ListNode, Printer, TreeNode, DictNode, StringNode
  7 | from .dataclasses import DataClassNode
  8 | from .sequences import SequenceFormatter
  9 | 
 10 | 
class KeywordArgument(KeyValuePairNode):
    """A :class:`KeyValuePairNode` subclass that adds no behavior of its own.

    NOTE(review): presumably used to tag a ``name=value`` keyword argument; confirm against the callers.
    """
    pass
 13 | 
 14 | 
 15 | class Module(ListNode):
 16 |     def print(self, printer: Printer):
 17 |         SequenceFormatter('', '', '\n').print(printer, self)
 18 | 
 19 | 
 20 | class Assignment(DataClassNode):
 21 |     """A node representing an assignment."""
 22 | 
 23 |     targets: ListNode
 24 |     value: TreeNode
 25 | 
 26 |     def print(self, printer: Printer):
 27 |         """Prints this node."""
 28 |         SequenceFormatter('', '', ', ').print(printer, self.targets)
 29 |         with printer.bright():
 30 |             printer.write(" = ")
 31 |         self.value.print(printer)
 32 | 
 33 |     def __str__(self):
 34 |         return f"{', '.join(map(str, self.targets.children()))} = {self.value!s}"
 35 | 
 36 | 
class CallArguments(ListNode):
    """A :class:`ListNode` subclass that adds no behavior of its own.

    It is the declared type of the ``args`` field of :class:`Call`.
    """
    pass
 39 | 
 40 | 
class CallKeywords(DictNode):
    """A :class:`DictNode` subclass that adds no behavior of its own.

    It is the declared type of the ``kwargs`` field of :class:`Call`.
    """
    pass
 43 | 
 44 | 
 45 | class Call(DataClassNode):
 46 |     """A node representing a function call."""
 47 | 
 48 |     func: TreeNode
 49 |     args: CallArguments
 50 |     kwargs: CallKeywords
 51 | 
 52 |     def __init__(self, *args, **kwargs):
 53 |         super().__init__(*args, **kwargs)
 54 |         if isinstance(self.func, StringNode):
 55 |             self.func.quoted = False
 56 | 
 57 |     def print(self, printer: Printer):
 58 |         with printer.color(Fore.YELLOW):
 59 |             self.func.print(printer)
 60 |         printer.write("(")
 61 |         SequenceFormatter('', '', ', ').print(printer, self.args)
 62 |         if self.args and len(self.kwargs) > 0:
 63 |             printer.write(", ")
 64 |         for kvp in self.kwargs:
 65 |             with printer.color(Fore.RED):
 66 |                 kvp.key.print(printer)
 67 |             with printer.bright():
 68 |                 printer.write("=")
 69 |             kvp.value.print(printer)
 70 |         printer.write(")")
 71 | 
 72 |     def __str__(self):
 73 |         args = ", ".join([str(a) for a in self.args] + [
 74 |             f"{kvp.key!s}={kvp.value!s}"
 75 |             for kvp in self.kwargs
 76 |         ])
 77 |         return f"{self.func!s}({args})"
 78 | 
 79 | 
 80 | class Subscript(DataClassNode):
 81 |     """A node representing an object subscript (i.e., the `[]` operator)"""
 82 | 
 83 |     value: TreeNode
 84 |     slice: TreeNode
 85 | 
 86 |     def print(self, printer: Printer):
 87 |         self.value.print(printer)
 88 |         with printer.color(Fore.LIGHTBLUE_EX):
 89 |             printer.write("[")
 90 |         self.slice.write(printer)
 91 |         with printer.color(Fore.LIGHTBLUE_EX):
 92 |             printer.write("]")
 93 | 
 94 | 
 95 | class Import(DataClassNode):
 96 |     names: ListNode
 97 |     from_name: StringNode
 98 | 
 99 |     def __init__(self, names: ListNode, from_name: StringNode):
100 |         super().__init__(names=names, from_name=from_name)
101 |         self.from_name.quoted = False
102 |         for child in self.names:
103 |             if isinstance(child, StringNode):
104 |                 child.quoted = False
105 | 
106 |     def print(self, printer: Printer):
107 |         if self.from_name.object:
108 |             with printer.color(Fore.YELLOW):
109 |                 printer.write("from ")
110 |             self.from_name.print(printer)
111 |             printer.write(" ")
112 |         with printer.color(Fore.YELLOW):
113 |             printer.write("import ")
114 |         SequenceFormatter('', '', ', ').print(printer, self.names)
115 | 


--------------------------------------------------------------------------------
/graphtage/builder.py:
--------------------------------------------------------------------------------
  1 | """A module intended to simplify building Graphtage IR trees from other tree-like data structures."""
  2 | 
  3 | from abc import ABC
  4 | import logging
  5 | from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, TypeVar
  6 | 
  7 | from . import (
  8 |     BoolNode, BuildOptions, DictNode, FixedKeyDictNode, FloatNode, IntegerNode, LeafNode, ListNode, MultiSetNode,
  9 |     NullNode, StringNode, TreeNode
 10 | )
 11 | from .object_set import IdentityHash
 12 | 
 13 | C = TypeVar("C")
 14 | T = TypeVar("T")
 15 | 
 16 | log = logging.getLogger(__name__)
 17 | 
 18 | 
 19 | class CyclicReference(LeafNode):
 20 |     def __init__(self, obj):
 21 |         super().__init__(IdentityHash(obj))
 22 | 
 23 |     def __hash__(self):
 24 |         return id(self.object)
 25 | 
 26 |     def __eq__(self, other):
 27 |         return isinstance(other, CyclicReference) and other.object is self.object
 28 | 
 29 | 
 30 | class Builder(ABC):
 31 |     EXPANDERS: Dict[Type[Any], Callable[["Builder", Any], Optional[Iterable[Any]]]]
 32 |     BUILDERS: Dict[Type[Any], Callable[["Builder", Any, List[TreeNode]], TreeNode]]
 33 | 
 34 |     def __init__(self, options: Optional[BuildOptions] = None):
 35 |         if options is None:
 36 |             self.options: BuildOptions = BuildOptions()
 37 |         else:
 38 |             self.options = options
 39 | 
 40 |     @staticmethod
 41 |     def expander(node_type: Type[T]):
 42 |         def wrapper(func: Callable[[C, T], Iterable[Any]]) -> Callable[[C, T], Iterable[Any]]:
 43 |             if hasattr(func, "_visitor_expander_for_type"):
 44 |                 func._visitor_expander_for_type = func._visitor_expander_for_type + (node_type,)
 45 |             else:
 46 |                 setattr(func, "_visitor_expander_for_type", (node_type,))
 47 |             return func
 48 | 
 49 |         return wrapper
 50 | 
 51 |     @staticmethod
 52 |     def builder(node_type: Type[T]):
 53 |         def wrapper(func: Callable[[C, T, List[TreeNode]], TreeNode]) -> Callable[[C, T, List[TreeNode]], TreeNode]:
 54 |             if hasattr(func, "_visitor_builder_for_type"):
 55 |                 func._visitor_builder_for_type = func._visitor_builder_for_type + (node_type,)
 56 |             else:
 57 |                 setattr(func, "_visitor_builder_for_type", (node_type,))
 58 |             return func
 59 | 
 60 |         return wrapper
 61 | 
 62 |     def __init_subclass__(cls, **kwargs):
 63 |         super().__init_subclass__(**kwargs)
 64 |         if not hasattr(cls, "EXPANDERS") or cls.EXPANDERS is None:
 65 |             setattr(cls, "EXPANDERS", {})
 66 |         else:
 67 |             setattr(cls, "EXPANDERS", dict(cls.EXPANDERS))
 68 |         if not hasattr(cls, "BUILDERS") or cls.BUILDERS is None:
 69 |             setattr(cls, "BUILDERS", {})
 70 |         else:
 71 |             setattr(cls, "BUILDERS", dict(cls.BUILDERS))
 72 |         new_expanders = {}
 73 |         new_builders = {}
 74 |         for member_name, member in cls.__dict__.items():
 75 |             if hasattr(member, "_visitor_expander_for_type"):
 76 |                 for expander_type in getattr(member, "_visitor_expander_for_type"):
 77 |                     if not isinstance(expander_type, type):
 78 |                         raise TypeError(f"{cls.__name__}.{member_name} was registered as an expander for "
 79 |                                         f"{expander_type!r}, which is not a type")
 80 |                     elif expander_type in cls.EXPANDERS:
 81 |                         raise TypeError(f"An expander for type {expander_type.__name__} is already registered to "
 82 |                                         f"{cls.EXPANDERS[expander_type]!r} and cannot be re-registered to "
 83 |                                         f"{cls.__name__}.{member_name}")
 84 |                     elif expander_type in new_expanders:
 85 |                         raise TypeError(f"An expander for type {expander_type.__name__} is already registered to "
 86 |                                         f"{new_expanders[expander_type]!r} and cannot be re-registered to "
 87 |                                         f"{cls.__name__}.{member_name}")
 88 |                     new_expanders[expander_type] = member
 89 |             if hasattr(member, "_visitor_builder_for_type"):
 90 |                 for builder_type in getattr(member, "_visitor_builder_for_type"):
 91 |                     if not isinstance(builder_type, type):
 92 |                         raise TypeError(f"{cls.__name__}.{member_name} was registered as an builder for "
 93 |                                         f"{builder_type!r}, which is not a type")
 94 |                     elif builder_type in cls.EXPANDERS:
 95 |                         raise TypeError(f"A builder for type {builder_type.__name__} is already registered to "
 96 |                                         f"{cls.BUILDERS[builder_type]!r} and cannot be re-registered to "
 97 |                                         f"{cls.__name__}.{builder_type}")
 98 |                     elif builder_type in new_builders:
 99 |                         raise TypeError(f"A builder for type {builder_type.__name__} is already registered to "
100 |                                         f"{new_builders[builder_type]!r} and cannot be re-registered to "
101 |                                         f"{cls.__name__}.{builder_type}")
102 |                     new_builders[builder_type] = member
103 |         cls.EXPANDERS.update(new_expanders)
104 |         cls.BUILDERS.update(new_builders)
105 | 
106 |     def default_expander(self, node: Any) -> Iterable[Any]:
107 |         return ()
108 | 
109 |     def default_builder(self, node: Any, children: List[TreeNode]) -> TreeNode:
110 |         raise NotImplementedError(f"A builder for type {node.__class__.__name__} is not defined for object {node!r}")
111 | 
112 |     @classmethod
113 |     def _resolve(cls, obj_type: Type[Any], choices: Dict[Type[Any], T]) -> Optional[T]:
114 |         """Resolves the most specialized expander or builder for `obj_type`"""
115 |         for t in obj_type.__mro__:
116 |             if t in choices:
117 |                 return choices[t]
118 |         return None
119 | 
120 |     @classmethod
121 |     def resolve_expander(cls, obj_type: Type[Any]) -> Optional[Callable[[Any], Optional[Iterable[Any]]]]:
122 |         """Resolves the most specialized expander for `obj_type`"""
123 |         return cls._resolve(obj_type, cls.EXPANDERS)
124 | 
125 |     @classmethod
126 |     def resolve_builder(cls, obj_type: Type[Any]) -> Optional[Callable[[Any, List[TreeNode]], TreeNode]]:
127 |         """Resolves the most specialized builder for `obj_type`"""
128 |         return cls._resolve(obj_type, cls.BUILDERS)
129 | 
130 |     def expand(self, node: Any) -> Iterable[Any]:
131 |         expander = self.resolve_expander(type(node))
132 |         if expander is None:
133 |             return self.default_expander(node)
134 |         return expander(self, node)
135 | 
136 |     def build(self, node: Any, children: List[TreeNode]) -> TreeNode:
137 |         builder = self.resolve_builder(type(node))
138 |         if builder is None:
139 |             result = self.default_builder(node, children)
140 |         else:
141 |             result = builder(self, node, children)
142 |         if not isinstance(result, TreeNode):
143 |             if builder is None:
144 |                 source = f"{self.__class__.__name__}.default_builder"
145 |             else:
146 |                 source = f"{builder!r}"
147 |             raise ValueError(f"{source}(node={node!r}, children={children!r}) returned {result!r}; "
148 |                              f"builders must return a graphtage.TreeNode")
149 |         return result
150 | 
151 |     def build_tree(self, root_obj) -> TreeNode:
152 |         children = self.expand(root_obj)
153 |         work: List[Tuple[Any, List[TreeNode], List[Any]]] = [(root_obj, [], list(reversed(list(children))))]
154 |         basic_builder = BasicBuilder(self.options)
155 |         with self.options.printer.tqdm(
156 |                 desc="Walking the Tree", leave=False, delay=2.0, unit=" nodes", total=1 + len(work[-1][-1])
157 |         ) as t:
158 |             while work:
159 |                 node, processed_children, unprocessed_children = work[-1]
160 | 
161 |                 if unprocessed_children:
162 |                     child = unprocessed_children.pop()
163 |                     t.update(1)
164 | 
165 |                     grandchildren = list(self.expand(child))
166 | 
167 |                     if grandchildren and self.options.check_for_cycles:
168 |                         # first, check if all of our grandchildren are leaves; if so, we don't need to check for a cycle
169 |                         all_are_leaves = all(
170 |                             all(False for _ in self.expand(grandchild))
171 |                             for grandchild in grandchildren
172 |                         )
173 |                         if not all_are_leaves:
174 |                             # make sure we aren't already in the process of expanding this child
175 |                             is_cycle = False
176 |                             for already_expanding, _, _ in work:
177 |                                 if already_expanding is child:
178 |                                     if self.options.ignore_cycles:
179 |                                         log.debug(f"Detected a cycle in {node!r} at child {child!r}; ignoring…")
180 |                                         processed_children.append(CyclicReference(child))
181 |                                         is_cycle = True
182 |                                         break
183 |                                     else:
184 |                                         raise ValueError(f"Detected a cycle in {node!r} at child {child!r}")
185 |                             if is_cycle:
186 |                                 continue
187 |                     work.append((child, [], list(reversed(grandchildren))))
188 |                     t.total = t.total + 1 + len(grandchildren)
189 |                     t.refresh()
190 |                     continue
191 | 
192 |                 _ = work.pop()
193 |                 t.update(1)
194 | 
195 |                 new_node = self.build(node, processed_children)
196 |                 if not work:
197 |                     return new_node
198 |                 work[-1][1].append(new_node)
199 | 
200 |             return NullNode()
201 | 
202 | 
class BasicBuilder(Builder):
    """A builder for basic Python types"""

    @Builder.builder(int)
    def build_int(self, obj: int, _) -> IntegerNode:
        """Wraps an :class:`int` in an :class:`IntegerNode`."""
        return IntegerNode(obj)

    @Builder.builder(str)
    @Builder.builder(bytes)
    def build_str(self, obj: str, _) -> StringNode:
        """Wraps a :class:`str` or :class:`bytes` object in a :class:`StringNode`."""
        return StringNode(obj)

    @Builder.builder(type(None))
    def build_none(self, obj, _) -> NullNode:
        """Produces a :class:`NullNode` for :const:`None`."""
        assert obj is None
        return NullNode()

    @Builder.builder(float)
    def build_float(self, obj: float, _) -> FloatNode:
        """Wraps a :class:`float` in a :class:`FloatNode`."""
        return FloatNode(obj)

    @Builder.builder(bool)
    def build_bool(self, obj: bool, _) -> BoolNode:
        """Wraps a :class:`bool` in a :class:`BoolNode`."""
        return BoolNode(obj)

    @Builder.expander(list)
    @Builder.expander(tuple)
    @Builder.expander(set)
    @Builder.expander(frozenset)
    def expand_list(self, obj: list):
        """Yields the elements of a list, tuple, set, or frozenset in iteration order."""
        for element in obj:
            yield element

    @Builder.builder(list)
    @Builder.builder(tuple)
    def build_list(self, obj, children: List[TreeNode]) -> ListNode:
        """Builds a :class:`ListNode` from ``children``, forwarding the list-edit settings from the options."""
        build_options = self.options
        return ListNode(
            children,
            allow_list_edits=build_options.allow_list_edits,
            allow_list_edits_when_same_length=build_options.allow_list_edits_when_same_length
        )

    @Builder.builder(set)
    @Builder.builder(frozenset)
    def build_set(self, obj, children: List[TreeNode]) -> MultiSetNode:
        """Builds a :class:`MultiSetNode` from ``children``."""
        return MultiSetNode(children)

    @Builder.expander(dict)
    def expand_dict(self, obj: dict):
        """Yields all of the keys of ``obj`` followed by all of its values."""
        for key in obj.keys():
            yield key
        for value in obj.values():
            yield value

    @Builder.builder(dict)
    def build_dict(self, _, children: List[TreeNode]):
        """Builds a dict node from ``children``, whose first half holds the keys and second half the values.

        A :class:`FixedKeyDictNode` is returned when ``options.allow_key_edits`` is false. Otherwise a
        :class:`DictNode` is returned with ``auto_match_keys`` copied from the options.
        """
        half = len(children) // 2
        pairs = dict(zip(children[:half], children[half:]))
        if not self.options.allow_key_edits:
            return FixedKeyDictNode.from_dict(pairs)
        result = DictNode.from_dict(pairs)
        result.auto_match_keys = self.options.auto_match_keys
        return result
269 | 


--------------------------------------------------------------------------------
/graphtage/constraints.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod
 2 | import logging
 3 | from typing import Optional
 4 | 
 5 | from .edits import Edit
 6 | from . import expressions
 7 | from . import graphtage
 8 | 
 9 | log = logging.getLogger('graphtage')
10 | 
11 | 
class ConditionalMatcher(metaclass=ABCMeta):
    """Abstract base for callables that decide how a pair of nodes is matched, based on an expression."""

    def __init__(self, condition: expressions.Expression):
        """Stores the expression that concrete matchers evaluate when called."""
        self.condition: expressions.Expression = condition

    @abstractmethod
    def __call__(self, from_node: graphtage.TreeNode, to_node: graphtage.TreeNode) -> Optional[Edit]:
        """Returns an edit for the two nodes, or :const:`None`; must be implemented by subclasses."""
        raise NotImplementedError()

    @classmethod
    def apply(cls, node: graphtage.TreeNode, condition: expressions.Expression):
        """Creates a matcher of this class for ``condition`` and adds it to ``node`` as an edit modifier."""
        matcher = cls(condition)
        node.add_edit_modifier(matcher)
23 | 
24 | 
class MatchIf(ConditionalMatcher):
    """Matcher that yields a :class:`graphtage.Replace` edit unless the condition evaluates to true."""

    def __call__(self, from_node: graphtage.TreeNode, to_node: graphtage.TreeNode) -> Optional[Edit]:
        """Evaluates the condition with ``from`` and ``to`` bound to the nodes' ``to_obj()`` values.

        The bindings are the same ones :class:`MatchUnless` uses, so that an expression sees identical values
        under either option.

        Returns:
            :const:`None` if the condition is truthy. Otherwise, including when evaluating the condition raises
            an exception (which is logged at debug level), a :class:`graphtage.Replace` of ``from_node`` with
            ``to_node``.
        """
        try:
            if self.condition.eval(locals={'from': from_node.to_obj(), 'to': to_node.to_obj()}):
                return None
        except Exception as e:
            # Best effort: a condition that cannot be evaluated for this pair is treated as not satisfied.
            log.debug(f"{e!s} while evaluating --match-if for nodes {from_node} and {to_node}")
        return graphtage.Replace(from_node, to_node)
33 | 
34 | 
class MatchUnless(ConditionalMatcher):
    """Matcher that yields a :class:`graphtage.Replace` edit only when the condition evaluates to true."""

    def __call__(self, from_node: graphtage.TreeNode, to_node: graphtage.TreeNode) -> Optional[Edit]:
        """Evaluates the condition with ``from`` and ``to`` bound to the nodes' ``to_obj()`` values.

        Returns a :class:`graphtage.Replace` if the condition is truthy. Returns :const:`None` otherwise,
        including when an exception is raised (it is logged at debug level).
        """
        try:
            bindings = {'from': from_node.to_obj(), 'to': to_node.to_obj()}
            if not self.condition.eval(locals=bindings):
                return None
            return graphtage.Replace(from_node, to_node)
        except Exception as e:
            log.debug(f"{e!s} while evaluating --match-unless for nodes {from_node} and {to_node}")
        return None
43 | 


--------------------------------------------------------------------------------
/graphtage/csv.py:
--------------------------------------------------------------------------------
  1 | """A :class:`graphtage.Filetype` for parsing, diffing, and rendering `CSV files`_.
  2 | 
  3 | .. _CSV files:
  4 |     https://en.wikipedia.org/wiki/Comma-separated_values
  5 | 
  6 | """
  7 | 
  8 | import csv
  9 | from io import StringIO
 10 | from typing import Optional
 11 | 
 12 | from . import graphtage, json
 13 | from .json import JSONFormatter
 14 | from .printer import Printer
 15 | from .sequences import SequenceFormatter
 16 | from .tree import GraphtageFormatter, TreeNode
 17 | 
 18 | 
 19 | class CSVRow(graphtage.ListNode[TreeNode]):
 20 |     """A node representing a row of a CSV file."""
 21 |     def __bool__(self):
 22 |         return bool(self._children)
 23 | 
 24 | 
 25 | class CSVNode(graphtage.ListNode[CSVRow]):
 26 |     """A node representing zero or more CSV rows."""
 27 |     def __bool__(self):
 28 |         return bool(self._children) and any(self._children)
 29 | 
 30 |     def __eq__(self, other: 'CSVNode'):
 31 |         return self._children == other._children or (not self and not other)
 32 | 
 33 | 
 34 | def build_tree(path: str, options: Optional[graphtage.BuildOptions] = None, *args, **kwargs) -> CSVNode:
 35 |     """Constructs a :class:`CSVNode` from a CSV file.
 36 | 
 37 |     The file is parsed using Python's :func:`csv.reader`. The elements in each row are constructed by delegating to
 38 |     :func:`graphtage.json.build_tree`::
 39 | 
 40 |         CSVRow([json.build_tree(i, options=options) for i in row])
 41 | 
 42 |     Args:
 43 |         path: The path to the file to be parsed.
 44 |         options: Optional build options to pass on to :meth:`graphtage.json.build_tree`.
 45 |         *args: Any extra positional arguments are passed on to :func:`csv.reader`.
 46 |         **kwargs: Any extra keyword arguments are passed on to :func:`csv.reader`.
 47 | 
 48 |     Returns:
 49 |         CSVNode: The resulting CSV node object.
 50 | 
 51 |     """
 52 |     csv_data = []
 53 |     with open(path) as f:
 54 |         for row in csv.reader(f, *args, **kwargs):
 55 |             rowdata = [json.build_tree(i, options=options) for i in row]
 56 |             for col in rowdata:
 57 |                 if isinstance(col, graphtage.StringNode):
 58 |                     col.quoted = False
 59 |             csv_data.append(CSVRow(rowdata))
 60 |     return CSVNode(csv_data)
 61 | 
 62 | 
 63 | class CSVRowFormatter(SequenceFormatter):
 64 |     """A formatter for CSV rows."""
 65 |     is_partial = True
 66 | 
 67 |     def __init__(self):
 68 |         """Initializes the formatter.
 69 | 
 70 |         Equivalent to::
 71 | 
 72 |             super().__init__('', '', ',')
 73 | 
 74 |         """
 75 |         super().__init__('', '', ',')
 76 | 
 77 |     def print_CSVRow(self, *args, **kwargs):
 78 |         """Prints a CSV row.
 79 | 
 80 |         Equivalent to::
 81 | 
 82 |             super().print_SequenceNode(*args, **kwargs)
 83 | 
 84 |         """
 85 |         super().print_SequenceNode(*args, **kwargs)
 86 | 
 87 |     def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
 88 |         """An empty implementation, since each row should be printed as a single line."""
 89 |         pass
 90 | 
 91 | 
 92 | class CSVRows(SequenceFormatter):
 93 |     """A sub formatter for printing the sequence of rows in a CSV file."""
 94 |     is_partial = True
 95 | 
 96 |     sub_format_types = [CSVRowFormatter]
 97 | 
 98 |     def __init__(self):
 99 |         """Initializes the formatter.
100 | 
101 |         Equivalent to::
102 | 
103 |             super().__init__('', '', '')
104 | 
105 |         """
106 |         super().__init__('', '', '')
107 | 
108 |     def print_CSVNode(self, *args, **kwargs):
109 |         """Prints a CSV node.
110 | 
111 |         Equivalent to::
112 | 
113 |             super().print_SequenceNode(*args, **kwargs)
114 | 
115 |         """
116 |         super().print_SequenceNode(*args, **kwargs)
117 | 
118 |     def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
119 |         """Prints a newline on all but the first and last items."""
120 |         if not is_first:
121 |             printer.newline()
122 | 
123 |     def items_indent(self, printer: Printer):
124 |         """Returns :obj:`printer` because CSV rows do not need to be indented."""
125 |         return printer
126 | 
127 | 
class CSVFormatter(GraphtageFormatter):
    """Top-level formatter for CSV files."""
    sub_format_types = [CSVRows, JSONFormatter]

    def print_LeafNode(self, printer: Printer, node: graphtage.LeafNode):
        """Prints a leaf node, which should always be a column in a CSV row.

        An edited node that carries an edit is delegated to the second sub formatter. Otherwise the node's object
        is escaped by writing it through :func:`csv.writer`::

            csv.writer(...).writerow([node.object])

        and the single trailing line terminator produced by the writer is removed before printing.

        """
        if node.edited and node.edit is not None:
            self.sub_formatters[1].print(printer, node.edit)
            return
        buffer = StringIO()
        csv.writer(buffer).writerow([node.object])
        escaped = buffer.getvalue()
        # Strip exactly one terminator, preferring the two-character form.
        for terminator in ('\r\n', '\n', '\r'):
            if escaped.endswith(terminator):
                escaped = escaped[:-len(terminator)]
                break
        printer.write(escaped)
        buffer.close()
153 | 
154 | 
class CSV(graphtage.Filetype):
    """The CSV filetype."""

    def __init__(self):
        """Registers the filetype under the names `csv` and `text/csv`.

        CSV identifies itself with the MIME types `csv` and `text/csv`.

        """
        super().__init__('csv', 'text/csv')

    def build_tree(self, path: str, options: Optional[graphtage.BuildOptions] = None) -> TreeNode:
        """Equivalent to the module-level :func:`build_tree`."""
        tree = build_tree(path, options=options)
        return tree

    def build_tree_handling_errors(self, path: str, options: Optional[graphtage.BuildOptions] = None) -> TreeNode:
        """Delegates directly to :meth:`CSV.build_tree` without any additional error handling."""
        return self.build_tree(path=path, options=options)

    def get_default_formatter(self) -> CSVFormatter:
        """Returns the default :class:`CSVFormatter` instance."""
        return CSVFormatter.DEFAULT_INSTANCE
177 | 


--------------------------------------------------------------------------------
/graphtage/dataclasses.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict, Iterator, List, Tuple, Type
  2 | 
  3 | from . import AbstractCompoundEdit, Edit, Range, Replace
  4 | from .printer import Fore, Printer
  5 | from .tree import ContainerNode, TreeNode
  6 | 
  7 | 
  8 | class DataClassEdit(AbstractCompoundEdit):
  9 |     def __init__(self, from_node: "DataClassNode", to_node: "DataClassNode"):
 10 |         from_slots = dict(from_node.items())
 11 |         to_slots = dict(to_node.items())
 12 |         if from_slots.keys() != to_slots.keys():
 13 |             raise ValueError(f"Node {from_node!r} cannot be edited to {to_node!r} because they have incompatible slots")
 14 |         self.slot_edits: List[Edit] = [
 15 |             value.edits(to_slots[slot])
 16 |             for slot, value in from_slots.items()
 17 |         ]
 18 |         super().__init__(from_node, to_node)
 19 | 
 20 |     def bounds(self) -> Range:
 21 |         total = Range(0, 0)
 22 |         for e in self.slot_edits:
 23 |             total = total + e.bounds()
 24 |         return total
 25 | 
 26 |     def edits(self) -> Iterator[Edit]:
 27 |         yield from self.slot_edits
 28 | 
 29 |     def tighten_bounds(self) -> bool:
 30 |         for edit in self.slot_edits:
 31 |             if edit.tighten_bounds():
 32 |                 return True
 33 |         return False
 34 | 
 35 | 
 36 | class DataClassNode(ContainerNode):
 37 |     """A container node that can be initialized similar to a Python :func:`dataclasses.dataclass`"""
 38 | 
 39 |     _SLOTS: Tuple[str, ...]
 40 |     _SLOT_ANNOTATIONS: Dict[str, Type[TreeNode]]
 41 |     _DATA_CLASS_ANCESTORS: List[Type["DataClassNode"]]
 42 | 
 43 |     def __init__(self, *args, **kwargs):
 44 |         """Be careful extending __init__; consider using :func:`DataClassNode.post_init` instead."""
 45 |         our_kwargs = {
 46 |             k: v
 47 |             for k, v in kwargs.items()
 48 |             if k in self._SLOTS
 49 |         }
 50 |         parent_kwargs = {
 51 |             k: v
 52 |             for k, v in kwargs.items()
 53 |             if k not in self._SLOTS
 54 |         }
 55 |         required_positional_args = len(self._SLOTS) - len(our_kwargs)
 56 |         assert required_positional_args >= 0
 57 |         if required_positional_args > len(args):
 58 |             raise ValueError(f"Not enough arguments sent to {self.__class__.__name__}.__init__: {args!r} {kwargs!r}; "
 59 |                              f"expected at least {len(self._SLOTS)}")
 60 |         start_index = len(args) - required_positional_args
 61 |         parent_args = args[:start_index]
 62 |         super().__init__(*parent_args, **parent_kwargs)
 63 |         our_args = list(args[start_index:])
 64 |         for s in self._SLOTS:
 65 |             if s in our_kwargs:
 66 |                 value = our_kwargs[s]
 67 |             elif not our_args:
 68 |                 raise ValueError(f"Missing argument for {self.__class__.__name__}.{s}")
 69 |             else:
 70 |                 value = our_args[0]
 71 |                 our_args = our_args[1:]
 72 |             expected_type = self._SLOT_ANNOTATIONS[s]
 73 |             if not isinstance(value, expected_type):
 74 |                 raise ValueError(f"Expected a node of type {expected_type.__name__} for argument "
 75 |                                  f"{self.__class__.__name__}.{s} but instead got {value!r}")
 76 |             setattr(self, s, value)
 77 |         # self.__hash__ gets called so often, we cache the result:
 78 |         self.__hash = hash(tuple(self))
 79 |         for ancestor in self._DATA_CLASS_ANCESTORS:
 80 |             ancestor.post_init(self)
 81 | 
 82 |     def post_init(self):
 83 |         """Callback called after this class's members have been initialized.
 84 | 
 85 |         This callback should not call `super().post_init()`. Each superclass's `post_init()` will be automatically
 86 |         called in order of the `__mro__`.
 87 |         """
 88 |         pass
 89 | 
 90 |     def __init_subclass__(cls, **kwargs):
 91 |         super().__init_subclass__(**kwargs)
 92 |         ancestors = [
 93 |             c
 94 |             for c in cls.__mro__
 95 |             if c is not cls and issubclass(c, DataClassNode) and c is not DataClassNode
 96 |         ]
 97 |         cls._DATA_CLASS_ANCESTORS = ancestors
 98 |         ancestor_slot_names = {
 99 |             name: a
100 |             for a in ancestors
101 |             for name in a._SLOTS
102 |         }
103 |         if not hasattr(cls, "_SLOT_ANNOTATIONS") or cls._SLOT_ANNOTATIONS is None:
104 |             cls._SLOT_ANNOTATIONS = {}
105 |             cls._SLOTS = ()
106 |         else:
107 |             cls._SLOT_ANNOTATIONS = dict(cls._SLOT_ANNOTATIONS)
108 |         new_slots = []
109 |         for i, (name, slot_type) in enumerate(cls.__annotations__.items()):
110 |             if not isinstance(slot_type, type) or not issubclass(slot_type, TreeNode):
111 |                 continue
112 |             if name in ancestor_slot_names:
113 |                 raise TypeError(f"Dataclass {cls.__name__} cannot redefine slot {name!r} because it is already "
114 |                                 f"defined in its superclass {ancestor_slot_names[name].__name__}")
115 |             new_slots.append(name)
116 |             cls._SLOT_ANNOTATIONS[name] = slot_type
117 |         cls._SLOTS = cls._SLOTS + tuple(new_slots)
118 | 
119 |     def __hash__(self):
120 |         return self.__hash
121 | 
122 |     def __iter__(self) -> Iterator[TreeNode]:
123 |         for _, value in self.items():
124 |             yield value
125 | 
126 |     def items(self) -> Iterator[Tuple[str, TreeNode]]:
127 |         for slot in self._SLOTS:
128 |             yield slot, getattr(self, slot)
129 | 
130 |     def to_obj(self):
131 |         return {
132 |             slot: getattr(self, slot).to_obj()
133 |             for slot in self._SLOTS
134 |         }
135 | 
136 |     def edits(self, node: TreeNode) -> Edit:
137 |         if isinstance(node, DataClassNode):
138 |             our_slots = set(self._SLOTS)
139 |             their_slots = set(node._SLOTS)
140 |             if our_slots == their_slots:
141 |                 return DataClassEdit(self, node)
142 |         return Replace(self, node)
143 | 
144 |     def calculate_total_size(self) -> int:
145 |         return sum(s.calculate_total_size() for s in self)
146 | 
147 |     def print(self, printer: Printer):
148 |         with printer.color(Fore.Yellow):
149 |             printer.write(self.__class__.__name__)
150 |         printer.write("(")
151 |         for i, slot in enumerate(self._SLOTS):
152 |             if i > 0:
153 |                 printer.write(", ")
154 |             with printer.color(Fore.RED):
155 |                 printer.write(slot)
156 |             with printer.bright():
157 |                 printer.write("=")
158 |             getattr(self, slot).print(printer)
159 |         printer.write(")")
160 | 
161 |     def __len__(self):
162 |         return len(self._SLOTS)
163 | 
164 |     def __eq__(self, other):
165 |         return isinstance(other, DataClassNode) and dict(self.items()) == dict(other.items())
166 | 
167 |     def __repr__(self):
168 |         attrs = ", ".join(
169 |             f"{slot}={value!r}"
170 |             for slot, value in self.items()
171 |         )
172 |         return f"{self.__class__.__name__}({attrs})"
173 | 


--------------------------------------------------------------------------------
/graphtage/debug.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utilities to aid in debugging
 3 | """
 4 | 
 5 | from functools import partial
 6 | from inspect import getmembers
 7 | 
 8 | DEBUG_MODE = False
 9 | 
10 | 
11 | if DEBUG_MODE:
12 |     class Debuggable:
13 |         _DEBUG_PATCHED: bool = False
14 | 
15 |         def __new__(cls, *args, **kwargs):
16 |             instance = super().__new__(cls)
17 |             if not instance._DEBUG_PATCHED:
18 |                 debug_all_member = None
19 |                 for name, member in getmembers(instance):
20 |                     if not name.startswith("_debug_"):
21 |                         continue
22 |                     name = name[len("_debug_"):]
23 |                     if name == "__all__":
24 |                         debug_all_member = member
25 |                         continue
26 |                     elif not hasattr(instance, name):
27 |                         continue
28 |                     func = getattr(instance, name)
29 |                     setattr(instance, f"_original_{name}", func)
30 |                     setattr(instance, name, member)
31 |                 if debug_all_member is not None:
32 |                     for name, member in getmembers(instance):
33 |                         if name.startswith("_") or not callable(member):
34 |                             continue
35 | 
36 |                         setattr(instance, name, partial(debug_all_member, name, member))
37 |                 instance._DEBUG_PATCHED = True
38 |             return instance
39 | else:
40 |     class Debuggable:
41 |         pass
42 | 


--------------------------------------------------------------------------------
/graphtage/fibonacci.py:
--------------------------------------------------------------------------------
  1 | """A pure Python implementation of a `Fibonacci Heap`_.
  2 | 
  3 | Many of the algorithms in Graphtage only require partially sorting collections, so we can get a speedup from using a
  4 | Fibonacci Heap that has amortized constant time insertion.
  5 | 
  6 | .. _Fibonacci Heap:
  7 |     https://en.wikipedia.org/wiki/Fibonacci_heap
  8 | 
  9 | """
 10 | 
 11 | from typing import Callable, Generic, Iterator, Optional, TypeVar
 12 | 
 13 | T = TypeVar('T')
 14 | Key = TypeVar('Key')
 15 | DefaultKey = object()
 16 | 
 17 | 
 18 | class HeapNode(Generic[T, Key]):
 19 |     """A node in a :class:`FibonacciHeap`."""
 20 |     def __init__(self, item: T, key: Key = DefaultKey):
 21 |         """Initializes a Fibonacci heap node.
 22 | 
 23 |         Args:
 24 |             item: The heap item associated with the node.
 25 |             key: An optional key to use for the item in sorting. If omitted, the item itself will be used.
 26 | 
 27 |         """
 28 |         self.item: T = item
 29 |         """The item associated with this heap node."""
 30 |         if id(key) == id(DefaultKey):
 31 |             key = item
 32 |         self.key: Key = key
 33 |         """The key to be used when sorting this heap node."""
 34 |         self.parent: Optional[HeapNode[T, Key]] = None
 35 |         """The node's parent."""
 36 |         self.child: Optional[HeapNode[T, Key]] = None
 37 |         """The node's child."""
 38 |         self.left: HeapNode[T, Key] = self
 39 |         """The left sibling of this node, or :obj:`self` if it has no left sibling."""
 40 |         self.right: HeapNode[T, Key] = self
 41 |         """The right sibling of this node, or :obj:`self` if it has no left sibling."""
 42 |         self.degree: int = 0
 43 |         """The degree of this node (*i.e.*, the number of its children)."""
 44 |         self.mark: bool = False
 45 |         """The node's marked state."""
 46 |         self.deleted: bool = False
 47 |         """Whether the node has been deleted.
 48 |         
 49 |         This is to prevent nodes from being manipulated after they have been removed from a heap.
 50 |         
 51 |         Warning:
 52 |             Do not set :attr:`HeapNode.deleted` to :const:`True` unless the node has already been removed from the heap.
 53 | 
 54 |         """
 55 | 
 56 |     def add_child(self, node):
 57 |         """Adds a child to this heap node, incrementing its degree."""
 58 |         assert node != self
 59 |         if self.child is None:
 60 |             self.child = node
 61 |         else:
 62 |             node.right = self.child.right
 63 |             node.left = self.child
 64 |             self.child.right.left = node
 65 |             self.child.right = node
 66 |         self.degree += 1
 67 | 
 68 |     def remove_child(self, node):
 69 |         """Removes a child from this heap node, decrementing its degree."""
 70 |         assert self.child is not None
 71 |         if self.child == self.child.right:
 72 |             self.child = None
 73 |         elif self.child == node:
 74 |             self.child = node.right
 75 |             node.right.parent = self
 76 |         node.left.right = node.right
 77 |         node.right.left = node.left
 78 |         self.degree -= 1
 79 | 
 80 |     @property
 81 |     def siblings(self) -> Iterator['HeapNode[T, Key]']:
 82 |         """Iterates over this node's siblings.
 83 | 
 84 |         Equivalent to::
 85 | 
 86 |             node = self.right
 87 |             while node != self:
 88 |                 yield node
 89 |                 node = node.right
 90 | 
 91 |         """
 92 |         node = self.right
 93 |         while node != self:
 94 |             yield node
 95 |             node = node.right
 96 | 
 97 |     @property
 98 |     def children(self) -> Iterator['HeapNode[T, Key]']:
 99 |         """Iterates over this node's children.
100 | 
101 |         Equivalent to::
102 | 
103 |             if self.child is not None:
104 |                 yield self.child
105 |                 yield from self.child.siblings
106 | 
107 |         """
108 |         assert (self.degree == 0 and self.child is None) or (self.degree == 1 + sum(1 for _ in self.child.siblings))
109 |         if self.child is not None:
110 |             yield self.child
111 |             yield from self.child.siblings
112 | 
113 |     def __iter__(self) -> Iterator['HeapNode[T, Key]']:
114 |         """Iterates over all of this node's descendants, including itself."""
115 |         yield self
116 |         if self.child:
117 |             yield from iter(self.child)
118 |         node = self.right
119 |         while node != self:
120 |             yield node
121 |             if node.child is not None:
122 |                 yield from iter(node.child)
123 |             node = node.right
124 | 
125 |     def __lt__(self, other):
126 |         return (self.deleted and not other.deleted) or self.key < other.key
127 | 
128 |     def __le__(self, other):
129 |         return self < other or self.key == other.key
130 | 
131 |     def __eq__(self, other):
132 |         return id(self) == id(other)
133 | 
134 |     def __hash__(self):
135 |         return hash(self.item)
136 | 
137 |     def __repr__(self):
138 |         return f"{self.__class__.__name__}(item={self.item!r}, key={self.key!r})"
139 | 
140 | 
class FibonacciHeap(Generic[T, Key]):
    """A Fibonacci Heap."""
    def __init__(self, key: Optional[Callable[[T], Key]] = None):
        """Initializes a Fibonacci heap.

        Args:
            key: An optional function that accepts an item and returns the key to be used for comparing that item.
                If omitted, it is equivalent to::

                    lambda item: item

        """
        if key is None:
            self.key = lambda a: a
            """The function to extract comparison keys from items."""
        else:
            self.key: Callable[[T], Key] = key
        self._min: Optional[HeapNode[T, Key]] = None
        self._root: Optional[HeapNode[T, Key]] = None
        self._n: int = 0

    def clear(self):
        """Removes all items from this heap."""
        self._min = None
        self._root = None
        self._n = 0

    def peek(self) -> T:
        """Returns the smallest element of the heap without removing it.

        Nodes at the minimum position that are flagged as deleted are extracted first.

        Returns:
            T: The smallest element of the heap.

        """
        while self._min is not None and self._min.deleted:
            self._extract_min()
        return self._min.item

    def remove(self, node: HeapNode[T, Key]):
        """Removes the given node from this heap.

        Args:
            node: The node to be removed.

        Warning:
            This function assumes that the provided node is actually a member of this heap. It also assumes (but does
            not check) that :attr:`node.deleted <HeapNode.deleted>` is :const:`False`. If either of these assumptions
            is incorrect, it will lead to undefined behavior and corruption of the heap.

        """
        # Flagging the node as deleted makes it compare less than any live node, so it can be cut to the
        # root list and then extracted as if it were the minimum.
        node.deleted = True
        y = node.parent
        if y is not None and node < y:
            self._cut(node, y)
            self._cascading_cut(y)
        self._min = node
        self._extract_min()

    @property
    def min_node(self) -> HeapNode[T, Key]:
        """Returns the heap node associated with the smallest item in the heap, without removing it."""
        return self._min

    @property
    def _roots(self) -> Iterator[HeapNode[T, Key]]:
        """Iterates over the nodes of the root list; yields nothing for an empty heap."""
        if self._root is not None:
            yield self._root
            yield from self._root.siblings

    def __len__(self):
        return self._n

    def __bool__(self):
        return self._n > 0

    def __iter__(self) -> Iterator[T]:
        """Iterates over all of the items in this heap, in the same order as :meth:`FibonacciHeap.nodes`.

        Iterating an empty heap yields nothing.

        """
        # Go through nodes() rather than iterating self._root directly: _root is None for an empty heap,
        # and iterating None raises a TypeError.
        for node in self.nodes():
            yield node.item

    def nodes(self) -> Iterator[HeapNode[T, Key]]:
        """Iterates over all of the heap nodes in this heap."""
        if self._root is None:
            return
        yield from iter(self._root)

    def _extract_min(self) -> HeapNode[T, Key]:
        """Unlinks the current minimum node and returns it, or returns :const:`None` if there is none.

        The children of the removed node are promoted to the root list before the heap is consolidated.

        """
        z = self._min
        if z is not None:
            if z.child is not None:
                # Snapshot the children first, because appending them to the root list rewires their links.
                for child in list(z.children):
                    self._append_root(child)
                    child.parent = None
            self._remove_root(z)
            if z == z.right:
                # z still points at itself, so it was the only root and the heap is now empty.
                self._min = self._root = None
            else:
                self._min = z.right
                self._consolidate()
            self._n -= 1
        return z

    def push(self, item: T) -> HeapNode[T, Key]:
        """Adds a new item to this heap.

        Returns:
            HeapNode[T, Key]: The heap node created to store the new item.

        """
        node = HeapNode(item=item, key=self.key(item))
        node.left = node.right = node
        self._append_root(node)
        if self._min is None or node < self._min:
            self._min = node
        self._n += 1
        return node

    def decrease_key(self, x: HeapNode[T, Key], k: Key):
        """Decreases the key value associated with the given node.

        Args:
            x: The node to modify.
            k: The new key value.

        Raises:
            ValueError: If :attr:`x.key <HeapNode.key>` is less than :obj:`k`.

        """
        if x.key < k:
            raise ValueError(f"The key can only decrease! New key {k!r} > old key {x.key!r}.")
        x.key = k
        y = x.parent
        if y is not None and x < y:
            self._cut(x, y)
            self._cascading_cut(y)
        if x < self._min:
            self._min = x

    def __add__(self, other):
        """Returns the union of this heap and :obj:`other`.

        If either operand is empty, the other operand itself is returned. Otherwise a new heap using this heap's
        key function is returned, and the root lists of both operands are spliced together in place.

        """
        # NOTE(review): the operands appear to share nodes with the result after a merge -- confirm that
        # callers do not keep using them.
        if not other:
            return self
        elif not self:
            return other
        merged = FibonacciHeap(key=self.key)
        merged._root, merged._min = self._root, self._min
        merged.key = self.key
        last = other._root.left
        other._root.left = merged._root.left
        merged._root.left.right = other._root
        merged._root.left = last
        merged._root.left.right = merged._root
        if other._min < merged._min:
            merged._min = other._min
        merged._n = self._n + other._n
        return merged

    def _cut(self, x: HeapNode[T, Key], y: HeapNode[T, Key]):
        """Detaches :obj:`x` from its parent :obj:`y` and moves it to the root list, clearing its mark."""
        y.remove_child(x)
        self._append_root(x)
        x.parent = None
        x.mark = False

    def _cascading_cut(self, y: HeapNode[T, Key]):
        """Marks :obj:`y` if it is an unmarked non-root; if it is already marked, cuts it and recurses on its
        parent."""
        z = y.parent
        if z is not None:
            if y.mark is False:
                y.mark = True
            else:
                self._cut(y, z)
                self._cascading_cut(z)

    def _consolidate(self):
        """Links roots of equal degree until all root degrees are distinct, then refreshes the minimum."""
        # a[d] holds the root of degree d seen so far, if any.
        a = [None] * self._n
        for x in list(self._roots):
            d = x.degree
            while a[d] is not None:
                y = a[d]
                if y < x:
                    # Keep the smaller node in x so that it becomes the parent.
                    x, y = y, x
                self._link(y, x)
                a[d] = None
                d += 1
            a[d] = x
        for i in range(0, len(a)):
            if a[i] is not None:
                if a[i] <= self._min:
                    self._min = a[i]

    def _link(self, y: HeapNode[T, Key], x: HeapNode[T, Key]):
        """Removes :obj:`y` from the root list and makes it an unmarked child of :obj:`x`."""
        self._remove_root(y)
        y.left = y.right = y
        x.add_child(y)
        y.parent = x
        y.mark = False

    def _append_root(self, node: HeapNode[T, Key]):
        """Inserts :obj:`node` into the root list, directly to the right of the current root entry point."""
        if self._root is None:
            self._root = node
        else:
            node.right = self._root.right
            node.left = self._root
            self._root.right.left = node
            self._root.right = node

    def _remove_root(self, node: HeapNode[T, Key]):
        """Unlinks :obj:`node` from the root list; the node's own left/right pointers are left untouched."""
        if node == self._root:
            self._root = node.right
        node.left.right = node.right
        node.right.left = node.left

    def pop(self) -> T:
        """Returns and removes the smallest item from this heap."""
        while self._min is not None and self._min.deleted:
            self._extract_min()
        return self._extract_min().item
355 | 
356 | 
class ReversedComparator(Generic[Key]):
    """A wrapper around a key whose ordering comparisons are the reverse of the wrapped key's."""
    def __init__(self, key: Key):
        """Wraps :obj:`key`; the wrapped value is available as the ``key`` attribute."""
        self.key = key

    def __lt__(self, other):
        """Less-than holds when the wrapped key is greater than the other wrapped key."""
        mine, theirs = self.key, other.key
        return mine > theirs

    def __le__(self, other):
        """Less-or-equal holds when the wrapped key is greater than or equal to the other wrapped key."""
        mine, theirs = self.key, other.key
        return mine >= theirs

    def __eq__(self, other):
        """Equality is delegated unchanged to the wrapped keys."""
        mine, theirs = self.key, other.key
        return mine == theirs

    def __hash__(self):
        return hash(self.key)
373 | 
374 | 
class MaxFibonacciHeap(Generic[T, Key], FibonacciHeap[T, ReversedComparator[Key]]):
    """A Fibonacci Heap that yields items in decreasing order, using a :class:`ReversedComparator`."""
    def __init__(self, key: Optional[Callable[[T], Key]] = None):
        """Initializes the heap so that every item's key is wrapped in a :class:`ReversedComparator`.

        Args:
            key: An optional function mapping an item to its comparison key. If omitted, the item itself is used.

        """
        base_key = key if key is not None else (lambda n: n)
        super().__init__(key=lambda n: ReversedComparator(base_key(n)))
382 | 


--------------------------------------------------------------------------------
/graphtage/json.py:
--------------------------------------------------------------------------------
  1 | """A :class:`graphtage.Filetype` for parsing, diffing, and rendering `JSON files`_.
  2 | 
  3 | .. _JSON files:
  4 |     https://tools.ietf.org/html/std90
  5 | 
  6 | """
  7 | 
  8 | import json
  9 | import json5
 10 | import os
 11 | from typing import Optional, Union
 12 | 
 13 | from .graphtage import BoolNode, BuildOptions, DictNode, Filetype, FixedKeyDictNode, \
 14 |     FloatNode, IntegerNode, KeyValuePairNode, LeafNode, ListNode, NullNode, StringFormatter, StringNode
 15 | from .printer import DEFAULT_PRINTER, Fore, Printer
 16 | from .sequences import SequenceFormatter
 17 | from .tree import ContainerNode, GraphtageFormatter, TreeNode
 18 | 
 19 | 
 20 | def build_tree(
 21 |         python_obj: Union[int, float, bool, str, bytes, list, dict],
 22 |         options: Optional[BuildOptions] = None,
 23 |         force_leaf_node: bool = False) -> TreeNode:
 24 |     """Builds a Graphtage tree from an arbitrary Python object.
 25 | 
 26 |     Args:
 27 |         python_obj: The object from which to build the tree.
 28 |         options: An optional set of options for building the tree.
 29 |         force_leaf_node: If :const:`True`, assume that :obj:`python_obj` is *not* a :func:`list` or :func:`dict`.
 30 | 
 31 |     Returns:
 32 |         TreeNode: The resulting tree.
 33 | 
 34 |     Raises:
 35 |         ValueError: If :obj:`force_leaf_node` is :const:`True` and :obj:`python_obj` is *not* one of :class:`int`,
 36 |             :class:`float`, :class:`bool`, :class:`str`, or :class:`bytes`.
 37 |         ValueError: If the object is of an unsupported type.
 38 | 
 39 |     """
 40 |     if options is None:
 41 |         options = BuildOptions()
 42 |     if isinstance(python_obj, bool):
 43 |         return BoolNode(python_obj)
 44 |     elif isinstance(python_obj, int):
 45 |         return IntegerNode(python_obj)
 46 |     elif isinstance(python_obj, float):
 47 |         return FloatNode(python_obj)
 48 |     elif isinstance(python_obj, str):
 49 |         return StringNode(python_obj)
 50 |     elif isinstance(python_obj, bytes):
 51 |         return StringNode(python_obj.decode('utf-8'))
 52 |     elif force_leaf_node:
 53 |         raise ValueError(f"{python_obj!r} was expected to be an int or string, but was instead a {type(python_obj)}")
 54 |     elif isinstance(python_obj, list) or isinstance(python_obj, tuple):
 55 |         return ListNode(
 56 |             [build_tree(n, options=options) for n in
 57 |              DEFAULT_PRINTER.tqdm(python_obj, delay=2.0, desc="Loading JSON List", leave=False)],
 58 |             allow_list_edits=options.allow_list_edits,
 59 |             allow_list_edits_when_same_length=options.allow_list_edits_when_same_length
 60 |         )
 61 |     elif isinstance(python_obj, dict):
 62 |         dict_items = {
 63 |             build_tree(k, options=options, force_leaf_node=True):
 64 |                 build_tree(v, options=options) for k, v in
 65 |             DEFAULT_PRINTER.tqdm(python_obj.items(), delay=2.0, desc="Loading JSON Dict", leave=False)
 66 |         }
 67 |         if options.allow_key_edits:
 68 |             dict_node = DictNode.from_dict(dict_items)
 69 |             dict_node.auto_match_keys = options.auto_match_keys
 70 |             return dict_node
 71 |         else:
 72 |             return FixedKeyDictNode.from_dict(dict_items)
 73 |     elif python_obj is None:
 74 |         return NullNode()
 75 |     else:
 76 |         raise ValueError(f"Unsupported Python object {python_obj!r} of type {type(python_obj)}")
 77 | 
 78 | 
 79 | class JSONListFormatter(SequenceFormatter):
 80 |     """A sub-formatter for JSON lists."""
 81 |     is_partial = True
 82 | 
 83 |     def __init__(self):
 84 |         """Initializes the JSON list formatter.
 85 | 
 86 |         Equivalent to::
 87 | 
 88 |             super().__init__('[', ']', ',')
 89 | 
 90 |         """
 91 |         super().__init__('[', ']', ',')
 92 | 
 93 |     def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
 94 |         if not hasattr(printer, 'join_lists') or not printer.join_lists:
 95 |             printer.newline()
 96 | 
 97 |     def print_ListNode(self, *args, **kwargs):
 98 |         """Prints a :class:`graphtage.ListNode`.
 99 | 
100 |         Equivalent to::
101 | 
102 |             super().print_SequenceNode(*args, **kwargs)
103 | 
104 |         """
105 |         super().print_SequenceNode(*args, **kwargs)
106 | 
107 |     def print_SequenceNode(self, *args, **kwargs):
108 |         """Prints a non-List sequence.
109 | 
110 |         This delegates to the parent formatter's implementation::
111 | 
112 |             self.parent.print(*args, **kwargs)
113 | 
114 |         which should invoke :meth:`JSONFormatter.print`, thereby delegating to the :class:`JSONDictFormatter` in
115 |         instances where a list contains a dict.
116 | 
117 |         """
118 |         self.parent.print(*args, **kwargs)
119 | 
120 | 
class JSONDictFormatter(SequenceFormatter):
    """A sub-formatter that renders JSON dicts."""
    is_partial = True

    def __init__(self):
        """Sets up the formatter with ``{`` and ``}`` as delimiters and ``,`` as the item separator."""
        super().__init__('{', '}', ',')

    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
        """Writes a newline between items unless the printer has a truthy ``join_dict_items`` attribute."""
        if getattr(printer, 'join_dict_items', False):
            return
        printer.newline()

    def print_MultiSetNode(self, *args, **kwargs):
        """Prints a :class:`graphtage.MultiSetNode` using the inherited sequence printing implementation."""
        super().print_SequenceNode(*args, **kwargs)

    def print_MappingNode(self, *args, **kwargs):
        """Prints a :class:`graphtage.MappingNode` using the inherited sequence printing implementation."""
        super().print_SequenceNode(*args, **kwargs)

    def print_SequenceNode(self, *args, **kwargs):
        """Prints a sequence that is not a dict by handing it back to the parent formatter.

        Calling ``self.parent.print`` should invoke :meth:`JSONFormatter.print`, which in turn delegates to
        :class:`JSONListFormatter` in instances where a dict contains a list.

        """
        parent_formatter = self.parent
        parent_formatter.print(*args, **kwargs)
164 | 
165 | 
class JSONStringFormatter(StringFormatter):
    """A formatter that renders strings with JSON quoting and escaping."""
    is_partial = True

    def write_start_quote(self, printer: Printer, _):
        """Writes the opening double quote; JSON strings are always quoted."""
        self.is_quoted = True
        printer.write('"')

    def write_end_quote(self, printer: Printer, _):
        """Writes the closing double quote; JSON strings are always quoted."""
        self.is_quoted = True
        printer.write('"')

    def escape(self, c: str) -> str:
        """Escapes a piece of a string for inclusion in a JSON string literal.

        Args:
            c: The text to escape.

        Returns:
            str: The JSON encoding of `c` with its enclosing quotes removed, i.e. ``json.dumps(c)[1:-1]``.

        """
        quoted = json.dumps(c)
        # Drop the surrounding double quotes that json.dumps adds
        return quoted[1:-1]
197 | 
198 | 
class JSONFormatter(GraphtageFormatter):
    """The default formatter for JSON output."""
    sub_format_types = [JSONStringFormatter, JSONListFormatter, JSONDictFormatter]

    def print_LeafNode(self, printer: Printer, node: LeafNode):
        """Prints a :class:`graphtage.LeafNode` by writing the JSON encoding of its wrapped object."""
        encoded = json.dumps(node.object)
        printer.write(encoded)

    def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode):
        """Prints a :class:`graphtage.KeyValuePairNode`.

        The key is printed in blue, followed by a bright ": ", followed by the value.

        """
        with printer.color(Fore.BLUE):
            self.print(printer, node.key)
        with printer.bright():
            printer.write(": ")
        self.print(printer, node.value)

    def print_ContainerNode(self, printer: Printer, node: ContainerNode):
        """Prints a :class:`graphtage.ContainerNode` as if it were a list.

        This is a fallback to permit the printing of custom containers, like :class:`graphtage.xml.XMLElement`.

        """
        # Build a temporary list from copies of the container's children and print that instead
        child_copies = (child.copy() for child in node.children())
        self.print(printer, ListNode(child_copies))
234 | 
235 | 
class JSON(Filetype):
    """The JSON file type."""
    def __init__(self):
        """Registers the JSON file type.

        JSON associates itself with the "json", "application/json", "application/x-javascript",
        "text/javascript", "text/x-javascript", and "text/x-json" MIME types.

        """
        super().__init__(
            'json',
            'application/json',
            'application/x-javascript',
            'text/javascript',
            'text/x-javascript',
            'text/x-json'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Parses the JSON file at :obj:`path` and builds a tree from the result."""
        with open(path) as json_file:
            parsed = json.load(json_file)
        return build_tree(parsed, options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Like :meth:`build_tree`, but returns a description of the problem if the file is not valid JSON."""
        try:
            tree = self.build_tree(path=path, options=options)
        except json.decoder.JSONDecodeError as de:
            return f'Error parsing {os.path.basename(path)}: {de.msg}: line {de.lineno}, column {de.colno} ' \
                   f'(char {de.pos})'
        return tree

    def get_default_formatter(self) -> JSONFormatter:
        """Returns the shared default :class:`JSONFormatter` instance."""
        return JSONFormatter.DEFAULT_INSTANCE
267 | 
268 | 
class JSON5(Filetype):
    """The JSON5 file type."""
    def __init__(self):
        """Initializes the JSON5 file type.

        By default, JSON5 associates itself with the "json5", "application/json5", and "text/x-json5" MIME types.

        """
        super().__init__(
            'json5',
            'application/json5',
            'text/x-json5'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Parses the JSON5 file at :obj:`path` and builds a tree from the result."""
        with open(path) as f:
            return build_tree(json5.load(f), options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Like :meth:`build_tree`, but returns an error message string if parsing raises :exc:`ValueError`."""
        try:
            return self.build_tree(path=path, options=options)
        except ValueError as ve:
            # The conversion flag is `!s`. Writing `{ve:!s}` would instead pass "!s" as a format spec, which
            # exceptions do not support, causing a TypeError while the error message is being built.
            return f'Error parsing {os.path.basename(path)}: {ve!s}'

    def get_default_formatter(self) -> JSONFormatter:
        """Returns the shared default :class:`JSONFormatter` instance."""
        return JSONFormatter.DEFAULT_INSTANCE
295 | 


--------------------------------------------------------------------------------
/graphtage/multiset.py:
--------------------------------------------------------------------------------
  1 | """A module for representing an edit on a multiset.
  2 | 
  3 | This is used by :class:`graphtage.MultiSetNode` and :class:`graphtage.DictNode`, since the latter is a multiset containing
  4 | :class:`graphtage.KeyValuePairNode` objects.
  5 | 
  6 | """
  7 | 
  8 | from typing import Iterator, List
  9 | 
 10 | import graphtage
 11 | from .bounds import Range
 12 | from .edits import Insert, Match, Remove
 13 | from .matching import WeightedBipartiteMatcher
 14 | from .sequences import SequenceEdit, SequenceNode
 15 | from .tree import Edit, TreeNode
 16 | from .utils import HashableCounter, largest
 17 | 
 18 | 
 19 | class MultiSetEdit(SequenceEdit):
 20 |     """An edit matching one unordered collection of items to another.
 21 | 
 22 |     It works by using a :class:`graphtage.matching.WeightedBipartiteMatcher` to find the minimum cost matching from
 23 |     the elements of one collection to the elements of the other.
 24 | 
 25 |     """
 26 |     def __init__(
 27 |             self,
 28 |             from_node: SequenceNode,
 29 |             to_node: SequenceNode,
 30 |             from_set: HashableCounter[TreeNode],
 31 |             to_set: HashableCounter[TreeNode],
 32 |             auto_match_keys: bool = True
 33 |     ):
 34 |         """Initializes the edit.
 35 | 
 36 |         Args:
 37 |             from_node: Any sequence node from which to match.
 38 |             to_node: Any sequence node to which to match.
 39 |             from_set: The set of nodes from which to match. These should typically be children of :obj:`from_node`, but
 40 |                 this is neither checked nor enforced.
 41 |             to_set: The set of nodes to which to match. These should typically be children of :obj:`to_node`, but this
 42 |                 is neither checked nor enforced.
 43 |             auto_match_keys: If `True`, any :class:`graphtage.KeyValuePairNode`s in :obj:`from_set` that have keys
 44 |                 equal to :class:`graphtage.KeyValuePairNode`s in :obj:`to_set` will automatically be matched. Setting
 45 |                 this to `False` will require a significant amount more computation for larger dictionaries.
 46 | 
 47 |         """
 48 |         self._matched_kvp_edits: List[Edit] = []
 49 |         if auto_match_keys:
 50 |             to_set = HashableCounter(to_set)
 51 |             from_set = HashableCounter(from_set)
 52 |             to_remove_from = []
 53 |             for f in from_set.keys():
 54 |                 if not isinstance(f, graphtage.KeyValuePairNode):
 55 |                     continue
 56 |                 for t in to_set.keys():
 57 |                     if not isinstance(f, graphtage.KeyValuePairNode):
 58 |                         continue
 59 |                     if f.key == t.key:
 60 |                         num_matched = min(from_set[f], to_set[t])
 61 |                         for _ in range(num_matched):
 62 |                             self._matched_kvp_edits.append(f.edits(t))
 63 |                         to_remove_from.append((f, num_matched))
 64 |                         break
 65 |                 else:
 66 |                     continue
 67 |                 to_set[t] -= num_matched
 68 |             for f, num_matched in to_remove_from:
 69 |                 from_set[f] -= num_matched
 70 |         self.to_insert = to_set - from_set
 71 |         """The set of nodes in :obj:`to_set` that do not exist in :obj:`from_set`."""
 72 |         self.to_remove = from_set - to_set
 73 |         """The set of nodes in :obj:`from_set` that do not exist in :obj:`to_set`."""
 74 |         to_match = from_set & to_set
 75 |         self._edits: List[Edit] = [Match(n, n, 0) for n in to_match.elements()]
 76 |         self._matcher = WeightedBipartiteMatcher(
 77 |             from_nodes=self.to_remove.elements(),
 78 |             to_nodes=self.to_insert.elements(),
 79 |             get_edge=lambda f, t: f.edits(t)
 80 |         )
 81 |         super().__init__(
 82 |             from_node=from_node,
 83 |             to_node=to_node
 84 |         )
 85 | 
 86 |     def is_complete(self) -> bool:
 87 |         return self._matcher.is_complete()
 88 | 
 89 |     def edits(self) -> Iterator[Edit]:
 90 |         yield from self._edits
 91 |         yield from self._matched_kvp_edits
 92 |         remove_matched: HashableCounter[TreeNode] = HashableCounter()
 93 |         insert_matched: HashableCounter[TreeNode] = HashableCounter()
 94 |         for (rem, (ins, edit)) in self._matcher.matching.items():
 95 |             yield edit
 96 |             remove_matched[rem] += 1
 97 |             insert_matched[ins] += 1
 98 |         for rm in (self.to_remove - remove_matched).elements():
 99 |             yield Remove(to_remove=rm, remove_from=self.from_node)
100 |         for ins in (self.to_insert - insert_matched).elements():
101 |             yield Insert(to_insert=ins, insert_into=self.from_node)
102 | 
103 |     def tighten_bounds(self) -> bool:
104 |         """Delegates to :meth:`WeightedBipartiteMatcher.tighten_bounds`."""
105 |         for kvp_edit in self._matched_kvp_edits:
106 |             if kvp_edit.tighten_bounds():
107 |                 return True
108 |         return self._matcher.tighten_bounds()
109 | 
110 |     def bounds(self) -> Range:
111 |         b = self._matcher.bounds()
112 |         for kvp_edit in self._matched_kvp_edits:
113 |             b = b + kvp_edit.bounds()
114 |         if len(self.to_remove) > len(self.to_insert):
115 |             for edit in largest(
116 |                     *(Remove(to_remove=r, remove_from=self.from_node) for r in self.to_remove),
117 |                     n=len(self.to_remove) - len(self.to_insert),
118 |                     key=lambda e: e.bounds()
119 |             ):
120 |                 b = b + edit.bounds()
121 |         elif len(self.to_remove) < len(self.to_insert):
122 |             for edit in largest(
123 |                     *(Insert(to_insert=i, insert_into=self.from_node) for i in self.to_insert),
124 |                     n=len(self.to_insert) - len(self.to_remove),
125 |                     key=lambda e: e.bounds()
126 |             ):
127 |                 b = b + edit.bounds()
128 |         return b
129 | 


--------------------------------------------------------------------------------
/graphtage/object_set.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A data structure that can hold a set of unique Python objects, even if those objects are not hashable.
 3 | Uniqueness is determined based upon identity.
 4 | """
 5 | 
 6 | from collections.abc import MutableSet
 7 | from typing import Any, Iterable, Set
 8 | 
 9 | 
class IdentityHash:
    """Wraps an object so that it hashes and compares by identity rather than by value."""
    def __init__(self, obj):
        self.obj = obj

    def __hash__(self):
        return id(self.obj)

    def __eq__(self, other):
        if not isinstance(other, IdentityHash):
            return False
        return id(self.obj) == id(other.obj)


class ObjectSet(MutableSet):
    """A set that can hold unhashable Python objects

    Uniqueness is determined based upon identity.

    """
    def __init__(self, initial_objs: Iterable[Any] = ()):
        """Initializes the set, adding each object in :obj:`initial_objs`."""
        self.objs: Set[IdentityHash] = set()
        for obj in initial_objs:
            self.add(obj)

    def add(self, value):
        """Adds :obj:`value` to the set; adding the very same object twice has no effect."""
        self.objs.add(IdentityHash(value))

    def discard(self, value):
        """Removes :obj:`value` from the set if it is present; does nothing otherwise.

        The :class:`MutableSet` mixin methods (``remove``, ``-=``, ``^=``, ...) rely on this method *not*
        raising when the value is absent.

        """
        self.objs.discard(IdentityHash(value))

    def __contains__(self, x):
        x = IdentityHash(x)
        return x in self.objs

    def __len__(self):
        return len(self.objs)

    def __iter__(self):
        # Yield the stored objects themselves, not their IdentityHash wrappers
        for obj in self.objs:
            yield obj.obj

    def __str__(self):
        # Iterate over `self` so the stored objects are rendered rather than their IdentityHash wrappers
        return f"{{{', '.join(map(str, self))}}}"
54 | 


--------------------------------------------------------------------------------
/graphtage/pickle.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Optional, Union
 3 | 
 4 | from fickling.fickle import Interpreter, Pickled, PickleDecodeError
 5 | 
 6 | from .graphtage import BuildOptions, Filetype, TreeNode
 7 | from .pydiff import ast_to_tree, PyDiffFormatter
 8 | 
 9 | 
class Pickle(Filetype):
    """The Python Pickle file type."""
    def __init__(self):
        """Registers the Pickle file type.

        Pickle associates itself with the "pickle", "application/python-pickle",
        and "application/x-python-pickle" MIME types.

        """
        super().__init__(
            'pickle',
            'application/python-pickle',
            'application/x-python-pickle'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Loads the pickle at :obj:`path`, converts it to an AST, and builds a tree from that AST."""
        with open(path, "rb") as pickle_file:
            pickled = Pickled.load(pickle_file)
            tree_ast = Interpreter(pickled).to_ast()
            return ast_to_tree(tree_ast, options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Like :meth:`build_tree`, but returns an error message string on a :exc:`PickleDecodeError`."""
        try:
            tree = self.build_tree(path=path, options=options)
        except PickleDecodeError as e:
            return f'Error deserializing {os.path.basename(path)}: {e!s}'
        return tree

    def get_default_formatter(self) -> PyDiffFormatter:
        """Returns the shared default :class:`PyDiffFormatter` instance."""
        return PyDiffFormatter.DEFAULT_INSTANCE
40 | 


--------------------------------------------------------------------------------
/graphtage/plist.py:
--------------------------------------------------------------------------------
  1 | """A :class:`graphtage.Filetype` for parsing, diffing, and rendering Apple plist files."""
  2 | import os
  3 | from xml.parsers.expat import ExpatError
  4 | from typing import Optional, Tuple, Union
  5 | 
  6 | from plistlib import dumps, load
  7 | 
  8 | from . import json
  9 | from .edits import Edit, EditCollection, Match
 10 | from .graphtage import BoolNode, BuildOptions, Filetype, FloatNode, KeyValuePairNode, IntegerNode, LeafNode, StringNode
 11 | from .printer import Printer
 12 | from .sequences import SequenceFormatter, SequenceNode
 13 | from .tree import ContainerNode, GraphtageFormatter, TreeNode
 14 | 
 15 | 
 16 | class PLISTNode(ContainerNode):
 17 |     def __init__(self, root: TreeNode):
 18 |         self.root: TreeNode = root
 19 | 
 20 |     def to_obj(self):
 21 |         return self.root.to_obj()
 22 | 
 23 |     def edits(self, node: 'TreeNode') -> Edit:
 24 |         if isinstance(node, PLISTNode):
 25 |             return EditCollection(
 26 |                 from_node=self,
 27 |                 to_node=node,
 28 |                 edits=iter((
 29 |                     Match(self, node, 0),
 30 |                     self.root.edits(node.root)
 31 |                 )),
 32 |                 collection=list,
 33 |                 add_to_collection=list.append,
 34 |                 explode_edits=False
 35 |             )
 36 |         return self.root.edits(node)
 37 | 
 38 |     def calculate_total_size(self) -> int:
 39 |         return self.root.calculate_total_size()
 40 | 
 41 |     def print(self, printer: Printer):
 42 |         printer.write(PLIST_HEADER)
 43 |         self.root.print(printer)
 44 |         printer.write(PLIST_FOOTER)
 45 | 
 46 |     def __iter__(self):
 47 |         yield self.root
 48 | 
 49 |     def __len__(self) -> int:
 50 |         return 1
 51 | 
 52 | 
 53 | def build_tree(path: str, options: Optional[BuildOptions] = None, *args, **kwargs) -> PLISTNode:
 54 |     """Constructs a PLIST tree from an PLIST file."""
 55 |     with open(path, "rb") as stream:
 56 |         data = load(stream)
 57 |         return PLISTNode(json.build_tree(data, options=options, *args, **kwargs))
 58 | 
 59 | 
 60 | class PLISTSequenceFormatter(SequenceFormatter):
 61 |     is_partial = True
 62 | 
 63 |     def __init__(self):
 64 |         super().__init__('', '', '')
 65 | 
 66 |     def print_SequenceNode(self, printer: Printer, node: SequenceNode):
 67 |         self.parent.print(printer, node)
 68 | 
 69 |     def print_ListNode(self, printer: Printer, *args, **kwargs):
 70 |         printer.write("<array>")
 71 |         super().print_SequenceNode(printer, *args, **kwargs)
 72 |         printer.write("</array>")
 73 | 
 74 |     def print_MultiSetNode(self, printer: Printer, *args, **kwargs):
 75 |         printer.write("<dict>")
 76 |         super().print_SequenceNode(printer, *args, **kwargs)
 77 |         printer.write("</dict>")
 78 | 
 79 |     def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode):
 80 |         printer.write("<key>")
 81 |         if isinstance(node.key, StringNode):
 82 |             printer.write(node.key.object)
 83 |         else:
 84 |             self.print(printer, node.key)
 85 |         printer.write("</key>")
 86 |         printer.newline()
 87 |         self.print(printer, node.value)
 88 | 
 89 |     print_MappingNode = print_MultiSetNode
 90 | 
 91 | 
 92 | def _plist_header_footer() -> Tuple[str, str]:
 93 |     string = "1234567890"
 94 |     encoded = dumps(string).decode("utf-8")
 95 |     expected = f"<string>{string}</string>"
 96 |     body_offset = encoded.find(expected)
 97 |     if body_offset <= 0:
 98 |         raise ValueError("Unexpected plist encoding!")
 99 |     return encoded[:body_offset], encoded[body_offset+len(expected):]
100 | 
101 | 
# The text that plistlib.dumps emits before (header) and after (footer) an encoded value,
# computed once when this module is imported.
PLIST_HEADER: str
PLIST_FOOTER: str
PLIST_HEADER, PLIST_FOOTER = _plist_header_footer()
105 | 
106 | 
class PLISTFormatter(GraphtageFormatter):
    """The formatter for Apple plist output."""
    sub_format_types = [PLISTSequenceFormatter]

    def print(self, printer: Printer, *args, **kwargs):
        """Prints with the printer's indent string set to eight spaces."""
        # PLIST uses an eight-space indent
        printer.indent_str = 8 * " "
        super().print(printer, *args, **kwargs)

    @staticmethod
    def write_obj(printer: Printer, obj):
        """Writes the :mod:`plistlib` encoding of :obj:`obj` without the plist header and footer."""
        document = dumps(obj).decode("utf-8")
        body = document[len(PLIST_HEADER):-len(PLIST_FOOTER)]
        printer.write(body)

    def print_StringNode(self, printer: Printer, node: StringNode):
        """Prints a string inside ``<string>`` tags."""
        printer.write(f"<string>{node.object}</string>")

    def print_IntegerNode(self, printer: Printer, node: IntegerNode):
        """Prints an integer inside ``<integer>`` tags."""
        printer.write(f"<integer>{node.object}</integer>")

    def print_FloatNode(self, printer: Printer, node: FloatNode):
        """Prints a float inside ``<real>`` tags."""
        printer.write(f"<real>{node.object}</real>")

    def print_BoolNode(self, printer, node: BoolNode):
        """Prints a boolean as either ``<true />`` or ``<false />``."""
        printer.write("<true />" if node.object else "<false />")

    def print_LeafNode(self, printer: Printer, node: LeafNode):
        """Prints any other leaf via :meth:`write_obj`."""
        self.write_obj(printer, node.object)

    def print_PLISTNode(self, printer: Printer, node: PLISTNode):
        """Prints the plist header, the document's root node, and then the plist footer."""
        printer.write(PLIST_HEADER)
        self.print(printer, node.root)
        printer.write(PLIST_FOOTER)
142 | 
143 | 
class PLIST(Filetype):
    """The Apple PLIST filetype."""
    def __init__(self):
        """Initializes the PLIST file type.

        By default, PLIST associates itself with the "plist" and "application/x-plist" MIME types.

        """
        super().__init__(
            'plist',
            'application/x-plist'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Builds a tree from the plist at :obj:`path`, marking every string node as unquoted."""
        tree = build_tree(path=path, options=options)
        for node in tree.dfs():
            if isinstance(node, StringNode):
                node.quoted = False
        return tree

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Like :meth:`build_tree`, but returns an error message string if the file cannot be parsed.

        Both malformed XML (:exc:`xml.parsers.expat.ExpatError`) and files that :mod:`plistlib` does not recognize
        as a plist at all (:exc:`plistlib.InvalidFileException`) are reported this way.

        """
        # Imported locally so this method does not depend on a change to the module-level imports
        from plistlib import InvalidFileException
        try:
            return self.build_tree(path=path, options=options)
        except (ExpatError, InvalidFileException) as error:
            return f'Error parsing {os.path.basename(path)}: {error}'

    def get_default_formatter(self) -> PLISTFormatter:
        """Returns the shared default :class:`PLISTFormatter` instance."""
        return PLISTFormatter.DEFAULT_INSTANCE
172 | 


--------------------------------------------------------------------------------
/graphtage/progress.py:
--------------------------------------------------------------------------------
  1 | """A module for printing status messages and progress bars to the command line."""
  2 | 
  3 | import io
  4 | import sys
  5 | from types import TracebackType
  6 | from typing import AnyStr, Iterable, Iterator, IO, List, Optional, TextIO, Type
  7 | 
  8 | from tqdm import tqdm, trange
  9 | 
 10 | 
 11 | class StatusWriter(IO[str]):
 12 |     """A writer compatible with the :class:`graphtage.printer.Writer` protocol that can print status.
 13 | 
 14 |     See :meth:`StatusWriter.tqdm` and :meth:`StatusWriter.trange`. If :attr:`StatusWriter.status_stream` is either
 15 |     :attr:`sys.stdout` or :attr:`sys.stderr`, then bytes printed to this writer will be buffered. For each full line
 16 |     buffered, a call to :func:`tqdm.write` will be made.
 17 | 
 18 |     A status writer whose lifetime is not controlled by instantiation in a ``with`` block must be manually flushed
 19 |     with :meth:`StatusWriter.flush(final=True)<StatusWriter.flush>` after its final write, or else the last line
 20 |     written may be lost.
 21 | 
 22 |     """
 23 |     def __init__(self, out_stream: Optional[TextIO] = None, quiet: bool = False):
 24 |         """Initializes a status writer.
 25 | 
 26 |         Args:
 27 |             out_stream: An optional stream to which to write. If omitted this defaults to :attr:`sys.stdout`.
 28 |             quiet: Whether or not :mod:`tqdm` status messages and progress should be suppressed.
 29 | 
 30 |         """
 31 |         self.quiet = quiet
 32 |         """Whether or not :mod:`tqdm` status messages and progress should be suppressed."""
 33 |         self._reentries: int = 0
 34 |         if out_stream is None:
 35 |             out_stream = sys.stdout
 36 |         self.status_stream: TextIO = out_stream
 37 |         """The status stream to which to print."""
 38 |         self._buffer: List[str] = []
 39 |         try:
 40 |             self.write_raw = self.quiet or (
 41 |                     out_stream.fileno() != sys.stderr.fileno() and out_stream.fileno() != sys.stdout.fileno()
 42 |             )
 43 |             """If :const:`True`, this writer *will not* buffer output and use :func:`tqdm.write`.
 44 |             
 45 |             This defaults to::
 46 |             
 47 |                 self.write_raw = self.quiet or (
 48 |                     out_stream.fileno() != sys.stderr.fileno() and out_stream.fileno() != sys.stdout.fileno()
 49 |                 )
 50 |             
 51 |             """
 52 |         except io.UnsupportedOperation as e:
 53 |             self.write_raw = True
 54 | 
 55 |     def tqdm(self, *args, **kwargs) -> tqdm:
 56 |         """Returns a :class:`tqdm.tqdm` object."""
 57 |         if self.quiet or 'disable' not in kwargs:
 58 |             kwargs['disable'] = self.quiet
 59 |         return tqdm(*args, **kwargs)
 60 | 
 61 |     def trange(self, *args, **kwargs) -> trange:
 62 |         """Returns a :class:`tqdm.trange` object."""
 63 |         if self.quiet or 'disable' not in kwargs:
 64 |             kwargs['disable'] = self.quiet
 65 |         return trange(*args, **kwargs)
 66 | 
    def flush(self, final=False):
        """Flushes this writer.

        Every complete line currently held in the buffer is emitted with :func:`tqdm.write`; a trailing partial
        line stays buffered. If :obj:`final` is :const:`True`, any extra bytes will be flushed along with a final
        newline. Finally, the underlying :attr:`status_stream` is flushed and its result is returned.

        """
        if final and self._buffer and not self._buffer[-1].endswith('\n'):
            # Terminate the pending partial line so that the loop below emits it
            self._buffer.append('\n')
        while self._buffer:
            if '\n' in self._buffer[0]:
                trailing_newline = self._buffer[0].endswith('\n')
                lines = self._buffer[0].split('\n')
                if not trailing_newline:
                    # Carry the unterminated tail over to the next chunk (creating one if necessary)
                    if len(self._buffer) == 1:
                        self._buffer.append(lines[-1])
                    else:
                        self._buffer[1] = f"{lines[-1]}{self._buffer[1]}"
                # The last element is either the carried-over tail or the empty string after a trailing newline
                for line in lines[:-1]:
                    tqdm.write(line, file=self.status_stream)
                self._buffer = self._buffer[1:]
            elif len(self._buffer) == 1:
                # A single chunk without a newline: leave it buffered until more text arrives
                break
            else:
                # The first chunk has no newline; merge all chunks and re-examine them as one
                self._buffer = [''.join(self._buffer)]
        return self.status_stream.flush()
 92 | 
 93 |     def write(self, text: str) -> int:
 94 |         if self.write_raw:
 95 |             return self.status_stream.write(text)
 96 |         self._buffer.append(text)
 97 |         if '\n' in text:
 98 |             self.flush()
 99 |         return len(text)
100 | 
101 |     def close(self) -> None:
102 |         self.flush(final=True)
103 |         return self.status_stream.close()
104 | 
105 |     def fileno(self) -> int:
106 |         return self.status_stream.fileno()
107 | 
108 |     def isatty(self) -> bool:
109 |         return self.status_stream.isatty()
110 | 
111 |     def read(self, n: int = ...) -> AnyStr:
112 |         return self.status_stream.read(n)
113 | 
114 |     def readable(self) -> bool:
115 |         return self.status_stream.readable()
116 | 
117 |     def readline(self, limit: int = ...) -> AnyStr:
118 |         return self.status_stream.readline(limit)
119 | 
120 |     def readlines(self, hint: int = ...) -> List[AnyStr]:
121 |         return self.status_stream.readlines(hint)
122 | 
123 |     def seek(self, offset: int, whence: int = ...) -> int:
124 |         return self.status_stream.seek(offset, whence)
125 | 
126 |     def seekable(self) -> bool:
127 |         return self.status_stream.seekable()
128 | 
129 |     def tell(self) -> int:
130 |         return self.status_stream.tell()
131 | 
132 |     def truncate(self, size: Optional[int] = ...) -> int:
133 |         return self.status_stream.truncate(size)
134 | 
135 |     def writable(self) -> bool:
136 |         return self.status_stream.writable()
137 | 
138 |     def writelines(self, lines: Iterable[AnyStr]) -> None:
139 |         return self.status_stream.writelines(lines)
140 | 
141 |     @property
142 |     def closed(self) -> bool:
143 |         return self.status_stream.closed
144 | 
145 |     @property
146 |     def mode(self) -> str:
147 |         return self.status_stream.mode
148 | 
149 |     @property
150 |     def name(self) -> str:
151 |         return self.status_stream.name
152 | 
153 |     def __next__(self) -> AnyStr:
154 |         return next(self.status_stream)
155 | 
156 |     def __iter__(self) -> Iterator[AnyStr]:
157 |         return iter(self.status_stream)
158 | 
159 |     def __enter__(self) -> IO[AnyStr]:
160 |         self._reentries += 1
161 |         return self
162 | 
163 |     def __exit__(self, t: Optional[Type[BaseException]], value: Optional[BaseException],
164 |                  traceback: Optional[TracebackType]) -> Optional[bool]:
165 |         self._reentries -= 1
166 |         if self._reentries == 0:
167 |             self.flush(final=True)
168 | 
169 |     def __delete__(self, instance):  # NOTE(review): __delete__ is the descriptor hook (runs on `del owner.attr`), not a finalizer; __del__ may have been intended -- confirm
170 |         self.flush(final=True)  # write out anything still buffered, appending a final newline if one is missing
171 | 


--------------------------------------------------------------------------------
/graphtage/search.py:
--------------------------------------------------------------------------------
  1 | """A module for solving a search problem in an iteratively revealed search space.
  2 | 
  3 | **Given:** an iterator that yields an unknown but finite number of integer range objects, *e.g.*, ``[100, 200]``,
  4 | ``[50, 1000]``, ``[60, 500]``, …. Each integer range object has a member function that is guaranteed to tighten the
  5 | bounds of the range, such that the range monotonically shrinks and converges toward a specific number (*i.e.*, it
  6 | conforms to the :class:`graphtage.bounds.Bounded` protocol). For example, ``[100, 200].tighten()`` →
  7 | ``[150, 160].tighten()`` → ``[150, 155].tighten()`` → ``[153, 153]`` → ``153``. Each object might have a different
  8 | tighten function; we cannot make any assumptions about the rate of convergence, other than that the bounds are
  9 | guaranteed to shrink with each call to ``tighten()``.
 10 | 
 11 | **Goal:** Create the most computationally efficient algorithm to determine the range object that converges to the
 12 | smallest integer (*i.e.*, with the fewest possible tightenings).
 13 | 
 14 | """
 15 | 
 16 | from typing import Generic, Iterator, Optional, TypeVar
 17 | 
 18 | from .bounds import Bounded, NEGATIVE_INFINITY, POSITIVE_INFINITY, Range
 19 | from .fibonacci import FibonacciHeap, HeapNode
 20 | 
 21 | B = TypeVar('B', bound=Bounded)
 22 | 
 23 | 
 24 | class IterativeTighteningSearch(Bounded, Generic[B]):
 25 |     """Implementation of iterative tightening search on a given sequence of :class:`graphtage.bounds.Bounded` objects.
 26 | 
 27 |     The search class itself is :class:`graphtage.bounds.Bounded`, with bounds on the value of the optimal solution.
 28 |     Each call to :meth:`IterativeTighteningSearch.tighten_bounds` will improve these bounds, if possible.
 29 | 
 30 |     """
 31 |     def __init__(self,
 32 |                  possibilities: Iterator[B],
 33 |                  initial_bounds: Optional[Range] = None):
 34 |         """Initializes the search.
 35 | 
 36 |         Args:
 37 |             possibilities: An iterator yielding :class:`graphtage.bounds.Bounded` objects over which to search.
 38 |             initial_bounds: Bounds on the optimal solution, if known. Having good initial bounds can greatly speed up
 39 |                 the search. However, if the initial bounds are incorrect (*i.e.*, if the true optimal solution lies
 40 |                 outside of :obj:`initial_bounds`), then the resulting solution may be incorrect.
 41 | 
 42 |         """
 43 | 
 44 |         def get_range(bounded: Bounded) -> Range:
 45 |             return bounded.bounds()  # heap key: the item's current bounds
 46 | 
 47 |         self._unprocessed: Optional[Iterator[B]] = possibilities  # set to None once the iterator is exhausted
 48 | 
 49 |         # Heap to track the ranges with the lowest upper bound
 50 |         self._untightened: FibonacciHeap[B, Range] = FibonacciHeap(key=get_range)
 51 | 
 52 |         # Fully tightened (`definitive`) ranges, sorted by increasing bound
 53 |         self._tightened: FibonacciHeap[B, Range] = FibonacciHeap(key=get_range)
 54 | 
 55 |         if initial_bounds is None:
 56 |             self.initial_bounds = Range(NEGATIVE_INFINITY, POSITIVE_INFINITY)  # nothing known: unbounded on both sides
 57 |         else:
 58 |             self.initial_bounds = initial_bounds
 59 | 
 60 |     def __bool__(self):
 61 |         """Returns whether or not this search's bounds are :meth:`definitive<graphtage.bounds.Range.definitive>`."""
 62 |         return bool(self._unprocessed or ((self._untightened or self._tightened) and not self.bounds().definitive()))
 63 | 
 64 |     @property
 65 |     def best_match(self) -> Optional[B]:
 66 |         """Returns the best solution the search has thus found.
 67 | 
 68 |          Returns:
 69 |             Optional[B]: The best solution the search has thus found, or :const:`None` if it has not yet found a
 70 |             feasible solution.
 71 | 
 72 |         """
 73 |         if self._unprocessed is not None or not (self._untightened or self._tightened):
 74 |             return None
 75 |         elif self._tightened and self._untightened:
 76 |             if self._untightened.peek().bounds() < self._tightened.peek().bounds():
 77 |                 return self._untightened.peek()
 78 |             else:
 79 |                 return self._tightened.peek()
 80 |         elif self._tightened:
 81 |             return self._tightened.peek()
 82 |         else:
 83 |             return self._untightened.peek()
 84 | 
 85 |     def remove_best(self) -> Optional[B]:
 86 |         """Removes and returns the current best solution found by the search, if one exists.
 87 | 
 88 |         This enables one to iteratively sort the input sequence. However, this function is only guaranteed to return
 89 |         the globally optimal item if :meth:`IterativeTighteningSearch.goal_test` returns :const:`True`. Therefore,
 90 |         to generate a total ordering over the input sequence, you should tighten bounds until the goal is reached before
 91 |         each call to this function::
 92 | 
 93 |             while search.tighten_bounds():
 94 |                 while not search.goal_test() and search.tighten_bounds():
 95 |                     pass
 96 |                 if search.goal_test():
 97 |                     yield search.remove_best()
 98 |             while search.goal_test():
 99 |                 yield search.remove_best()
100 | 
101 |         However, if your goal is to produce a total ordering, :func:`graphtage.bounds.sort` is more efficient.
102 | 
103 |         """
104 |         if self._unprocessed is not None or not (self._untightened or self._tightened):
105 |             return None
106 |         elif self._tightened and self._untightened:
107 |             if self._untightened.peek().bounds() < self._tightened.peek().bounds():
108 |                 heap = self._untightened
109 |             else:
110 |                 heap = self._tightened
111 |         elif self._tightened:
112 |             heap = self._tightened
113 |         else:
114 |             heap = self._untightened
115 |         return heap.pop()
116 | 
117 |     def search(self) -> B:
118 |         """Finds and returns the smallest item, fully tightened.
119 | 
120 |         This is equivalent to::
121 | 
122 |             while self.tighten_bounds():
123 |                 pass
124 |             return self.best_match
125 | 
126 |         """
127 |         while self.tighten_bounds():
128 |             pass
129 |         return self.best_match
130 | 
131 |     def _nodes(self) -> Iterator[HeapNode[B, Range]]:
132 |         yield from self._untightened.nodes()
133 |         yield from self._tightened.nodes()
134 | 
135 |     def bounds(self) -> Range:
136 |         if self.best_match is None:
137 |             return self.initial_bounds
138 |         else:
139 |             if self._unprocessed is None and (self._untightened or self._tightened):
140 |                 lb = POSITIVE_INFINITY
141 |                 for node in self._nodes():
142 |                     if not node.deleted:
143 |                         lb = min(node.key.lower_bound, lb)
144 |                 if lb == POSITIVE_INFINITY or lb < self.initial_bounds.lower_bound:
145 |                     lb = self.initial_bounds.lower_bound
146 |             else:
147 |                 lb = self.initial_bounds.lower_bound
148 |             return Range(min(lb, self.best_match.bounds().upper_bound), self.best_match.bounds().upper_bound)
149 | 
150 |     def _delete_node(self, node: HeapNode[B, Range]):
151 |         self._untightened.decrease_key(node, Range(NEGATIVE_INFINITY, NEGATIVE_INFINITY))
152 |         self._untightened.pop()
153 |         node.deleted = True
154 | 
155 |     def _update_bounds(self, node: HeapNode[B, Range]):
156 |         if self.best_match is not None \
157 |                 and self.best_match != node.item \
158 |                 and self.best_match.bounds().dominates(node.item.bounds()):
159 |             self._delete_node(node)
160 |             return
161 |         elif self.initial_bounds.dominates(node.item.bounds()):
162 |             self._delete_node(node)
163 |             return
164 |         bounds: Range = node.item.bounds()
165 |         if bounds.definitive():
166 |             self._delete_node(node)
167 |             self._tightened.push(node.item)
168 |         elif bounds.lower_bound > node.key.lower_bound:
169 |             # The lower bound increased, so we need to remove and re-add the node
170 |             # because the Fibonacci heap only permits making keys smaller
171 |             self._untightened.decrease_key(node, Range(NEGATIVE_INFINITY, NEGATIVE_INFINITY))
172 |             self._untightened.pop()
173 |             self._untightened.push(node.item)
174 | 
175 |     def goal_test(self) -> bool:
176 |         """Returns whether :meth:`best_match<IterativeTighteningSearch.best_match>` is the optimal solution."""
177 |         if self._unprocessed is not None:
178 |             return False
179 |         best = self.best_match
180 |         return best is not None and best.bounds().dominates(self.bounds())
181 | 
182 |     def tighten_bounds(self) -> bool:
183 |         starting_bounds = self.bounds()
184 |         while True:
185 |             if self._unprocessed is not None:
186 |                 try:
187 |                     next_best: B = next(self._unprocessed)
188 |                     if self.initial_bounds.lower_bound > NEGATIVE_INFINITY and \
189 |                             self.initial_bounds.lower_bound >= next_best.bounds().upper_bound:
190 |                         # We can't do any better than this choice!
191 |                         self._unprocessed = None
192 |                         self._untightened.clear()
193 |                         self._tightened.clear()
194 |                         if next_best.bounds().definitive():
195 |                             self._tightened.push(next_best)
196 |                         else:
197 |                             self._untightened.push(next_best)
198 |                         return True
199 |                     if starting_bounds.dominates(next_best.bounds()) or \
200 |                             (self.best_match is not None
201 |                              and self.best_match.bounds().dominates(next_best.bounds())) or \
202 |                             self.initial_bounds.dominates(next_best.bounds()):
203 |                         # No need to add this new edit if it is strictly worse than the current best!
204 |                         pass
205 |                     if next_best.bounds().definitive():
206 |                         self._tightened.push(next_best)
207 |                     else:
208 |                         self._untightened.push(next_best)
209 |                 except StopIteration:
210 |                     self._unprocessed = None
211 |             tightened = False
212 |             if self._untightened:
213 |                 if self._unprocessed is None:
214 |                     if len(self._untightened) == 1:
215 |                         untightened = self._untightened.peek()
216 |                         if untightened.tighten_bounds() and untightened.bounds().definitive():
217 |                             self._untightened.clear()
218 |                             self._tightened.push(untightened)
219 |                     if self.goal_test():
220 |                         best = self.best_match
221 |                         self._untightened.clear()
222 |                         self._tightened.clear()
223 |                         ret = best.tighten_bounds()
224 |                         if best.bounds().definitive():
225 |                             self._tightened.push(best)
226 |                         else:
227 |                             self._untightened.push(best)
228 |                         assert self.best_match == best
229 |                         return ret
230 |                 for node in list(self._untightened.min_node):
231 |                     if node.deleted:
232 |                         continue
233 |                     tightened = node.item.tighten_bounds()
234 |                     if tightened:
235 |                         self._update_bounds(node)
236 |                         break
237 |             if starting_bounds.lower_bound < self.bounds().lower_bound \
238 |                     or starting_bounds.upper_bound > self.bounds().upper_bound:
239 |                 return True
240 |             elif self._unprocessed is None and not tightened:
241 |                 return False
242 | 


--------------------------------------------------------------------------------
/graphtage/version.py:
--------------------------------------------------------------------------------
 1 | """A module that centralizes the version information for Graphtage.
 2 | 
 3 | Changing the version here not only affects the version printed with the ``--version`` command line option, but it also
 4 | automatically updates the version used in ``setup.py`` and rendered in the documentation.
 5 | 
 6 | Attributes:
 7 |     DEV_BUILD (bool): Sets whether this build is a development build.
 8 |         This should only be set to :const:`False` to coincide with a release. It should *always* be :const:`False` before
 9 |         deploying to PyPI.
10 | 
11 |         If :const:`True`, the git branch will be included in :attr:`graphtage.version.__version__`.
12 | 
13 |     __version__ (Tuple[Union[int, str], ...]): The version of Graphtage. This tuple can contain any sequence of ints and
14 |         strings. Typically this will be three ints: major/minor/revision number. However, it can contain additional
15 |         ints and strings. If :attr:`graphtage.version.DEV_BUILD`, then `("git", git_branch())` will be appended to the
16 |         version.
17 | 
18 |     VERSION_STRING (str): A rendered string containing the version of Graphtage. Each element of
19 |         :attr:`graphtage.version.__version__` is appended to the string, delimited by a "." if the element is an ``int``
20 |         or a "-" if the element is a string.
21 | 
22 | """
23 | 
24 | import os
25 | import subprocess
26 | from typing import Optional, Tuple, Union
27 | 
28 | 
29 | def git_branch() -> Optional[str]:
30 |     """Returns the git branch for the codebase, or :const:`None` if it could not be determined.
31 | 
32 |     The git branch is determined by running
33 | 
34 |     .. code-block:: console
35 | 
36 |         $ git symbolic-ref -q HEAD
37 | 
38 |     """
39 |     try:
40 |         branch = subprocess.check_output(
41 |             ['git', 'symbolic-ref', '-q', 'HEAD'],
42 |             cwd=os.path.dirname(os.path.realpath(__file__)),
43 |             stderr=subprocess.DEVNULL
44 |         )
45 |         branch = branch.decode('utf-8').strip().split('/')[-1]
46 |         return branch
47 |     except Exception:
48 |         return None
49 | 
50 | 
51 | DEV_BUILD = False
52 | """Sets whether this build is a development build.
53 | 
54 | This should only be set to :const:`False` to coincide with a release. It should *always* be :const:`False` before
55 | deploying to PyPI.
56 | 
57 | If :const:`True`, the git branch will be included in the version string.
58 | 
59 | """
60 | 
61 | 
62 | __version__: Tuple[Union[int, str], ...] = (0, 3, 1)
63 | 
64 | if DEV_BUILD:
65 |     branch_name = git_branch()
66 |     if branch_name is None:
67 |         __version__ = __version__ + ('git',)
68 |     else:
69 |         __version__ = __version__ + ('git', branch_name)
70 | 
71 | VERSION_STRING = ''
72 | 
73 | for element in __version__:
74 |     if isinstance(element, int):
75 |         if VERSION_STRING:
76 |             VERSION_STRING += f'.{element}'
77 |         else:
78 |             VERSION_STRING = str(element)
79 |     else:
80 |         if VERSION_STRING:
81 |             VERSION_STRING += f'-{element!s}'
82 |         else:
83 |             VERSION_STRING += str(element)
84 | 
85 | 
86 | if __name__ == '__main__':
87 |     print(VERSION_STRING)
88 | 


--------------------------------------------------------------------------------
/graphtage/yaml.py:
--------------------------------------------------------------------------------
  1 | """A :class:`graphtage.Filetype` for parsing, diffing, and rendering YAML files."""
  2 | import os
  3 | from io import StringIO
  4 | from typing import Optional, Union
  5 | 
  6 | from yaml import dump, load_all, YAMLError
  7 | try:
  8 |     from yaml import CLoader as Loader, CDumper as Dumper
  9 | except ImportError:
 10 |     from yaml import Loader, Dumper
 11 | 
 12 | from . import json
 13 | from .edits import Insert, Match
 14 | from .graphtage import BuildOptions, Filetype, KeyValuePairNode, LeafNode, ListNode, MappingNode, StringNode, \
 15 |     StringEdit, StringFormatter
 16 | from .printer import Fore, Printer
 17 | from .sequences import SequenceFormatter, SequenceNode
 18 | from .tree import ContainerNode, Edit, GraphtageFormatter, TreeNode
 19 | 
 20 | 
 21 | def build_tree(path: str, options: Optional[BuildOptions] = None, *args, **kwargs) -> TreeNode:
 22 |     """Constructs a tree from the YAML file at ``path``: no documents builds from ``None``, several build from a list."""
 23 |     with open(path, 'rb') as stream:
 24 |         document_stream = load_all(stream, Loader=Loader)  # NOTE(review): Loader/CLoader is PyYAML's full, unsafe loader; consider SafeLoader/CSafeLoader for untrusted files
 25 |         documents = list(document_stream)  # parse every document before the file is closed
 26 |         if len(documents) == 0:
 27 |             return json.build_tree(None, options=options, *args, **kwargs)
 28 |         elif len(documents) > 1:
 29 |             return json.build_tree(documents, options=options, *args, **kwargs)
 30 |         else:
 31 |             singleton = documents[0]
 32 |             return json.build_tree(singleton, options=options, *args, **kwargs)
 33 | 
 34 | 
 35 | class YAMLListFormatter(SequenceFormatter):
 36 |     is_partial = True
 37 | 
 38 |     def __init__(self):
 39 |         super().__init__('', '', '')
 40 | 
 41 |     def print_SequenceNode(self, printer: Printer, node: SequenceNode):
 42 |         self.parent.print(printer, node)
 43 | 
 44 |     def print_ListNode(self, printer: Printer, *args, **kwargs):
 45 |         printer.newline()
 46 |         super().print_SequenceNode(printer, *args, **kwargs)
 47 | 
 48 |     def edit_print(self, printer: Printer, edit: Edit):
 49 |         printer.indents += 1
 50 |         self.print(printer, edit)
 51 |         printer.indents -= 1
 52 | 
 53 |     def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
 54 |         if not is_last:
 55 |             if not is_first:
 56 |                 printer.newline()
 57 |             with printer.bright().color(Fore.WHITE):
 58 |                 printer.write('- ')
 59 | 
 60 |     def items_indent(self, printer: Printer):
 61 |         return printer
 62 | 
 63 | 
 64 | class YAMLKeyValuePairFormatter(GraphtageFormatter):
 65 |     is_partial = True
 66 | 
 67 |     def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode):
 68 |         if printer.context().fore is None:
 69 |             with printer.color(Fore.BLUE) as p:
 70 |                 self.print(p, node.key)
 71 |         else:
 72 |             self.print(printer, node.key)
 73 |         with printer.bright().color(Fore.CYAN):
 74 |             printer.write(": ")
 75 |         if isinstance(node.value, MappingNode):
 76 |             printer.newline()
 77 |             printer.indents += 1
 78 |             self.parent.print(printer, node.value)
 79 |             printer.indents -= 1
 80 |         elif isinstance(node.value, SequenceNode):
 81 |             self.parent.parent.print(printer, node.value)
 82 |         else:
 83 |             self.print(printer, node.value)
 84 | 
 85 | 
 86 | class YAMLDictFormatter(SequenceFormatter):
 87 |     is_partial = True
 88 |     sub_format_types = [YAMLKeyValuePairFormatter]
 89 | 
 90 |     def __init__(self):
 91 |         super().__init__('', '', '')
 92 | 
 93 |     def print_MultiSetNode(self, *args, **kwargs):
 94 |         super().print_SequenceNode(*args, **kwargs)
 95 | 
 96 |     def print_MappingNode(self, *args, **kwargs):
 97 |         super().print_SequenceNode(*args, **kwargs)
 98 | 
 99 |     def print_SequenceNode(self, *args, **kwargs):
100 |         self.parent.print(*args, **kwargs)
101 | 
102 |     def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
103 |         if not is_first and not is_last:
104 |             printer.newline()
105 | 
106 |     def items_indent(self, printer: Printer):
107 |         return printer
108 | 
109 | 
110 | class YAMLStringFormatter(StringFormatter):
111 |     is_partial = True
112 |     has_newline = False
113 | 
114 |     def write_start_quote(self, printer: Printer, edit: StringEdit):
115 |         for sub_edit in edit.edit_distance.edits():
116 |             if isinstance(sub_edit, Match) and '\n' in sub_edit.from_node.object:
117 |                 self.has_newline = True
118 |                 break
119 |             elif isinstance(sub_edit, Insert) and '\n' in sub_edit.from_node.object:
120 |                 self.has_newline = True
121 |                 break
122 |         else:
123 |             self.has_newline = False
124 |         if self.has_newline:
125 |             printer.write('|')
126 |             printer.indents += 1
127 |             printer.newline()
128 | 
129 |     def context(self, printer: Printer):
130 |         return printer
131 | 
132 |     def write_end_quote(self, printer: Printer, edit: StringEdit):
133 |         if self.has_newline:
134 |             printer.indents -= 1
135 | 
136 |     def print_StringNode(self, printer: Printer, node: 'StringNode'):
137 |         s = node.object
138 |         if '\n' in s:
139 |             if printer.context().fore is None:
140 |                 context = printer.color(Fore.CYAN)
141 |             else:
142 |                 context = printer
143 |             with context as c:
144 |                 c.write('|')
145 |                 with c.indent():
146 |                     lines = s.split('\n')
147 |                     if lines[-1] == '':
148 |                         # Remove trailing newline
149 |                         lines = lines[:-1]
150 |                     for line in lines:
151 |                         c.newline()
152 |                         self.parent.write_obj(c, line)
153 |         else:
154 |             self.parent.write_obj(printer, s)
155 | 
156 |     def write_char(self, printer: Printer, c: str, index: int, num_edits: int, removed=False, inserted=False):
157 |         if c == '\n':
158 |             if removed or inserted:
159 |                 super().write_char(printer, '\u23CE', index, num_edits, removed, inserted)
160 |             if not removed and index < num_edits - 1:
161 |                 # Do not print a trailing newline
162 |                 printer.newline()
163 |         else:
164 |             super().write_char(printer, c, index, num_edits, removed, inserted)
165 | 
166 | 
167 | class YAMLFormatter(GraphtageFormatter):
168 |     sub_format_types = [YAMLStringFormatter, YAMLDictFormatter, YAMLListFormatter]
169 | 
170 |     def print(self, printer: Printer, *args, **kwargs):
171 |         # YAML only gets a two-space indent
172 |         printer.indent_str = '  '
173 |         super().print(printer, *args, **kwargs)
174 | 
175 |     @staticmethod
176 |     def write_obj(printer: Printer, obj):
177 |         if obj == '':
178 |             return
179 |         s = StringIO()
180 |         dump(obj, stream=s, Dumper=Dumper)
181 |         ret = s.getvalue()
182 |         if isinstance(obj, str) and obj.strip().startswith('#'):
183 |             if ret.startswith("'"):
184 |                 ret = ret[1:]
185 |             if ret.endswith("\n"):
186 |                 ret = ret[:-1]
187 |             if ret.endswith("'"):
188 |                 ret = ret[:-1]
189 |         if ret.endswith('\n...\n'):
190 |             ret = ret[:-len('\n...\n')]
191 |         elif ret.endswith('\n'):
192 |             ret = ret[:-1]
193 |         printer.write(ret)
194 | 
195 |     def print_LeafNode(self, printer: Printer, node: LeafNode):
196 |         self.write_obj(printer, node.object)
197 | 
198 |     def print_ContainerNode(self, printer: Printer, node: ContainerNode):
199 |         """Prints a :class:`graphtage.ContainerNode`.
200 | 
201 |         This is a fallback to permit the printing of custom containers, like :class:`graphtage.xml.XMLElement`.
202 | 
203 |         """
204 |         # Treat the container like a list
205 |         list_node = ListNode(node.children())
206 |         self.print(printer, list_node)
207 | 
208 | 
209 | class YAML(Filetype):
210 |     """The YAML filetype."""
211 |     def __init__(self):
212 |         """Initializes the YAML file type.
213 | 
214 |         By default, YAML associates itself with the "yaml", "application/x-yaml", "application/yaml", "text/yaml",
215 |         "text/x-yaml", and "text/vnd.yaml" MIME types.
216 | 
217 |         """
218 |         super().__init__(
219 |             'yaml',
220 |             'application/x-yaml',
221 |             'application/yaml',
222 |             'text/yaml',
223 |             'text/x-yaml',
224 |             'text/vnd.yaml'
225 |         )
226 | 
227 |     def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
228 |         tree = build_tree(path=path, options=options)
229 |         for node in tree.dfs():
230 |             if isinstance(node, StringNode):
231 |                 node.quoted = False
232 |         return tree
233 | 
234 |     def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
235 |         try:
236 |             return self.build_tree(path=path, options=options)
237 |         except YAMLError as ye:
238 |             return f'Error parsing {os.path.basename(path)}: {ye})'
239 | 
240 |     def get_default_formatter(self) -> YAMLFormatter:
241 |         return YAMLFormatter.DEFAULT_INSTANCE
242 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from setuptools import setup, find_packages
 3 | 
 4 | HERE = os.path.realpath(os.path.dirname(__file__))
 5 | 
 6 | VERSION_MODULE_PATH = os.path.join(HERE, "graphtage", "version.py")
 7 | README_PATH = os.path.join(HERE, "README.md")
 8 | 
 9 | 
10 | def get_version_string():
11 |     version = {}
12 |     with open(VERSION_MODULE_PATH) as f:
13 |         exec(f.read(), version)
14 |     return version['VERSION_STRING']
15 | 
16 | 
17 | def get_readme():
18 |     with open(README_PATH, encoding='utf-8') as f:
19 |         return f.read()
20 | 
21 | 
22 | setup(
23 |     name='graphtage',
24 |     description='A utility to diff tree-like files such as JSON and XML.',
25 |     license="LGPL-3.0-or-later",
26 |     long_description=get_readme(),
27 |     long_description_content_type="text/markdown",
28 |     url='https://github.com/trailofbits/graphtage',
29 |     project_urls={
30 |         'Documentation': 'https://trailofbits.github.io/graphtage',
31 |         'Source': 'https://github.com/trailofbits/graphtage',
32 |         'Tracker': 'https://github.com/trailofbits/graphtage/issues',
33 |     },
34 |     author='Trail of Bits',
35 |     version=get_version_string(),
36 |     packages=find_packages(exclude=['test']),
37 |     python_requires='>=3.8',
38 |     install_requires=[
39 |         "colorama",
40 |         "fickling>=0.0.8",
41 |         "intervaltree",
42 |         "json5==0.9.5",
43 |         "numpy>=1.19.4",
44 |         "PyYAML",
45 |         "scipy>=1.4.0",
46 |         "tqdm",
47 |         "typing_extensions>=3.7.4.3"
48 |     ],
49 |     entry_points={
50 |         'console_scripts': [
51 |             'graphtage = graphtage.__main__:main'
52 |         ]
53 |     },
54 |     extras_require={
55 |         "dev": [
56 |             "flake8",
57 |             "Sphinx",
58 |             "pytest",
59 |             "sphinx_rtd_theme==1.2.2",
60 |             "twine",
61 |             # workaround for https://github.com/python/importlib_metadata/issues/406:
62 |             "importlib_metadata<5; python_version == '3.7'"
63 |         ]
64 |     },
65 |     classifiers=[
66 |         'Development Status :: 4 - Beta',
67 |         'Environment :: Console',
68 |         'Intended Audience :: Science/Research',
69 |         'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)',
70 |         'Programming Language :: Python :: 3 :: Only',
71 |         'Topic :: Utilities'
72 |     ],
73 |     include_package_data=True
74 | )
75 | 


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trailofbits/graphtage/23654acf488eb803a60ce27ac515ee0755feb1a7/test/__init__.py


--------------------------------------------------------------------------------
/test/test_bounds.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | from typing import Optional
  3 | from unittest import TestCase
  4 | 
  5 | from tqdm import trange
  6 | 
  7 | from graphtage.bounds import Bounded, make_distinct, Range, sort
  8 | 
  9 | 
 10 | class RandomDecreasingRange(Bounded):
 11 |     def __init__(self, fixed_lb: int = 0, fixed_ub: int = 2000000, final_value: Optional[int] = None):
 12 |         if final_value is None:
 13 |             self.final_value = random.randint(fixed_lb, fixed_lb + (fixed_ub - fixed_lb) // 2)
 14 |         elif final_value < fixed_lb:
 15 |             raise ValueError(f"final_value of {final_value} < fixed lower bound of {fixed_lb}")
 16 |         elif final_value > fixed_ub:
 17 |             raise ValueError(f"final_value of {final_value} > fixed upper bound of {fixed_ub}")
 18 |         else:
 19 |             self.final_value = final_value
 20 |         self._lb = random.randint(fixed_lb, self.final_value)
 21 |         self._ub = random.randint(self.final_value, fixed_ub)
 22 |         self.tightenings: int = 0
 23 | 
 24 |     def bounds(self) -> Range:
 25 |         return Range(self._lb, self._ub)
 26 | 
 27 |     def tighten_bounds(self) -> bool:
 28 |         bounds_before = self.bounds()
 29 |         lb_diff = self.final_value - self._lb
 30 |         ub_diff = self._ub - self.final_value
 31 |         if lb_diff == ub_diff == 0:
 32 |             return False
 33 |         if lb_diff <= 1:
 34 |             self._lb = self.final_value
 35 |         else:
 36 |             self._lb += random.randint(max(int(0.5 * lb_diff), 1), lb_diff)
 37 |         if ub_diff <= 1:
 38 |             self._ub = self.final_value
 39 |         else:
 40 |             self._ub -= random.randint(max(int(0.5 * ub_diff), 1), ub_diff)
 41 |         if bounds_before.lower_bound < self._lb or bounds_before.upper_bound > self._ub:
 42 |             self.tightenings += 1
 43 |             return True
 44 |         else:
 45 |             return False
 46 | 
 47 |     def __repr__(self):
 48 |         return repr(self.bounds())
 49 | 
 50 | 
 51 | class TestBounds(TestCase):
 52 |     def test_random_decreasing_range(self):
 53 |         for _ in range(1000):
 54 |             r = RandomDecreasingRange()
 55 |             last_range = r.bounds()
 56 |             while r.tighten_bounds():
 57 |                 next_range = r.bounds()
 58 |                 self.assertTrue(next_range.lower_bound >= last_range.lower_bound
 59 |                                 and next_range.upper_bound <= last_range.upper_bound
 60 |                                 and (
 61 |                                     next_range.lower_bound > last_range.lower_bound or
 62 |                                     next_range.upper_bound < last_range.upper_bound
 63 |                                 ))
 64 |                 last_range = next_range
 65 | 
 66 |     def test_sort(self):
 67 |         for _ in trange(100):
 68 |             ranges = [RandomDecreasingRange() for _ in range(100)]
 69 |             sorted_ranges = sorted(ranges, key=lambda r: r.final_value)
 70 |             for expected, actual in zip(sorted_ranges, sort(ranges)):
 71 |                 self.assertEqual(expected.final_value, actual.final_value)
 72 | 
 73 |     def test_make_distinct(self):
 74 |         speedups = 0
 75 |         tests = 0
 76 |         try:
 77 |             with trange(0, 100) as t:
 78 |                 for i in t:
 79 |                     ranges = [RandomDecreasingRange() for _ in range(i)]
 80 |                     make_distinct(*ranges)
 81 |                     last_range = None
 82 |                     for r in sort(ranges):
 83 |                         rbounds = r.bounds()
 84 |                         if last_range is not None:
 85 |                             self.assertTrue((last_range.definitive() and rbounds.definitive() and last_range == rbounds) or
 86 |                                             last_range.upper_bound < rbounds.lower_bound,
 87 |                                             f"{last_range!r} was followed by {rbounds!r}")
 88 |                         last_range = rbounds
 89 |                     tightenings = sum(r.tightenings for r in ranges)
 90 |                     if tightenings > 0:
 91 |                         untightened = 0
 92 |                         for r in ranges:
 93 |                             t_before = r.tightenings
 94 |                             while r.tighten_bounds():
 95 |                                 pass
 96 |                             untightened += r.tightenings - t_before
 97 |                         t.desc = f"{(untightened + tightenings) / tightenings:.01f}x Speedup"
 98 |                         speedups += (untightened + tightenings) / tightenings
 99 |                         tests += 1
100 |         finally:
101 |             print(f"Average speedup: {speedups / tests:.01f}x")
102 | 


--------------------------------------------------------------------------------
/test/test_builder.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | from unittest import TestCase
 3 | 
 4 | from graphtage import IntegerNode, ListNode, TreeNode
 5 | from graphtage.builder import BasicBuilder, Builder
 6 | 
 7 | 
 8 | class TestBuilder(TestCase):
 9 |     def test_basic_builder(self):
10 |         result = BasicBuilder().build_tree([1, "a", (2, "b"), {1, 2}, {"a": "b"}, None])
11 |         self.assertIsInstance(result, ListNode)
12 |         self.assertEqual(6, len(result.children()))
13 | 
14 |     def test_custom_builder(self):
15 |         test = self
16 | 
17 |         class Foo:
18 |             def __init__(self, bar):
19 |                 self.bar = bar
20 | 
21 |         class Tester(BasicBuilder):
22 |             @Builder.expander(Foo)
23 |             def expand_foo(self, obj: Foo):
24 |                 yield obj.bar
25 | 
26 |             @Builder.builder(Foo)
27 |             def build_foo(self, obj: Foo, children: List[TreeNode]):
28 |                 test.assertEqual(1, len(children))
29 |                 return children[0]
30 | 
31 |         tree = Tester().build_tree(Foo(10))
32 |         self.assertIsInstance(tree, IntegerNode)
33 |         self.assertEqual(10, tree.object)
34 | 


--------------------------------------------------------------------------------
/test/test_constraints.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | import graphtage
 4 | from graphtage.constraints import MatchIf, MatchUnless
 5 | from graphtage.json import build_tree
 6 | from graphtage import expressions
 7 | 
 8 | 
 9 | class TestConstraints(TestCase):
10 |     def test_match_if(self):
11 |         expr = expressions.parse("from.key == 'foo' && to.key == 'bar'")
12 |         from_tree = build_tree({
13 |             "foo": [1, 2, 3]
14 |         })
15 |         for node in from_tree.dfs():
16 |             MatchIf.apply(node, expr)
17 |         to_tree = build_tree({
18 |             "bar": [1, 2, 4]
19 |         })
20 |         diff = from_tree.diff(to_tree)
21 |         self.assertIsInstance(diff.edit, graphtage.Replace)
22 | 
23 |     def test_match_unless(self):
24 |         expr = expressions.parse("from.key == 'foo' && to.key == 'bar'")
25 |         from_tree = build_tree({
26 |             "foo": [1, 2, 3]
27 |         })
28 |         for node in from_tree.dfs():
29 |             MatchUnless.apply(node, expr)
30 |         to_tree = build_tree({
31 |             "bar": [1, 2, 4]
32 |         })
33 |         diff = from_tree.diff(to_tree)
34 |         self.assertIsInstance(diff.edit, graphtage.MultiSetEdit)
35 | 


--------------------------------------------------------------------------------
/test/test_dataclasses.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from graphtage import IntegerNode, Replace, StringNode
 4 | from graphtage.dataclasses import DataClassEdit, DataClassNode
 5 | 
 6 | 
 7 | class TestDataclasses(TestCase):
 8 |     def test_inheritance(self):
 9 |         class Foo(DataClassNode):
10 |             foo: IntegerNode
11 |             initialized = False
12 | 
13 |             def post_init(self):
14 |                 self.initialized = True
15 | 
16 |         class Bar(Foo):
17 |             bar: StringNode
18 |             initialized = False
19 | 
20 |             def post_init(self):
21 |                 self.initialized = True
22 | 
23 |         self.assertEqual(("foo",), Foo._SLOTS)
24 |         self.assertEqual(0, len(Foo._DATA_CLASS_ANCESTORS))
25 |         self.assertEqual(("foo", "bar",), Bar._SLOTS)
26 |         self.assertEqual(1, len(Bar._DATA_CLASS_ANCESTORS))
27 | 
28 |         b = Bar(foo=IntegerNode(10), bar=StringNode("bar"))
29 |         self.assertEqual(10, b.foo.object)
30 |         self.assertEqual("bar", b.bar.object)
31 |         self.assertTrue(b.initialized)
32 | 
33 |         # now test a mixture of positional and keyword arguments
34 |         b = Bar(StringNode("bar"), foo=IntegerNode(10))
35 |         self.assertEqual(10, b.foo.object)
36 |         self.assertEqual("bar", b.bar.object)
37 |         self.assertTrue(b.initialized)
38 | 
39 |         # test equality
40 |         self.assertEqual(Bar(IntegerNode(10), StringNode("bar")), b)
41 |         self.assertNotEqual(Bar(IntegerNode(11), StringNode("bar")), b)
42 | 
43 |         # test diffing of different dataclasses
44 |         f = Foo(IntegerNode(10))
45 |         edit = f.edits(b)
46 |         self.assertIsInstance(edit, Replace)
47 |         c = Foo(IntegerNode(12))
48 |         edit = f.edits(c)
49 |         self.assertIsInstance(edit, DataClassEdit)
50 | 
51 |     def test_inheritance_with_duplicate(self):
52 |         def define_duplicate():
53 |             class BaseFoo(DataClassNode):
54 |                 foo: StringNode
55 | 
56 |             class DuplicateFoo(BaseFoo):
57 |                 bar: IntegerNode
58 |                 foo: IntegerNode
59 | 
60 |         self.assertRaises(TypeError, define_duplicate)
61 | 
62 |     def test_runtime_type_checking(self):
63 |         class Foo(DataClassNode):
64 |             foo: IntegerNode
65 | 
66 |         def try_wrong_type():
67 |             return Foo(StringNode("foo"))
68 | 
69 |         self.assertRaises(ValueError, try_wrong_type)
70 | 


--------------------------------------------------------------------------------
/test/test_expressions.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from graphtage.expressions import parse, ParseError, StringToken
 4 | 
 5 | 
 6 | class TestExpressions(TestCase):
 7 |     def test_string_parsing(self):
 8 |         input_str = 'This is a test'
 9 |         ret = parse(f'"{input_str}"').eval()
10 |         self.assertIsInstance(ret, StringToken)
11 |         self.assertEqual(input_str, str(ret))
12 | 
13 |     def test_string_escaping(self):
14 |         input_str = 'foo " bar'
15 |         escaped_input = input_str.replace('"', '\\"')
16 |         ret = parse(f'"{escaped_input}"').eval()
17 |         self.assertIsInstance(ret, StringToken)
18 |         self.assertEqual(input_str, str(ret))
19 |         with self.assertRaises(ParseError):
20 |             parse(f'{input_str}')
21 | 
22 |     def test_getitem(self):
23 |         self.assertEqual(1234, parse('foo[(bar + 10) * 2]').eval({
24 |             'foo': {
25 |                 40: 1234
26 |             },
27 |             'bar': 10
28 |         }))
29 | 
30 |     def test_bracket_parsing(self):
31 |         with self.assertRaises(ParseError):
32 |             parse('foo[bar(])')
33 |         with self.assertRaises(ParseError):
34 |             parse('(bar[)]')
35 | 
36 |     def test_evaluation(self):
37 |         assignments = {
38 |             'sampling_factors': 1234,
39 |             'thumbnail_x': 5,
40 |             'thumbnail_y': 7
41 |         }
42 |         self.assertEqual(65, parse('(sampling_factors & -0xf0) >> 4').eval(assignments))
43 |         self.assertEqual(105, parse('thumbnail_x * thumbnail_y * 3').eval(assignments))
44 | 
45 |     def test_functions(self):
46 |         self.assertEqual(sum([1, 2, 3, 4]), parse('sum([1, 2, 3, 4])').eval())
47 |         self.assertEqual('a, b, c, d', parse('", ".join(["a", "b", "c", "d"])').eval())
48 | 
49 |     def test_member_access(self):
50 |         class Foo:
51 |             def __init__(self, bar):
52 |                 self.bar = bar
53 | 
54 |         assignments = {
55 |             'foo': Foo(1234)
56 |         }
57 | 
58 |         self.assertEqual(1234, parse('foo.bar').eval(assignments))
59 |         with self.assertRaises(ParseError):
60 |             parse('foo.__dict__').eval(assignments)
61 | 
62 |     def test_containers(self):
63 |         self.assertEqual([[1, (3,)]], parse('[[1, (3,)]]').eval())
64 |         self.assertEqual([1, 2, 3, 4], parse('[1, 2, 3, 4]').eval())
65 |         self.assertEqual((1, 2, 3, 4), parse('(1, 2, 3, 4)').eval())
66 |         self.assertEqual([[1, 2, [3], 4]], parse('[[1, 2, [3], 4]]').eval())
67 |         self.assertEqual((1,), parse('(1,)').eval())
68 |         self.assertEqual([1], parse('[1]').eval())
69 |         with self.assertRaises(ParseError):
70 |             self.assertEqual([1], parse('[1,]').eval())
71 | 


--------------------------------------------------------------------------------
/test/test_fibonacci.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | from collections import defaultdict
  3 | from typing import Callable, Dict, List, Optional, Set
  4 | from unittest import TestCase
  5 | 
  6 | from tqdm import tqdm, trange
  7 | 
  8 | from graphtage.fibonacci import FibonacciHeap, HeapNode, MaxFibonacciHeap
  9 | 
 10 | 
 11 | class TestFibonacciHeap(TestCase):
 12 |     @classmethod
 13 |     def setUpClass(cls):
 14 |         cls.random_list: List[int] = [random.randint(0, 10000) for _ in range(10000)]
 15 |         cls.sorted_list: List[int] = sorted(cls.random_list)
 16 | 
 17 |     def test_duplicate_items(self):
 18 |         heap = FibonacciHeap()
 19 |         test_list = [2, 1, 2]
 20 |         for i in test_list:
 21 |             heap.push(i)
 22 |         heap_sorted = [heap.pop() for _ in range(len(test_list))]
 23 |         self.assertEqual(sorted(test_list), heap_sorted)
 24 | 
 25 |     def random_heap(self) -> FibonacciHeap[int, int]:
 26 |         heap: FibonacciHeap[int, int] = FibonacciHeap()
 27 |         for rand_int in self.random_list:
 28 |             heap.push(rand_int)
 29 |         return heap
 30 | 
 31 |     def random_max_heap(self, key: Optional[Callable[[int], int]] = None) -> MaxFibonacciHeap[int, int]:
 32 |         heap: FibonacciHeap[int, int] = MaxFibonacciHeap(key=key)
 33 |         for rand_int in self.random_list:
 34 |             heap.push(rand_int)
 35 |         return heap
 36 | 
 37 |     def test_fibonacci_heap(self):
 38 |         heap = self.random_heap()
 39 |         heap_sorted = [heap.pop() for _ in range(len(self.random_list))]
 40 |         self.assertEqual(self.sorted_list, heap_sorted)
 41 | 
 42 |     def test_max_fibonacci_heap(self):
 43 |         heap = self.random_max_heap()
 44 |         heap_sorted = [heap.pop() for _ in range(len(self.random_list))]
 45 |         self.assertEqual(list(reversed(self.sorted_list)), heap_sorted)
 46 | 
 47 |     def test_max_fibonacci_heap_with_key(self):
 48 |         heap = self.random_max_heap(key=lambda i: -i)
 49 |         heap_sorted = [heap.pop() for _ in range(len(self.random_list))]
 50 |         self.assertEqual(self.sorted_list, heap_sorted)
 51 | 
 52 |     def test_node_traversal(self):
 53 |         heap = self.random_heap()
 54 |         self.assertEqual(sum(1 for _ in heap.nodes()), len(heap))
 55 | 
 56 |     def test_manual_node_deletion(self):
 57 |         heap = self.random_heap()
 58 |         for i in trange(len(self.random_list)//20):
 59 |             random_node: HeapNode[int, int] = random.choice(list(heap.nodes()))
 60 |             heap.decrease_key(random_node, -1)
 61 |             heap.pop()
 62 |             random_node.deleted = True
 63 |             self.assertEqual(len(heap), len(self.random_list) - i - 1)
 64 | 
 65 |     def test_node_deletion(self):
 66 |         heap = self.random_heap()
 67 |         for i in trange(len(self.random_list)//20):
 68 |             random_node: HeapNode[int, int] = random.choice(list(heap.nodes()))
 69 |             heap.remove(random_node)
 70 |             self.assertEqual(len(heap), len(self.random_list) - i - 1)
 71 | 
 72 |     def test_decrease_key(self):
 73 |         heap = self.random_heap()
 74 |         nodes_by_value: Dict[int, Set[HeapNode[int, int]]] = defaultdict(set)
 75 |         for node in heap.nodes():
 76 |             nodes_by_value[node.key].add(node)
 77 |         changes: Dict[int, int] = {}
 78 |         for _ in trange(len(self.random_list)//20):
 79 |             while True:
 80 |                 random_sorted_index = random.randint(0, len(self.random_list) - 1)
 81 |                 if random_sorted_index not in changes:
 82 |                     break
 83 |             random_node: HeapNode[int, int] = next(iter(nodes_by_value[self.sorted_list[random_sorted_index]]))
 84 |             self.assertEqual(random_node.key, self.sorted_list[random_sorted_index])
 85 |             if random_node.key <= 0:
 86 |                 continue
 87 |             new_key = random.randint(0, random_node.key - 1)
 88 |             nodes_by_value[random_node.key].remove(random_node)
 89 |             nodes_by_value[new_key].add(random_node)
 90 |             changes[random_sorted_index] = new_key
 91 |             heap.decrease_key(random_node, new_key)
 92 |         updated_list = []
 93 |         for i, expected in enumerate(self.sorted_list):
 94 |             if i in changes:
 95 |                 updated_list.append(changes[i])
 96 |             else:
 97 |                 updated_list.append(expected)
 98 |         expected_list = sorted(updated_list)
 99 |         for expected in tqdm(expected_list):
100 |             node = heap.min_node
101 |             heap.pop()
102 |             self.assertEqual(node.key, expected)
103 | 


--------------------------------------------------------------------------------
/test/test_formatting.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import json
  3 | import plistlib
  4 | import random
  5 | from functools import partial, wraps
  6 | from io import StringIO
  7 | from typing import FrozenSet, Optional, Tuple, Type, Union
  8 | from unittest import TestCase
  9 | 
 10 | import yaml
 11 | from tqdm import trange
 12 | 
 13 | import graphtage
 14 | from graphtage import xml
 15 | 
 16 | 
 17 | STR_BYTES: FrozenSet[str] = frozenset([
 18 |     chr(i) for i in range(32, 127)
 19 | ] + ['\n', '\t', '\r'])
 20 | LETTERS: Tuple[str, ...] = tuple(
 21 |     chr(i) for i in range(ord('a'), ord('z'))
 22 | ) + tuple(
 23 |     chr(i) for i in range(ord('A'), ord('Z'))
 24 | )
 25 | 
 26 | FILETYPE_TEST_PREFIX = 'test_'
 27 | FILETYPE_TEST_SUFFIX = '_formatting'
 28 | 
 29 | 
 30 | def filetype_test(test_func=None, *, test_equality: bool = True, iterations: int = 1000):
 31 |     if test_func is None:
 32 |         return partial(filetype_test, test_equality=test_equality, iterations=iterations)
 33 | 
 34 |     @wraps(test_func)
 35 |     def wrapper(self: 'TestFormatting'):
 36 |         name = test_func.__name__
 37 |         if not name.startswith(FILETYPE_TEST_PREFIX):
 38 |             raise ValueError(f'@filetype_test {name} must start with "{FILETYPE_TEST_PREFIX}"')
 39 |         elif not name.endswith(FILETYPE_TEST_SUFFIX):
 40 |             raise ValueError(f'@filetype_test {name} must end with "{FILETYPE_TEST_SUFFIX}"')
 41 |         filetype_name = name[len(FILETYPE_TEST_PREFIX):-len(FILETYPE_TEST_SUFFIX)]
 42 |         if filetype_name not in graphtage.FILETYPES_BY_TYPENAME:
 43 |             raise ValueError(f'Filetype "{filetype_name}" for @filetype_test {name} not found in graphtage.FILETYPES_BY_TYPENAME')
 44 |         filetype = graphtage.FILETYPES_BY_TYPENAME[filetype_name]
 45 |         formatter = filetype.get_default_formatter()
 46 | 
 47 |         for _ in trange(iterations):
 48 |             orig_obj, representation = test_func(self)
 49 |             if isinstance(representation, str):
 50 |                 representation = representation.encode("utf-8")
 51 |             with graphtage.utils.Tempfile(representation) as t:
 52 |                 tree = filetype.build_tree(t)
 53 |                 stream = StringIO()
 54 |                 printer = graphtage.printer.Printer(out_stream=stream, ansi_color=False)
 55 |                 formatter.print(printer, tree)
 56 |                 formatted_str = stream.getvalue()
 57 |             with graphtage.utils.Tempfile(formatted_str.encode('utf-8')) as t:
 58 |                 try:
 59 |                     new_obj = filetype.build_tree(t)
 60 |                 except Exception as e:
 61 |                     self.fail(f"""{filetype_name.upper()} decode error {e}: Original object:
 62 | {orig_obj!r}
 63 | Expected format:
 64 | {representation.decode("utf-8")}
 65 | Actual format:
 66 | {formatted_str!s}""")
 67 |             if test_equality:
 68 |                 self.assertEqual(tree, new_obj)
 69 | 
 70 |     return wrapper
 71 | 
 72 | 
 73 | class TestFormatting(TestCase):
 74 |     @staticmethod
 75 |     def make_random_int() -> int:
 76 |         return random.randint(-1000000, 1000000)
 77 | 
 78 |     @staticmethod
 79 |     def make_random_float() -> float:
 80 |         return random.random()
 81 | 
 82 |     @staticmethod
 83 |     def make_random_bool() -> bool:
 84 |         return random.choice([True, False])
 85 | 
 86 |     @staticmethod
 87 |     def make_random_str(exclude_bytes: FrozenSet[str] = frozenset(), allow_empty_strings: bool = True) -> str:
 88 |         if allow_empty_strings:
 89 |             min_length = 0
 90 |         else:
 91 |             min_length = 1
 92 |         return ''.join(random.choices(list(STR_BYTES - exclude_bytes), k=random.randint(min_length, 128)))
 93 | 
 94 |     @staticmethod
 95 |     def make_random_non_container(exclude_bytes: FrozenSet[str] = frozenset(), allow_empty_strings: bool = True):
 96 |         return random.choice([
 97 |             TestFormatting.make_random_int,
 98 |             TestFormatting.make_random_bool,
 99 |             TestFormatting.make_random_float,
100 |             partial(
101 |                 TestFormatting.make_random_str, exclude_bytes=exclude_bytes, allow_empty_strings=allow_empty_strings
102 |             )
103 |         ])()
104 | 
105 |     @staticmethod
106 |     def _make_random_obj(obj_stack, force_container_type: Optional[Type[Union[dict, list]]] = None, *args, **kwargs):
107 |         r = random.random()
108 |         NON_CONTAINER_PROB = 0.1
109 |         CONTAINER_PROB = (1.0 - NON_CONTAINER_PROB) / 2.0
110 |         if r <= NON_CONTAINER_PROB:
111 |             ret = TestFormatting.make_random_non_container(*args, **kwargs)
112 |         elif r <= NON_CONTAINER_PROB + CONTAINER_PROB:
113 |             if force_container_type is not None:
114 |                 ret = force_container_type()
115 |             else:
116 |                 ret = []
117 |             obj_stack.append(ret)
118 |         else:
119 |             if force_container_type is not None:
120 |                 ret = force_container_type()
121 |             else:
122 |                 ret = {}
123 |             obj_stack.append(ret)
124 |         return ret
125 | 
126 |     @staticmethod
127 |     def make_random_obj(
128 |             force_string_keys: bool = False,
129 |             allow_empty_containers: bool = True,
130 |             alternate_containers: bool = False,
131 |             *args, **kwargs):
132 |         obj_stack = []
133 |         ret = TestFormatting._make_random_obj(obj_stack, *args, **kwargs)
134 | 
135 |         while obj_stack:
136 |             expanding = obj_stack.pop()
137 |             size = int(random.betavariate(0.75, 5) * 10)
138 |             if isinstance(expanding, dict):
139 |                 if size == 0 and not allow_empty_containers:
140 |                     if force_string_keys:
141 |                         expanding[TestFormatting.make_random_str(*args, **kwargs)] = \
142 |                             TestFormatting.make_random_non_container(*args, **kwargs)
143 |                     else:
144 |                         expanding[TestFormatting.make_random_non_container(*args, **kwargs)] = \
145 |                             TestFormatting.make_random_non_container(*args, **kwargs)
146 |                 else:
147 |                     if alternate_containers:
148 |                         force_container_type = list
149 |                     else:
150 |                         force_container_type = None
151 |                     for _ in range(size):
152 |                         if force_string_keys:
153 |                             expanding[TestFormatting.make_random_str(*args, **kwargs)] = \
154 |                                 TestFormatting._make_random_obj(
155 |                                     obj_stack, force_container_type=force_container_type, *args, **kwargs
156 |                                 )
157 |                         else:
158 |                             expanding[TestFormatting.make_random_non_container(*args, **kwargs)] = \
159 |                                 TestFormatting._make_random_obj(
160 |                                     obj_stack, force_container_type=force_container_type, *args, **kwargs
161 |                                 )
162 |             else:
163 |                 if size == 0 and not allow_empty_containers:
164 |                     expanding.append(TestFormatting.make_random_non_container(*args, **kwargs))
165 |                 else:
166 |                     if alternate_containers:
167 |                         force_container_type = dict
168 |                     else:
169 |                         force_container_type = None
170 |                     for _ in range(size):
171 |                         expanding.append(TestFormatting._make_random_obj(
172 |                             obj_stack, force_container_type=force_container_type, *args, **kwargs
173 |                         ))
174 |         return ret
175 | 
176 |     def test_formatter_coverage(self):
177 |         for name in graphtage.FILETYPES_BY_TYPENAME.keys():
178 |             if not hasattr(self, f'test_{name}_formatting'):
179 |                 self.fail(f"Filetype {name} is missing a `test_{name}_formatting` test function")
180 | 
181 |     @filetype_test
182 |     def test_json_formatting(self):
183 |         orig_obj = TestFormatting.make_random_obj(force_string_keys=True)
184 |         return orig_obj, json.dumps(orig_obj)
185 | 
186 |     @filetype_test
187 |     def test_csv_formatting(self):
188 |         orig_obj = [
189 |             [TestFormatting.make_random_non_container(
190 |                 exclude_bytes=frozenset('\n\r\t,"\'')
191 |             ) for _ in range(random.randint(0, 10))]
192 |             for _ in range(random.randint(0, 10))
193 |         ]
194 |         s = StringIO()
195 |         writer = csv.writer(s)
196 |         for row in orig_obj:
197 |             writer.writerow(row)
198 |         return orig_obj, s.getvalue()
199 | 
200 |     @staticmethod
201 |     def make_random_xml() -> xml.XMLElementObj:
202 |         ret = xml.XMLElementObj('', {})
203 |         elem_stack = [ret]
204 |         while elem_stack:
205 |             elem = elem_stack.pop()
206 |             elem.tag = ''.join(random.choices(LETTERS, k=random.randint(1, 20)))
207 |             elem.attrib = {
208 |                ''.join(random.choices(LETTERS, k=random.randint(1, 10))): TestFormatting.make_random_str()
209 |                for _ in range(int(random.betavariate(0.75, 5) * 10))
210 |             }
211 |             if random.random() <= 0.5:
212 |                elem.text = TestFormatting.make_random_str()
213 |             elem.children = [xml.XMLElementObj('', {}) for _ in range(int(random.betavariate(0.75, 5) * 10))]
214 |             elem_stack.extend(elem.children)
215 |         return ret
216 | 
217 |     # Do not test equality for XML because the XMLFormatter auto-indents and thereby adds extra spaces to element text
218 |     @filetype_test(test_equality=False, iterations=250)
219 |     def test_xml_formatting(self):
220 |         orig_obj = self.make_random_xml()
221 |         return orig_obj, str(orig_obj)
222 | 
223 |     def test_html_formatting(self):
224 |         # For now, HTML support is implemented through XML, so we don't need a separate test.
225 |         # However, test_formatter_coverage will complain unless this function is here!
226 |         pass
227 | 
228 |     def test_json5_formatting(self):
229 |         # For now, JSON5 support is implemented using the regular JSON formatter, so we don't need a separate test.
230 |         # However, test_formatter_coverage will complain unless this function is here!
231 |         pass
232 | 
233 |     def test_pickle_formatting(self):
234 |         # test_formatter_coverage will complain unless this function is here!
235 |         # TODO: Implement a Pickle formatting test
236 |         pass
237 | 
238 |     @filetype_test
239 |     def test_yaml_formatting(self):
240 |         orig_obj = TestFormatting.make_random_obj(
241 |             allow_empty_containers=False,
242 |             # The YAML formatter doesn't properly handle certain special characters
243 |             # TODO: Relax the excluded bytes in the following argument once the formatter properly handles special chars
244 |             exclude_bytes=frozenset('\t \\\'"\r:[]{}&\n()`|+%<>#*^%$@!~_+-=.,;\n?/'),
245 |             # The YAML formatter doesn't properly handle nested lists yet
246 |             # TODO: Remove the next argument once the formatter properly formats nested lists
247 |             alternate_containers=True,
248 |             # The YAML formatter also doesn't properly handle empty strings that are dict keys:
249 |             # TODO: Remove the next argument once the formatter properly formats empty strings as dict keys
250 |             allow_empty_strings=False
251 |         )
252 | 
253 |         s = StringIO()
254 |         yaml.dump(orig_obj, s, Dumper=graphtage.yaml.Dumper)
255 |         return orig_obj, s.getvalue()
256 | 
257 |     @filetype_test(test_equality=False)
258 |     def test_plist_formatting(self):
259 |         orig_obj = TestFormatting.make_random_obj(force_string_keys=True, exclude_bytes=frozenset('<>/\n&?|@{}[]'))
260 |         return orig_obj, plistlib.dumps(orig_obj)
261 | 


--------------------------------------------------------------------------------
/test/test_graphtage.py:
--------------------------------------------------------------------------------
  1 | from io import StringIO
  2 | from unittest import TestCase
  3 | 
  4 | import graphtage
  5 | import graphtage.json
  6 | import graphtage.multiset
  7 | import graphtage.tree
  8 | 
  9 | from graphtage.printer import Printer
 10 | 
 11 | 
 12 | class TestGraphtage(TestCase):
 13 |     @classmethod
 14 |     def setUpClass(cls):
 15 |         cls.small_from = graphtage.json.build_tree({
 16 |             "test": "foo",
 17 |             "baz": 1
 18 |         })
 19 |         cls.small_to = graphtage.json.build_tree({
 20 |             "test": "bar",
 21 |             "baz": 2
 22 |         })
 23 |         cls.list_from = graphtage.json.build_tree([0, 1, 2, 3, 4, 5])
 24 |         cls.list_to = graphtage.json.build_tree([1, 2, 3, 4, 5])
 25 | 
 26 |     def test_string_diff_printing(self):
 27 |         s1 = graphtage.StringNode("abcdef")
 28 |         s2 = graphtage.StringNode("azced")
 29 |         diff = s1.diff(s2)
 30 |         out_stream = StringIO()
 31 |         p = Printer(ansi_color=True, out_stream=out_stream)
 32 |         diff.print(p)
 33 |         self.assertEqual(diff.edited_cost(), 5)
 34 |         self.assertEqual('\x1b[32m"\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32ma\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mz̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1mb̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mc\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1md̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32me\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1md̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1mf̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m"\x1b[39m', out_stream.getvalue())
 35 | 
    def test_string_diff_remove_insert_reordering(self):
        """Diff ``'abcdefg'`` against ``'abhijfg'`` and check the colored output.

        In the expected output the inserted characters ``h``, ``i``, ``j``
        appear as one run before the removed characters ``c``, ``d``, ``e``.
        """
        s1 = graphtage.StringNode('abcdefg')
        s2 = graphtage.StringNode('abhijfg')
        diff = s1.diff(s2)
        # Capture the printer output instead of writing to the terminal.
        out_stream = StringIO()
        p = Printer(ansi_color=True, out_stream=out_stream)
        diff.print(p)
        # Golden output: do not reformat, every escape code is significant.
        self.assertEqual('\x1b[32m"\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32ma\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mb\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mh̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mi̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mj̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1mc̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1md̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1me̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mf\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mg\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m"\x1b[39m', out_stream.getvalue())
 44 | 
 45 |     def test_small_diff(self):
 46 |         diff = self.small_from.diff(self.small_to)
 47 |         self.assertIsInstance(diff, graphtage.DictNode)
 48 |         self.assertIsInstance(diff, graphtage.tree.EditedTreeNode)
 49 |         self.assertEqual(1, len(diff.edit_list))
 50 |         self.assertIsInstance(diff.edit_list[0], graphtage.multiset.MultiSetEdit)
 51 |         has_test_match = False
 52 |         has_baz_match = False
 53 |         for edit in diff.edit_list[0].edits():
 54 |             if edit.bounds().upper_bound > 0:
 55 |                 self.assertIsInstance(edit, graphtage.KeyValuePairEdit)
 56 |                 key_edit = edit.key_edit
 57 |                 value_edit = edit.value_edit
 58 |                 if isinstance(value_edit.from_node, graphtage.StringNode):
 59 |                     self.assertIsInstance(key_edit.to_node, graphtage.StringNode)
 60 |                     self.assertEqual(key_edit.from_node.object, 'test')
 61 |                     self.assertEqual(value_edit.from_node.object, 'foo')
 62 |                     self.assertEqual(value_edit.to_node.object, 'bar')
 63 |                     self.assertEqual(edit.bounds().upper_bound, 6)
 64 |                     self.assertFalse(has_test_match)
 65 |                     has_test_match = True
 66 |                 elif isinstance(value_edit.from_node, graphtage.IntegerNode):
 67 |                     self.assertIsInstance(value_edit.to_node, graphtage.IntegerNode)
 68 |                     self.assertEqual(value_edit.from_node.object, 1)
 69 |                     self.assertEqual(value_edit.to_node.object, 2)
 70 |                     self.assertEqual(value_edit.bounds().upper_bound, 1)
 71 |                     self.assertFalse(has_baz_match)
 72 |                     has_baz_match = True
 73 |                 else:
 74 |                     self.fail()
 75 |         self.assertTrue(has_test_match)
 76 |         self.assertTrue(has_baz_match)
 77 | 
 78 |     def test_list_diff(self):
 79 |         diff = self.list_from.diff(self.list_to)
 80 |         self.assertIsInstance(diff, graphtage.ListNode)
 81 |         self.assertIsInstance(diff, graphtage.tree.EditedTreeNode)
 82 |         self.assertEqual(1, len(diff.edit_list))
 83 |         self.assertIsInstance(diff.edit_list[0], graphtage.EditDistance)
 84 |         for edit in diff.edit_list[0].edits():
 85 |             if edit.bounds().upper_bound > 0:
 86 |                 self.assertIsInstance(edit, graphtage.Remove)
 87 |                 self.assertIsInstance(edit.from_node, graphtage.IntegerNode)
 88 |                 self.assertEqual(edit.from_node.object, 0)
 89 |                 self.assertIsInstance(edit.to_node, graphtage.ListNode)
 90 |                 self.assertEqual(edit.to_node, self.list_from)
 91 |             else:
 92 |                 self.assertIsInstance(edit, graphtage.Match)
 93 | 
 94 |     def test_single_element_list(self):
 95 |         diff = graphtage.json.build_tree([1]).diff(graphtage.json.build_tree([2]))
 96 |         self.assertIsInstance(diff, graphtage.ListNode)
 97 |         self.assertIsInstance(diff, graphtage.tree.EditedTreeNode)
 98 |         self.assertEqual(1, len(diff.edit_list))
 99 |         self.assertIsInstance(diff.edit_list[0], graphtage.FixedLengthSequenceEdit)
100 | 
101 |     def test_empty_list(self):
102 |         diff = graphtage.ListNode(()).diff(graphtage.ListNode(()))
103 |         self.assertEqual(1, len(diff.edit_list))
104 |         self.assertIsInstance(diff.edit_list[0], graphtage.Match)
105 |         self.assertEqual(0, diff.edit_list[0].bounds().upper_bound)
106 | 
107 |     def test_null_json(self):
108 |         diff = graphtage.json.build_tree([None]).diff(graphtage.json.build_tree([1]))
109 |         self.assertIsInstance(diff, graphtage.ListNode)
110 |         self.assertIsInstance(diff, graphtage.tree.EditedTreeNode)
111 |         self.assertEqual(1, len(diff.edit_list))
112 |         self.assertIsInstance(diff.edit_list[0], graphtage.FixedLengthSequenceEdit)
113 | 


--------------------------------------------------------------------------------
/test/test_levenshtein.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | from typing import List
 3 | from unittest import TestCase
 4 | 
 5 | from tqdm import trange
 6 | 
 7 | from graphtage.edits import Edit, Insert, Match, Remove
 8 | from graphtage import EditDistance, string_edit_distance
 9 | 
10 | 
# Alphabet (ASCII lower- and upper-case letters) that the randomized tests
# below sample from when generating strings.
LETTERS: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
12 | 
13 | 
class TestEditDistance(TestCase):
    """Randomized and edge-case checks of ``string_edit_distance``."""

    def test_string_edit_distance_reconstruction(self):
        """Replaying the edits must rebuild both the source and target strings."""
        for _ in trange(200):
            source_len = random.randint(10, 30)
            target_len = random.randint(10, 30)
            source = ''.join(random.choices(LETTERS, k=source_len))
            target = ''.join(random.choices(LETTERS, k=target_len))
            distance: EditDistance = string_edit_distance(source, target)
            edits: List[Edit] = list(distance.edits())
            rebuilt_source = ''
            rebuilt_target = ''
            for edit in edits:
                if isinstance(edit, Match):
                    # A match contributes a character to both strings.
                    rebuilt_source += edit.from_node.object
                    rebuilt_target += edit.to_node.object
                    continue
                if isinstance(edit, Remove):
                    rebuilt_source += edit.from_node.object
                    continue
                if isinstance(edit, Insert):
                    rebuilt_target += edit.from_node.object
                    continue
                self.fail()
            self.assertEqual(source, rebuilt_source)
            self.assertEqual(target, rebuilt_target)

    def test_string_edit_distance_optimality(self):
        """The edit script must never be longer than the script used to build the target."""
        for _ in trange(200):
            source_len = random.randint(10, 30)
            source = ''.join(random.choices(LETTERS, k=source_len))
            ground_truth_edits: int = 0
            target = ''
            for original_char in source:
                while random.random() < 0.2:
                    # 20% chance of inserting a new character
                    target += random.choice(LETTERS)
                    ground_truth_edits += 1
                # Keeping or dropping the original character counts as one step.
                ground_truth_edits += 1
                # 20% chance of removing the original character
                if random.random() >= 0.2:
                    target += original_char
            distance: EditDistance = string_edit_distance(source, target)
            edits: List[Edit] = list(distance.edits())
            edit_count = len(edits)
            if ground_truth_edits < edit_count:
                # Dump the offending script before the assertion fails.
                print()
                print('\n'.join([e.__class__.__name__ for e in edits]))
                print(source, target)
            self.assertGreaterEqual(ground_truth_edits, edit_count)

    def test_empty_string_edit_distance(self):
        """Empty inputs: no edits for ('', ''), three edits against 'foo'."""
        no_edits = string_edit_distance('', '').edits()
        with self.assertRaises(StopIteration):
            next(no_edits)
        removals = string_edit_distance('foo', '').edits()
        self.assertEqual(3, sum(1 for _ in removals))
        insertions = string_edit_distance('', 'foo').edits()
        self.assertEqual(3, sum(1 for _ in insertions))
75 | 


--------------------------------------------------------------------------------
/test/test_matching.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | import random
 3 | from unittest import TestCase
 4 | 
 5 | import numpy as np
 6 | from tqdm import tqdm, trange
 7 | 
 8 | from graphtage.matching import get_dtype, min_weight_bipartite_matching, WeightedBipartiteMatcher
 9 | 
10 | from .test_bounds import RandomDecreasingRange
11 | 
12 | 
class TestWeightedBipartiteMatcher(TestCase):
    """Tests for the bipartite matching helpers in ``graphtage.matching``."""

    def test_weighted_bipartite_matching(self):
        """Tightening must strictly shrink the bounds and end at a zero-cost matching."""
        for n in trange(1, 25, 3):
            from_nodes = list(range(n))
            to_nodes = list(range(n))
            edges = []
            for _ in range(len(from_nodes)):
                edges.append([RandomDecreasingRange() for _ in range(len(to_nodes))])
            # Give the diagonal a final value of zero so a zero-cost matching exists.
            diagonal_len = min(len(from_nodes), len(to_nodes))
            for i in range(diagonal_len):
                edges[i][i] = RandomDecreasingRange(fixed_lb=0, fixed_ub=100000, final_value=0)
            matcher = WeightedBipartiteMatcher(
                from_nodes=from_nodes,
                to_nodes=to_nodes,
                get_edge=lambda n1, n2: edges[n1][n2]
            )
            start_bounds = matcher.bounds()
            gap = start_bounds.upper_bound - start_bounds.lower_bound
            with tqdm(leave=False, total=gap) as progress:
                progress.update(0)
                while matcher.tighten_bounds():
                    current = matcher.bounds()
                    next_gap = current.upper_bound - current.lower_bound
                    # Every successful tightening must narrow the gap.
                    self.assertLess(next_gap, gap)
                    progress.update(gap - next_gap)
                    gap = next_gap
            self.assertTrue(matcher.bounds().definitive())
            self.assertEqual(0, matcher.bounds().upper_bound)

    def test_min_weight_bipartite_matching(self):
        """A planted zero-weight diagonal must be returned as the matching."""
        for _ in trange(50):
            num_from = random.randint(1, 500)
            num_to = random.randint(1, 500)
            from_nodes = [f'f{i}' for i in range(num_from)]
            to_nodes = [f't{i}' for i in range(num_to)]
            planted = min(num_from, num_to)
            # Force an optimal, zero-value matching:
            expected_matching = {}
            for i in range(planted):
                expected_matching[i] = (i, 0)
            edges = {}
            for i in range(planted):
                edges[(from_nodes[i], to_nodes[i])] = 0
            edge_probability = 0.9
            # Every other pair gets a positive random weight with 90% probability.
            extra_edges = {}
            for pair in itertools.product(from_nodes, to_nodes):
                if pair not in edges and random.random() < edge_probability:
                    extra_edges[pair] = random.randint(1, 2**16)
            edges.update(extra_edges)

            def get_edge(f, t):
                # Missing edges are reported as None.
                return edges.get((f, t))

            matching = min_weight_bipartite_matching(from_nodes=from_nodes, to_nodes=to_nodes, get_edges=get_edge)

            self.assertEqual(expected_matching, matching)

    def test_get_dtype(self):
        """``get_dtype`` must return the expected dtype for each value range."""
        cases = [
            (0, 255, np.uint8),
            (-1, 127, np.int8),
            (-128, 255, np.int16),
            (0, 2**64 - 1, np.uint64),
            (0, 2**64, int),
        ]
        for low, high, expected in cases:
            actual = get_dtype(low, high)
            self.assertEqual(np.dtype(expected), actual)
80 | 


--------------------------------------------------------------------------------
/test/test_object_set.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from graphtage.object_set import ObjectSet
 4 | 
 5 | 
 6 | class UnhashableWithBrokenEquality:
 7 |     def __init__(self, value):
 8 |         self.value = value
 9 | 
10 |     def __eq__(self, other):
11 |         raise ValueError()
12 | 
13 | 
class Unhashable(UnhashableWithBrokenEquality):
    """Unhashable value wrapper that compares equal by wrapped value."""

    def __eq__(self, other):
        if not isinstance(other, Unhashable):
            return False
        return self.value == other.value
17 | 
18 | 
class TestObjectSet(TestCase):
    """Tests ``ObjectSet`` with members that cannot be hashed or compared."""

    def test_unhashability(self):
        """Sanity check: the helper class really is unhashable."""
        with self.assertRaises(TypeError):
            hash(Unhashable(10))

    def test_object_set(self):
        """Membership, length and removal work with unhashable members."""
        first = Unhashable(10)
        second = Unhashable(11)
        objs = ObjectSet((10, first, second))
        for member in (10, first, second):
            self.assertIn(member, objs)
        self.assertEqual(3, len(objs))
        objs.remove(first)
        self.assertIn(10, objs)
        self.assertNotIn(first, objs)
        self.assertIn(second, objs)
        self.assertEqual(2, len(objs))

    def test_broken_equality(self):
        """Members whose ``__eq__`` raises are still stored and found."""
        first = UnhashableWithBrokenEquality(10)
        second = UnhashableWithBrokenEquality(10)
        # this will default to uniqueness by identity
        objs = ObjectSet((10, first, second))
        for member in (10, first, second):
            self.assertIn(member, objs)
        self.assertEqual(3, len(objs))
46 | 


--------------------------------------------------------------------------------
/test/test_pydiff.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | from unittest import TestCase
 3 | 
 4 | import graphtage
 5 | from graphtage.pydiff import build_tree, print_diff, PyDiffFormatter
 6 | 
 7 | from .timing import run_with_time_limit
 8 | 
 9 | 
class TestPyDiff(TestCase):
    """Tests for diffing plain Python objects through ``graphtage.pydiff``."""

    def test_build_tree(self):
        """Lists map to ``ListNode`` and dicts map to ``DictNode``."""
        list_tree = build_tree([1, 2, 3, 4])
        self.assertIsInstance(list_tree, graphtage.ListNode)
        dict_tree = build_tree({1: 2, 'a': 'b'})
        self.assertIsInstance(dict_tree, graphtage.DictNode)

    def test_diff(self):
        """Printing a diff of nested lists and dicts must not raise."""
        before = [1, 2, {3: "three"}, 4]
        after = [1, 2, {3: 3}, "four"]
        color_printer = graphtage.printer.Printer(ansi_color=True)
        print_diff(before, after, printer=color_printer)

    def test_custom_class(self):
        """Printing a diff of two instances of an ad-hoc class must not raise."""
        class Foo:
            def __init__(self, bar, baz):
                self.bar = bar
                self.baz = baz

        color_printer = graphtage.printer.Printer(ansi_color=True)
        left = Foo("bar", "baz")
        right = Foo("bar", "bak")
        print_diff(left, right, printer=color_printer)

    def test_nested_tuple_diff(self):
        """A tuple stored as a dict value is built as a ``ListNode``."""
        tree = build_tree({"a": (1, 2)})
        self.assertIsInstance(tree, graphtage.DictNode)
        children = tree.children()
        self.assertEqual(1, len(children))
        only_pair = children[0]
        self.assertIsInstance(only_pair, graphtage.KeyValuePairNode)
        self.assertIsInstance(only_pair.key, graphtage.StringNode)
        self.assertIsInstance(only_pair.value, graphtage.ListNode)

    def test_infinite_loop(self):
        """Reproduces https://github.com/trailofbits/graphtage/issues/82"""

        @dataclasses.dataclass
        class Thing:
            foo: str

        # The diff has to finish within the time limit.
        with run_with_time_limit(60):
            graphtage.pydiff.diff([Thing("ok")], [Thing("bad")])
49 | 


--------------------------------------------------------------------------------
/test/test_search.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from tqdm import trange
 4 | 
 5 | from graphtage.search import IterativeTighteningSearch
 6 | from .test_bounds import RandomDecreasingRange
 7 | 
 8 | 
 9 | class TestIterativeTighteningSearch(TestCase):
10 |     def test_iterative_tightening_search(self):
11 |         speedups = 0
12 |         tests = 0
13 |         try:
14 |             t = trange(100)
15 |             for _ in t:
16 |                 ranges = [RandomDecreasingRange() for _ in range(100)]
17 |                 best_range: RandomDecreasingRange = None
18 |                 for r in ranges:
19 |                     if best_range is None or r.final_value < best_range.final_value:
20 |                         best_range = r
21 |                 search = IterativeTighteningSearch(iter(ranges))
22 |                 while search.tighten_bounds():
23 |                     pass
24 |                 result = search.best_match
25 |                 tightenings = sum(r.tightenings for r in ranges)
26 |                 untightened = 0
27 |                 for r in ranges:
28 |                     t_before = r.tightenings
29 |                     while r.tighten_bounds():
30 |                         pass
31 |                     untightened += r.tightenings - t_before
32 |                 t.desc = f"{(untightened + tightenings) / tightenings:.01f}x Speedup"
33 |                 speedups += (untightened + tightenings) / tightenings
34 |                 tests += 1
35 |                 self.assertEqual(best_range.final_value, result.final_value)
36 |         finally:
37 |             print(f"Average speedup: {speedups / tests:.01f}x")
38 | 


--------------------------------------------------------------------------------
/test/test_timing.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from .timing import run_with_time_limit
 4 | 
 5 | 
def infinite_loop():
    """Loop forever without doing any work; this function never returns."""
    while True:
        pass
 9 | 
10 | 
def limited_infinite_loop():
    """Run :func:`infinite_loop` inside a one-second time limit."""
    one_second_limit = run_with_time_limit(seconds=1)
    with one_second_limit:
        infinite_loop()
14 | 
15 | 
class TestTiming(TestCase):
    """Tests for the ``run_with_time_limit`` helper."""

    def test_time_limit(self):
        """An endless loop under a time limit must end in ``TimeoutError``."""
        with self.assertRaises(TimeoutError):
            limited_infinite_loop()

    def test_non_infinite_loop(self):
        """A block that finishes in time must not raise."""
        generous_limit = run_with_time_limit(seconds=60)
        with generous_limit:
            _ = 10
23 | 


--------------------------------------------------------------------------------
/test/test_utils.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from unittest import TestCase
 3 | 
 4 | from graphtage.utils import largest, smallest, SparseMatrix
 5 | 
 6 | 
 7 | class TestSparseMatrix(TestCase):
 8 |     def test_matrix_bounds(self):
 9 |         matrix: SparseMatrix[int] = SparseMatrix(num_rows=10, num_cols=10, default_value=None)
10 |         with self.assertRaises(IndexError):
11 |             _ = matrix[matrix.num_rows]
12 |         with self.assertRaises(IndexError):
13 |             _ = matrix[0][matrix.num_cols]
14 | 
15 |     def test_matrix_default_value(self):
16 |         matrix: SparseMatrix[int] = SparseMatrix(default_value=10)
17 |         self.assertEqual(matrix[0][0], 10)
18 |         matrix[0][0] = 11
19 |         self.assertEqual(matrix[0][0], 11)
20 | 
21 |     def test_matrix_getsizeof(self):
22 |         matrix: SparseMatrix[int] = SparseMatrix()
23 |         size_before = matrix.getsizeof()
24 |         dim = 1000
25 |         int_sizes = 0
26 |         for i in range(dim):
27 |             for j in range(dim):
28 |                 matrix[i][j] = i * dim + j
29 |                 int_sizes += sys.getsizeof(matrix[i][j])
30 |         size_after = matrix.getsizeof()
31 |         self.assertGreaterEqual(size_after - size_before, int_sizes)
32 | 
33 |     def test_matrix_shape(self):
34 |         matrix: SparseMatrix[int] = SparseMatrix()
35 |         self.assertEqual((0, 0), matrix.shape())
36 |         matrix[10][20] = 1
37 |         self.assertEqual((11, 21), matrix.shape())
38 |         matrix = SparseMatrix(num_rows=10, num_cols=10)
39 |         self.assertEqual((10, 10), matrix.shape())
40 | 
41 |     def test_smallest(self):
42 |         for i in smallest(range(1000), n=10):
43 |             self.assertGreater(10, i)
44 |         for i in smallest(*list(range(1000)), n=10):
45 |             self.assertGreater(10, i)
46 | 
47 |     def test_largest(self):
48 |         for i in largest(range(1000), n=10):
49 |             self.assertLess(1000 - 11, i)
50 |         for i in largest(*list(range(1000)), n=10):
51 |             self.assertLess(1000 - 11, i)
52 | 


--------------------------------------------------------------------------------
/test/test_xml.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | 
 4 | from graphtage.utils import Tempfile
 5 | from graphtage.xml import XML
 6 | 
 7 | 
 8 | class TestXML(unittest.TestCase):
 9 |     def test_infinite_loop(self):
10 |         """Reproduces https://github.com/trailofbits/graphtage/issues/32"""
11 |         xml = XML.default_instance
12 |         one_xml = b"""
13 | <root>
14 |   <parent>
15 |     <child1 attribute1="foo">child1</child1>
16 |     <child2>child2</child2>
17 |   </parent>
18 | </root>
19 | """
20 |         two_xml = b"""
21 | <root>
22 |   <parent>
23 |     <child1 attribute1="bar">child1</child1>
24 |     <child2>child2</child2>
25 |   </parent>
26 | </root>
27 | """
28 |         with Tempfile(one_xml) as one, Tempfile(two_xml) as two:
29 |             t1 = xml.build_tree(one)
30 |             t2 = xml.build_tree(two)
31 |             for edit in t1.get_all_edits(t2):
32 |                 print(edit)
33 | 


--------------------------------------------------------------------------------
/test/timing.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | import _thread
 3 | from contextlib import contextmanager
 4 | 
 5 | 
 6 | @contextmanager
 7 | def run_with_time_limit(seconds: int):
 8 |     timer = threading.Timer(seconds, _thread.interrupt_main)
 9 |     timer.start()
10 | 
11 |     try:
12 |         yield
13 |         return
14 |     except:
15 |         pass
16 |     finally:
17 |         timer.cancel()
18 |     raise TimeoutError(f"timeout after {seconds} seconds")
19 | 


--------------------------------------------------------------------------------