├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── artifacts.yml
│       ├── check_version.yml
│       ├── pip-audit.yml
│       ├── publish_docs.yml
│       ├── pythonpackage.yml
│       └── pythonpublish.yml
├── .gitignore
├── CITATION.cff
├── CODEOWNERS
├── CODE_OF_CONDUCT.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── bindist
│   ├── .gitignore
│   ├── Makefile
│   └── graphtage_bin.py
├── docs
│   ├── .gitignore
│   ├── Makefile
│   ├── _static
│   │   └── localtoc.js
│   ├── _templates
│   │   ├── layout.html
│   │   └── searchbox.html
│   ├── build_api.py
│   ├── builders.rst
│   ├── conf.py
│   ├── example.png
│   ├── extending.rst
│   ├── filetypes.rst
│   ├── howitworks.rst
│   ├── index.rst
│   ├── library.rst
│   └── printing.rst
├── graphtage
│   ├── __init__.py
│   ├── __main__.py
│   ├── ast.py
│   ├── bounds.py
│   ├── builder.py
│   ├── constraints.py
│   ├── csv.py
│   ├── dataclasses.py
│   ├── debug.py
│   ├── edits.py
│   ├── expressions.py
│   ├── fibonacci.py
│   ├── formatter.py
│   ├── graphtage.py
│   ├── json.py
│   ├── levenshtein.py
│   ├── matching.py
│   ├── multiset.py
│   ├── object_set.py
│   ├── pickle.py
│   ├── plist.py
│   ├── printer.py
│   ├── progress.py
│   ├── pydiff.py
│   ├── search.py
│   ├── sequences.py
│   ├── tree.py
│   ├── utils.py
│   ├── version.py
│   ├── xml.py
│   └── yaml.py
├── setup.py
└── test
    ├── __init__.py
    ├── test_bounds.py
    ├── test_builder.py
    ├── test_constraints.py
    ├── test_dataclasses.py
    ├── test_expressions.py
    ├── test_fibonacci.py
    ├── test_formatting.py
    ├── test_graphtage.py
    ├── test_levenshtein.py
    ├── test_matching.py
    ├── test_object_set.py
    ├── test_pydiff.py
    ├── test_search.py
    ├── test_timing.py
    ├── test_utils.py
    ├── test_xml.py
    └── timing.py
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # Dependabot configuration: checks the GitHub Actions referenced from the
2 | # repository root for updates once a day.
3 | version: 2
4 |
5 | updates:
6 |   - package-ecosystem: "github-actions"
7 |     directory: "/"
8 |     schedule:
9 |       interval: "daily"
10 |
--------------------------------------------------------------------------------
/.github/workflows/artifacts.yml:
--------------------------------------------------------------------------------
1 | # Builds a standalone Graphtage binary with PyInstaller for every pushed
2 | # "v*" tag and hands the resulting archive to the release action.
3 | name: Build binary artifacts
4 |
5 | on:
6 |   push:
7 |     tags:
8 |       - 'v*'
9 |
10 | jobs:
11 |   binaries:
12 |     strategy:
13 |       matrix:
14 |         # windows-latest is currently left out of the matrix.
15 |         os:
16 |           - ubuntu-latest
17 |           - macos-latest
18 |
19 |     runs-on: ${{ matrix.os }}
20 |
21 |     permissions:
22 |       # NOTE: Needed to save artifacts to the repository.
23 |       contents: write
24 |
25 |     steps:
26 |       - uses: actions/checkout@v4
27 |         with:
28 |           submodules: recursive
29 |
30 |       - name: Set up Python
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: "3.x"
34 |
35 |       - name: Install dependencies
36 |         run: |
37 |           python -m pip install --upgrade pip
38 |           pip install setuptools pyinstaller
39 |           pip install .
40 |
41 |       # The second command exports the archive name printed by
42 |       # `make dist-name` as DIST_FILE for the release step below.
43 |       - name: Build the binary
44 |         run: |
45 |           make -C bindist
46 |           cd bindist && echo "DIST_FILE=$(make dist-name | tr -d '\n')" >> $GITHUB_ENV
47 |
48 |       - name: Release binary artifacts
49 |         uses: softprops/action-gh-release@v0.1.15
50 |         with:
51 |           files: bindist/${{ env.DIST_FILE }}
52 |
--------------------------------------------------------------------------------
/.github/workflows/check_version.yml:
--------------------------------------------------------------------------------
1 | # Runs on release events and fails when the installed package reports a
2 | # version string that contains "git".
3 | name: Check Release Version
4 |
5 | on:
6 |   release:
7 |     types:
8 |       - created
9 |       - edited
10 |       - published
11 |
12 | jobs:
13 |   versioncheck:
14 |     runs-on: ubuntu-latest
15 |
16 |     steps:
17 |       - uses: actions/checkout@v4
18 |
19 |       - name: Set up Python
20 |         uses: actions/setup-python@v5
21 |         with:
22 |           python-version: "3.x"
23 |
24 |       - name: Install dependencies
25 |         run: |
26 |           python -m pip install --upgrade pip
27 |           pip install setuptools
28 |           pip install .
29 |
30 |       # `grep -qv git` exits non-zero when every output line contains "git";
31 |       # presumably development builds embed "git" in their version string.
32 |       - name: Ensure graphtage.version.DEV_BUILD == False
33 |         run: graphtage -dumpversion | grep -qv git
34 |
--------------------------------------------------------------------------------
/.github/workflows/pip-audit.yml:
--------------------------------------------------------------------------------
1 | # IMPORTANT: Read and understand this template fully before applying it.
2 |
3 | name: Scan dependencies for vulnerabilities with pip-audit
4 |
5 | on:
6 |   push:
7 |     branches:
8 |       - master
9 |   pull_request:
10 |     branches:
11 |       - master
12 |   schedule:
13 |     # Once a day, at minute 0 of hour 12.
14 |     - cron: '0 12 * * *'
15 |
16 | jobs:
17 |   pip-audit:
18 |     runs-on: ubuntu-latest
19 |
20 |     steps:
21 |       - name: Checkout repository
22 |         uses: actions/checkout@v4
23 |
24 |       - name: Install Python
25 |         uses: actions/setup-python@v5
26 |         with:
27 |           # IMPORTANT: You may need a more specific version here.
28 |           python-version: '3.x'
29 |
30 |       # The project is installed into a dedicated virtual environment, which
31 |       # is the environment handed to the audit step below.
32 |       - name: Install project
33 |         run: |
34 |           python -m venv /tmp/pip-audit-env
35 |           source /tmp/pip-audit-env/bin/activate
36 |
37 |           python -m pip install --upgrade pip setuptools wheel
38 |           python -m pip install .
39 |
40 |       - name: Run pip-audit
41 |         uses: pypa/gh-action-pip-audit@v1.0.8
42 |         with:
43 |           virtual-environment: /tmp/pip-audit-env
44 |
--------------------------------------------------------------------------------
/.github/workflows/publish_docs.yml:
--------------------------------------------------------------------------------
1 | # Builds the Sphinx documentation and commits it to the gh-pages branch in a
2 | # directory named after the pushed ref (branch or tag name). Pushes to master
3 | # additionally repoint the "latest" symlink at that directory.
4 | name: Publish documentation
5 |
6 | on:
7 |   push:
8 |     branches:
9 |       - master
10 |     tags:
11 |       - "v*"
12 |
13 | jobs:
14 |   deploydocs:
15 |     runs-on: ubuntu-latest
16 |     permissions:
17 |       # NOTE: Needed to push to the repository.
18 |       contents: write
19 |     steps:
20 |       - uses: actions/checkout@v4
21 |         with:
22 |           path: graphtage
23 |       # Strip the leading "refs/<kind>/" from the ref and export the remainder
24 |       # as VERSION for the later steps. The value is appended to the
25 |       # $GITHUB_ENV file: the ::set-env workflow command is deprecated and only
26 |       # runs with the insecure ACTIONS_ALLOW_UNSECURE_COMMANDS opt-in.
27 |       - name: Get the version
28 |         id: get_version
29 |         run: echo "VERSION=${GITHUB_REF#refs/*/}" >> "$GITHUB_ENV"
30 |       - name: Set up Python 3.8
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: "3.8"
34 |       - name: Install dependencies
35 |         run: |
36 |           cd graphtage
37 |           python -m pip install --upgrade pip
38 |           pip install setuptools
39 |           pip install .[dev]
40 |       - name: Build documentation
41 |         run: |
42 |           cd graphtage/docs
43 |           make html
44 |       - name: Checkout gh-pages branch
45 |         uses: actions/checkout@v4
46 |         with:
47 |           ref: gh-pages
48 |           path: gh-pages
49 |           fetch-depth: 0
50 |       # VERSION is quoted throughout so an unusual ref name cannot be
51 |       # word-split or glob-expanded by the shell.
52 |       - name: Commit documentation changes
53 |         run: |
54 |           cd gh-pages
55 |           git pull
56 |           rm -rf "${VERSION}"
57 |           mkdir "${VERSION}"
58 |           cp -r ../graphtage/docs/_build/html/* "${VERSION}/"
59 |           cd "${VERSION}"
60 |           git config --local user.email "action@github.com"
61 |           git config --local user.name "GitHub Action"
62 |           git add .
63 |           if [ "$GITHUB_REF" == "refs/heads/master" ]; then
64 |             cd ..
65 |             # This is not a tag, so it is the latest:
66 |             rm -f latest
67 |             ln -s "${VERSION}" latest
68 |             git add latest
69 |           fi
70 |           git commit -m "Update documentation for ${GITHUB_REF}" -a || true
71 |           # The above command will fail if no changes were present, so we ignore
72 |           # the return code.
73 |       # NOTE(review): this action is referenced by branch (@master) rather than
74 |       # a tag or commit; consider pinning it.
75 |       - name: Push changes
76 |         uses: ad-m/github-push-action@master
77 |         with:
78 |           branch: gh-pages
79 |           directory: gh-pages
80 |           github_token: ${{ secrets.GITHUB_TOKEN }}
81 |
--------------------------------------------------------------------------------
/.github/workflows/pythonpackage.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Python package
5 |
6 | on:
7 |   push:
8 |     branches:
9 |       - master
10 |   pull_request:
11 |     branches:
12 |       - master
13 |
14 | jobs:
15 |   build:
16 |     runs-on: ubuntu-latest
17 |
18 |     strategy:
19 |       matrix:
20 |         # Every entry is quoted so YAML keeps it a string; an unquoted 3.10
21 |         # would load as the float 3.1.
22 |         python-version:
23 |           - "3.8"
24 |           - "3.9"
25 |           - "3.10"
26 |           - "3.11"
27 |
28 |     steps:
29 |       - uses: actions/checkout@v4
30 |
31 |       - name: Set up Python ${{ matrix.python-version }}
32 |         uses: actions/setup-python@v5
33 |         with:
34 |           python-version: ${{ matrix.python-version }}
35 |
36 |       - name: Install dependencies
37 |         run: |
38 |           python -m pip install --upgrade pip
39 |           pip install setuptools
40 |           pip install .[dev]
41 |
42 |       - name: Lint with flake8
43 |         run: |
44 |           pip install flake8
45 |           # stop the build if there are Python syntax errors or undefined names
46 |           flake8 graphtage test --count --select=E9,F63,F7,F82 --show-source --statistics
47 |           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
48 |           flake8 graphtage test --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
49 |
50 |       - name: Test building documentation
51 |         run: |
52 |           cd docs
53 |           make html
54 |
55 |       - name: Test with pytest
56 |         run: |
57 |           pip install pytest
58 |           pytest
59 |
--------------------------------------------------------------------------------
/.github/workflows/pythonpublish.yml:
--------------------------------------------------------------------------------
1 | # This workflow uploads the Python package using Twine when a release is published
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 |   release:
8 |     types:
9 |       - published
10 |
11 | jobs:
12 |   deploy:
13 |     runs-on: ubuntu-latest
14 |
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |
18 |       - name: Set up Python
19 |         uses: actions/setup-python@v5
20 |         with:
21 |           python-version: "3.x"
22 |
23 |       - name: Install dependencies
24 |         run: |
25 |           python -m pip install --upgrade pip
26 |           pip install setuptools wheel twine
27 |
28 |       # Twine reads its credentials from the TWINE_* environment variables,
29 |       # which are filled from repository secrets.
30 |       - name: Build and publish
31 |         env:
32 |           TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
33 |           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
34 |         run: |
35 |           python setup.py sdist bdist_wheel
36 |           twine upload dist/*
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .cache
3 | .python_history
4 | *.pyc
5 | build/
6 | dist/
7 | graphtage.egg-info
8 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | # This CITATION.cff file was generated with cffinit.
2 | # Visit https://bit.ly/cffinit to generate yours today!
3 |
4 | cff-version: 1.2.0
5 | title: Graphtage
6 | message: >-
7 | Graphtage is a command-line utility and underlying library
8 | for semantically comparing and merging tree-like
9 | structures, such as JSON, XML, HTML, YAML, plist, and CSS
10 | files.
11 | type: software
12 | authors:
13 | - given-names: Evan
14 | family-names: Sultanik
15 | email: evan.sultanik@trailofbits.com
16 | affiliation: Trail of Bits
17 | orcid: 'https://orcid.org/0000-0002-6246-1422'
18 | repository-code: 'https://github.com/trailofbits/graphtage'
19 | url: 'https://trailofbits.github.io/graphtage/'
20 | abstract: >-
21 | Graphtage is a command-line utility and underlying library
22 | for semantically comparing and merging tree-like
23 | structures, such as JSON, XML, HTML, YAML, plist, and CSS
24 | files. Its name is a portmanteau of “graph” and
25 | “graftage”—the latter being the horticultural practice of
26 | joining two trees together such that they grow as one.
27 | keywords:
28 | - diffing
29 | - graph isomorphism
30 | - edit distance
31 | license: LGPL-3.0
32 |
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @ESultanik
2 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at jean.bisutti@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | recursive-include test *.*
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Graphtage
2 |
3 | [PyPI version](https://badge.fury.io/py/graphtage)
4 | [Tests](https://github.com/trailofbits/graphtage/actions)
5 | [Slack Status](https://slack.empirehacking.nyc)
6 |
7 | Graphtage is a command-line utility and [underlying library](https://trailofbits.github.io/graphtage/latest/library.html)
8 | for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, plist, and CSS files. Its name is a
9 | portmanteau of “graph” and “graftage”—the latter being the horticultural practice of joining two trees together such
10 | that they grow as one.
11 |
12 | ```console
13 | $ echo Original: && cat original.json && echo Modified: && cat modified.json
14 | ```
15 | ```json
16 | Original:
17 | {
18 | "foo": [1, 2, 3, 4],
19 | "bar": "testing"
20 | }
21 | Modified:
22 | {
23 | "foo": [2, 3, 4, 5],
24 | "zab": "testing",
25 | "woo": ["foobar"]
26 | }
27 | ```
28 | ```console
29 | $ graphtage original.json modified.json
30 | ```
31 | ```json
32 | {
33 | "z̟b̶ab̟r̶": "testing",
34 | "foo": [
35 | 1̶,̶
36 | 2,
37 | 3,
38 | 4,̟
39 | 5̟
40 | ],̟
41 | "̟w̟o̟o̟"̟:̟ ̟[̟
42 | "̟f̟o̟o̟b̟a̟r̟"̟
43 | ]̟
44 | }
45 | ```
46 |
47 | ## Installation
48 |
49 | ```console
50 | $ pip3 install graphtage
51 | ```
52 |
53 | ## Command Line Usage
54 |
55 | ### Output Formatting
56 | Graphtage performs an analysis on an intermediate representation of the trees that is divorced from the filetypes of the
57 | input files. This means, for example, that you can diff a JSON file against a YAML file. Also, the output format can be
58 | different from the input format(s). By default, Graphtage will format the output diff in the same file format as the
59 | first input file. But one could, for example, diff two JSON files and format the output in YAML. There are several
60 | command-line arguments to specify these transformations, such as `--format`; please check the `--help` output for more
61 | information.
62 |
63 | By default, Graphtage pretty-prints its output with as many line breaks and indents as possible.
64 | ```json
65 | {
66 | "foo": [
67 | 1,
68 | 2,
69 | 3
70 | ],
71 | "bar": "baz"
72 | }
73 | ```
74 | Use the `--join-lists` or `-jl` option to suppress linebreaks after list items:
75 | ```json
76 | {
77 | "foo": [1, 2, 3],
78 | "bar": "baz"
79 | }
80 | ```
81 | Likewise, use the `--join-dict-items` or `-jd` option to suppress linebreaks after key/value pairs in a dict:
82 | ```json
83 | {"foo": [
84 | 1,
85 | 2,
86 | 3
87 | ], "bar": "baz"}
88 | ```
89 | Use `--condensed` or `-j` to apply both of these options:
90 | ```json
91 | {"foo": [1, 2, 3], "bar": "baz"}
92 | ```
93 |
94 | The `--only-edits` or `-e` option will print out a list of edits rather than applying them to the input file in place.
95 |
96 | The `--edit-digest` or `-d` option is like `--only-edits` but prints a more concise context for each edit that is more
97 | human-readable.
98 |
99 | ### Matching Options
100 | By default, Graphtage tries to match all possible pairs of elements in a dictionary.
101 |
102 | Matching two dictionaries with each other is hard. Although computationally tractable, this can sometimes be onerous for
103 | input files with huge dictionaries. Graphtage has three different strategies for matching dictionaries:
104 | 1. `--dict-strategy match` (the most computationally expensive) tries to match all pairs of keys and values between the
105 | two dictionaries, resulting in a match of minimum edit distance;
106 | 2. `--dict-strategy none` (the least computationally expensive) will not attempt to match any key/value pairs unless
107 | they have the exact same key; and
108 | 3. `--dict-strategy auto` (the default) will automatically match the values of any key-value pairs that have identical
109 | keys and then use the `match` strategy for the remainder of key/value pairs.
110 |
111 | See [Pull Request #51](https://github.com/trailofbits/graphtage/pull/51) for some examples of how these strategies
112 | affect output.
113 |
114 | The `--no-list-edits` or `-l` option will not consider interstitial insertions and removals when comparing two lists.
115 | The `--no-list-edits-when-same-length` or `-ll` option is a less drastic version of `-l` that will behave normally for
116 | lists that are of different lengths but behave like `-l` for lists that are of the same length.
117 |
118 | ### ANSI Color
119 | By default, Graphtage will only use ANSI color in its output if it is run from a TTY. If, for example, you would like
120 | to have Graphtage emit colorized output from a script or pipe, use the `--color` or `-c` argument. To disable color even
121 | when running on a TTY, use `--no-color`.
122 |
123 | ### HTML Output
124 | Graphtage can optionally emit the diff in HTML with the `--html` option.
125 | ```console
126 | $ graphtage --html original.json modified.json > diff.html
127 | ```
128 |
129 | ### Status and Logging
130 | By default, Graphtage prints status messages and a progress bar to STDERR. To suppress this, use the `--no-status`
131 | option. To additionally suppress all but critical log messages, use `--quiet`. Fine-grained control of log messages is
132 | via the `--log-level` option.
133 |
134 | ## Why does Graphtage exist?
135 |
136 | Diffing tree-like structures with unordered elements is tough. Say you want to compare two JSON files.
137 | There are [limited tools available](https://github.com/zgrossbart/jdd), which are effectively equivalent to
138 | canonicalizing the JSON (_e.g._, sorting dictionary elements by key) and performing a standard diff. This is not always
139 | sufficient. For example, if a key in a dictionary is changed but its value is not, a traditional diff
140 | will conclude that the entire key/value pair was replaced by the new one, even though the only change was the key
141 | itself. See [our documentation](https://trailofbits.github.io/graphtage/latest/howitworks.html) for more information.
142 |
143 | ## Using Graphtage as a Library
144 |
145 | Graphtage has a complete API for programmatically operating its diffing capabilities.
146 | When using Graphtage as a library, it is also capable of diffing in-memory Python objects.
147 | This can be useful for debugging Python code, for example, to determine a differential between two objects.
148 | See [our documentation](https://trailofbits.github.io/graphtage/latest/library.html) for more information.
149 |
150 | ## Extending Graphtage
151 |
152 | Graphtage is designed to be extensible: New filetypes can easily be defined, as well as new node types, edit types,
153 | formatters, and printers. See [our documentation](https://trailofbits.github.io/graphtage/latest/extending.html) for
154 | more information.
155 |
156 | Complete API documentation is available [here](https://trailofbits.github.io/graphtage/latest/package.html).
157 |
158 | ## License and Acknowledgements
159 |
160 | This research was developed by [Trail of Bits](https://www.trailofbits.com/) with partial funding from the Defense
161 | Advanced Research Projects Agency (DARPA) under the SafeDocs program as a subcontractor to [Galois](https://galois.com).
162 | It is licensed under the [GNU Lesser General Public License v3.0](LICENSE).
163 | [Contact us](mailto:opensource@trailofbits.com) if you're looking for an exception to the terms.
164 | © 2020–2023, Trail of Bits.
165 |
--------------------------------------------------------------------------------
/bindist/.gitignore:
--------------------------------------------------------------------------------
1 | # PyInstaller working directories and the generated spec file
2 | build
3 | dist
4 | graphtage.spec
5 | # Distribution archives; the Makefile in this directory writes a .zip
6 | graphtage-*.tgz
7 | graphtage-*.zip
--------------------------------------------------------------------------------
/bindist/Makefile:
--------------------------------------------------------------------------------
1 | # Builds a one-file Graphtage executable with PyInstaller and zips it together
2 | # with the README and LICENSE into graphtage-<version>-<os>-<arch>.zip.
3 |
4 | # ":=" evaluates each $(shell ...) once, when the Makefile is read, instead of
5 | # re-running the command every time one of these variables is referenced.
6 | GRAPHTAGE_VERSION := $(shell graphtage --version 2>&1 | sed "s/Graphtage version //")
7 | DIST_VERSION := $(shell uname | tr '[:upper:]' '[:lower:]')-$(shell uname -m | tr '[:upper:]' '[:lower:]')
8 | DIST_NAME := graphtage-$(GRAPHTAGE_VERSION)-$(DIST_VERSION)
9 | DIST_FILE := $(DIST_NAME).zip
10 |
11 | # Default target. Declared phony so the archive is rebuilt on every invocation.
12 | .PHONY: $(DIST_FILE)
13 | $(DIST_FILE):
14 | 	pyinstaller -F -y --name graphtage graphtage_bin.py
15 | 	@rm -rf $(DIST_NAME)
16 | 	mkdir $(DIST_NAME)
17 | 	cp dist/graphtage $(DIST_NAME)/
18 | 	cp -p ../README.md $(DIST_NAME)/
19 | 	cp -p ../LICENSE $(DIST_NAME)/
20 | 	zip -r $(DIST_FILE) $(DIST_NAME)
21 | 	rm -rf $(DIST_NAME)
22 |
23 | # Prints the archive file name so callers can locate the build output.
24 | .PHONY: dist-name
25 | dist-name:
26 | 	@echo $(DIST_FILE)
27 |
28 | # Removes everything the build generates.
29 | .PHONY: clean
30 | clean:
31 | 	rm -rf graphtage.spec dist build $(DIST_FILE) $(DIST_NAME)
32 |
--------------------------------------------------------------------------------
/bindist/graphtage_bin.py:
--------------------------------------------------------------------------------
1 | from graphtage.__main__ import main
2 |
3 | if __name__ == "__main__":
4 | import sys
5 | sys.exit(main())
6 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | graphtage*.rst
3 | package.rst
4 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile graphtage.rst package.rst
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
22 | .PHONY: graphtage.rst
23 | graphtage.rst package.rst:
24 | # sphinx-apidoc wasn't configurable enough, so I wrote my own version:
25 | python3 build_api.py
26 | # graphtage.py, edits.py, and tree.py are all merged into the main graphtage module by __init__.py,
27 | # so we should not generate separate submodules for them:
28 | #sphinx-apidoc -f -e -M -T -o . ../graphtage ../graphtage/graphtage.py ../graphtage/edits.py ../graphtage/tree.py
29 |
30 | .PHONY: clean
31 | clean:
32 | rm -rf _build graphtage*.rst package.rst
33 |
--------------------------------------------------------------------------------
/docs/_static/localtoc.js:
--------------------------------------------------------------------------------
1 | $( document ).ready(function (){
2 |
3 | var createList = function(selector){
4 |
5 | var ul = $('
');
6 | var selected = $(selector);
7 |
8 | if (selected.length === 0){
9 | return;
10 | }
11 |
12 | selected.clone().each(function (i,e){
13 |
14 | var p = $(e).children('.descclassname');
15 | var n = $(e).children('.descname');
16 | var l = $(e).children('.headerlink');
17 |
18 | var a = $('');
19 | a.attr('href',l.attr('href')).attr('title', 'Link to this definition');
20 |
21 | a.append(p).append(n);
22 |
23 | var entry = $('').append(a);
24 | ul.append(entry);
25 | });
26 | return ul;
27 | }
28 |
29 | if($('dl.class > dt').length || $('dl.function > dt').length || $('dl.data > dt').length) {
30 | /* collapse any open menus */
31 | var menu = $('.wy-menu ul:first');
32 | menu.find('.current').removeClass("current");
33 |
34 | var pagename = $("h1")[0].innerText;
35 |
36 | if(pagename === "graphtage package") {
37 | pagename = "graphtage module";
38 | }
39 |
40 | var header = $('' + pagename + '')
41 | var ul = $('');
42 | header.append(ul);
43 |
44 | menu.find('ul:first').prepend(header);
45 |
46 | var x = [];
47 | x.push(['Classes','dl.class > dt']);
48 | x.push(['Functions','dl.function > dt']);
49 | x.push(['Variables','dl.data > dt']);
50 |
51 | var first = true;
52 |
53 | x.forEach(function (e) {
54 | var l = createList(e[1]);
55 | if (l) {
56 | var li = $('' + e[0] + '')
57 | if(first) {
58 | li.addClass("current");
59 | first = false;
60 | }
61 | li.append(l);
62 | ul.append(li);
63 | }
64 | });
65 | }
66 |
67 | });
68 |
--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {%- extends "!layout.html" %}
2 |
3 | {% block footer %}
4 | {% if not READTHEDOCS %}
5 |
6 |
7 | Graphtage Documentation
8 | {{ version }}
9 |
10 |
11 |
43 |
44 | {% endif %}
45 | {% endblock %}
--------------------------------------------------------------------------------
/docs/_templates/searchbox.html:
--------------------------------------------------------------------------------
1 | {%- if builder != 'singlehtml' %}
2 | {{ version }}
3 |
4 |
9 |
10 | {%- endif %}
11 |
--------------------------------------------------------------------------------
/docs/build_api.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | import sys
4 | from pathlib import Path
5 |
6 |
7 | DOCS_PATH = os.path.dirname(os.path.realpath(__file__))
8 | ROOT_PATH = Path(DOCS_PATH).parents[0]
9 |
10 | sys.path = [ROOT_PATH] + sys.path
11 |
12 | import graphtage
13 |
14 | MODULES = []
15 |
16 |
17 | def process_module(module):
18 | shortname = module.__name__.split('.')[-1]
19 | with open(os.path.join(DOCS_PATH, f"{module.__name__}.rst"), 'w') as f:
20 | f.write(f"{module.__name__}\n")
21 | f.write(f"{'=' * len(module.__name__)}\n")
22 | f.write(f"""
23 | .. automodule:: {module.__name__}
24 | """)
25 | classes = []
26 | for name, c in inspect.getmembers(module, inspect.isclass):
27 | if hasattr(c, '__module__') and c.__module__ == module.__name__ and not name.startswith('_'):
28 | classes.append(c)
29 | if classes:
30 | f.write(f"""
31 | {shortname} classes
32 | {'-' * len(shortname)}--------
33 | """)
34 | for cls in sorted(classes, key=lambda c: c.__name__):
35 | f.write(f"""
36 | {cls.__name__}
37 | {'*' * len(cls.__name__)}
38 |
39 | .. autoclass:: {cls.__name__}
40 | :members:
41 | :undoc-members:
42 | :inherited-members:
43 | :show-inheritance:
44 | """)
45 |
46 | functions = []
47 | for name, func in inspect.getmembers(module, inspect.isfunction):
48 | if hasattr(func, '__module__') and func.__module__ == module.__name__ and not name.startswith('_'):
49 | functions.append(func)
50 | if functions:
51 | f.write(f"""
52 | {shortname} functions
53 | {'-' * len(shortname)}----------
54 | """)
55 | for func in sorted(functions, key=lambda o: o.__name__):
56 | f.write(f"""
57 | {func.__name__}
58 | {'*' * len(func.__name__)}
59 |
60 | .. autofunction:: {func.__name__}
61 | """)
62 |
63 | # attrs = []
64 | # for name in dir(module):
65 | # if name.startswith('_'):
66 | # continue
67 | # attr = getattr(module, name)
68 | # if not inspect.isfunction(attr) and not inspect.isclass(attr) and not inspect.ismodule(attr) and (
69 | # not hasattr(attr, '__module__') or attr.__module__ == module.__name__
70 | # ) and inspect.getattr_static(attr, '__doc__') is not None:
71 | # attrs.append(name)
72 | # if attrs:
73 | # f.write(f"""
74 | # {shortname} attributes
75 | # {'-' * len(shortname)}-----------
76 | # """)
77 | # for name in sorted(attrs):
78 | # f.write(f"""
79 | # {name}
80 | # {'*' * len(name)}
81 | #
82 | # .. autoattribute:: {name}
83 | # """)
84 |
85 |
86 |
87 | for name, obj in inspect.getmembers(graphtage, inspect.ismodule):
88 | if obj.__name__.startswith('graphtage') and name not in ('graphtage', 'tree', 'edits'):
89 | MODULES.append(obj)
90 |
91 | MODULES = [graphtage] + sorted(MODULES, key=lambda m: m.__name__)
92 |
93 | for m in MODULES:
94 | process_module(m)
95 |
96 | with open(os.path.join(DOCS_PATH, "package.rst"), 'w') as f:
97 | f.write("""Graphtage API
98 | -------------
99 |
100 | .. toctree::
101 | :maxdepth: 4
102 |
103 | """)
104 | f.write('\n'.join(f' {m.__name__}' for m in MODULES))
105 |
--------------------------------------------------------------------------------
/docs/builders.rst:
--------------------------------------------------------------------------------
1 | .. _Builders:
2 |
3 | Constructing Graphtage Trees
4 | ============================
5 |
6 | Graphtage operates on trees represented by the :class:`graphtage.TreeNode` base class.
7 | There are various predefined specializations of tree nodes, such as :class:`graphtage.IntegerNode` for integers, :class:`graphtage.ListNode` for lists, and :class:`graphtage.DictNode` for dictionaries. :class:`graphtage.TreeNode` has an optional :attr:`parent <graphtage.TreeNode.parent>` and a potentially empty set of :func:`children <graphtage.TreeNode.children>`.
8 |
9 | Graphtage provides a :class:`graphtage.builder.Builder` class for conveniently converting arbitrary objects into a tree of :class:`TreeNode <graphtage.TreeNode>` objects. It uses Python magic to define the conversions.
10 |
11 | .. code-block:: python
12 |
13 | from graphtage import IntegerNode, TreeNode
14 | from graphtage.builder import Builder
15 |
16 | class CustomBuilder(Builder):
17 | @Builder.builder(int)
18 | def build_int(self, node: int, children: list[TreeNode]):
19 | return IntegerNode(node)
20 |
21 | >>> CustomBuilder().build_tree(10)
22 | IntegerNode(10)
23 |
24 | The :func:`@Builder.builder(int) <graphtage.builder.Builder.builder>` decorator specifies that the function is able to build a Graphtage `TreeNode` object from inputs that are instances of the type `int` (as determined by :func:`isinstance`). If there are multiple builder functions that match a given object, the function associated with the most specialized type is chosen. For example:
25 |
26 | .. code-block:: python
27 |
28 | class Foo:
29 | pass
30 |
31 |
32 | class Bar(Foo):
33 | pass
34 |
35 |
36 | class CustomBuilder(Builder):
37 | @Builder.builder(Foo)
38 | def build_foo(self, node: Foo, children: list[TreeNode]):
39 | return StringNode("foo")
40 |
41 | @Builder.builder(Bar)
42 | def build_bar(self, node: Bar, children: list[TreeNode]):
43 | return StringNode("bar")
44 |
45 | >>> CustomBuilder().build_tree(Foo())
46 | StringNode("foo")
47 | >>> CustomBuilder().build_tree(Bar())
48 | StringNode("bar")
49 |
50 | Expanding Children
51 | ------------------
52 |
53 | So far we have only given examples of the production of leaf nodes, like integers and strings.
54 | What if a node has children, like a list? We can handle this using the :func:`@Builder.expander <graphtage.builder.Builder.expander>` decorator. Here is an example of how a list can be built:
55 |
56 | .. code-block:: python
57 |
58 | class CustomBuilder(Builder):
59 | ...
60 |
61 | @Builder.expander(list)
62 | def expand_list(self, node: list):
63 | """Returns an iterable over the node's children"""
64 | yield from node
65 |
66 | @Builder.builder(list)
67 | def build_list(self, node: list, children: list[TreeNode]):
68 | return ListNode(children)
69 |
70 | >>> CustomBuilder().build_tree([1, 2, 3, 4])
71 | ListNode([IntegerNode(1), IntegerNode(2), IntegerNode(3), IntegerNode(4)])
72 |
73 | If an expander is not defined for a type, it is assumed that the type is a leaf with no children.
74 |
75 | If the root node or one of its descendants is of a type that has no associated builder function, a :exc:`NotImplementedError` is raised.
76 |
77 | Graphtage has a subclassed builder :class:`graphtage.builder.BasicBuilder` that has builders and expanders for the Python basic types like :class:`int`, :class:`float`, :class:`str`, :class:`bytes`, :class:`list`, :class:`dict`, :class:`set`, and :class:`tuple`. You can extend :class:`graphtage.builder.BasicBuilder` to implement support for additional types.
78 |
79 | Custom Nodes
80 | ------------
81 |
82 | Graphtage provides abstract classes like :class:`graphtage.ContainerNode` and :class:`graphtage.SequenceNode` to aid in the implementation of custom node types. But the easiest way to define a custom node type is to extend off of :class:`graphtage.dataclasses.DataClass`.
83 |
84 |
85 | .. code-block:: python
86 |
87 | from graphtage import IntegerNode, ListNode, StringNode
88 | from graphtage.dataclasses import DataClass
89 |
90 | class CustomNode(DataClass):
91 | name: StringNode
92 | value: IntegerNode
93 | attributes: ListNode
94 |
95 | This will automatically build a node type that has three children: a string, an integer, and a list.
96 |
97 | >>> CustomNode(name=StringNode("the name"), value=IntegerNode(1337), attributes=ListNode((IntegerNode(1), IntegerNode(2), IntegerNode(3))))
98 |
99 | Let's say you have another, non-graphtage class that corresponds to :class:`CustomNode`:
100 |
101 | .. code-block:: python
102 |
103 | class NonGraphtageClass:
104 | name: str
105 | value: int
106 | attributes: list[int]
107 |
108 | You can add support for building Graphtage nodes from this custom class as follows:
109 |
110 | .. code-block:: python
111 |
112 | class CustomBuilder(BasicBuilder):
113 | @Builder.expander(NonGraphtageClass)
114 | def expand_non_graphtage_class(self, node: NonGraphtageClass):
115 | yield node.name
116 | yield node.value
117 | yield node.attributes
118 |
119 | @Builder.builder(NonGraphtageClass)
120 | def build_non_graphtage_class(self, node: NonGraphtageClass, children: list[TreeNode]) -> CustomNode:
121 | return CustomNode(*children)
122 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 |
13 | import os
14 | from pathlib import Path
15 |
16 | VERSION_MODULE_PATH = os.path.join(Path(os.path.dirname(__file__)).parents[0], "graphtage", "version.py")
17 |
18 |
19 | def get_version_string():
20 | attrs = {}
21 | with open(VERSION_MODULE_PATH) as f:
22 | exec(f.read(), attrs)
23 | vstring = attrs['VERSION_STRING']
24 | if 'git' in vstring:
25 | return vstring
26 | else:
27 | return f"v{vstring}"
28 |
29 |
30 | # -- Project information -----------------------------------------------------
31 |
32 | project = 'Graphtage'
33 | copyright = '2020, Trail of Bits'
34 | author = 'Evan Sultanik'
35 |
36 | # The full version, including alpha/beta/rc tags
37 | release = get_version_string()
38 | version = release
39 | github_url = 'https://github.com/trailofbits/graphtage/'
40 | if 'git' not in version:
41 | github_url = f"{github_url}releases/tag/{ version }"
42 |
43 |
44 | # -- General configuration ---------------------------------------------------
45 |
46 | # Add any Sphinx extension module names here, as strings. They can be
47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
48 | # ones.
49 | extensions = [
50 | 'sphinx.ext.autodoc',
51 | 'sphinx.ext.napoleon',
52 | 'sphinx.ext.intersphinx',
53 | 'sphinx.ext.todo',
54 | 'sphinx.ext.autosectionlabel',
55 | 'sphinx_rtd_theme',
56 | #'sphinxcontrib.fulltoc'
57 | ]
58 |
59 | # Add any paths that contain templates here, relative to this directory.
60 | templates_path = ['_templates']
61 |
62 | # List of patterns, relative to source directory, that match files and
63 | # directories to ignore when looking for source files.
64 | # This pattern also affects html_static_path and html_extra_path.
65 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
66 |
67 |
68 | # -- Options for HTML output -------------------------------------------------
69 |
70 | # The theme to use for HTML and HTML Help pages. See the documentation for
71 | # a list of builtin themes.
72 | #
73 | #html_theme = 'classic'
74 | html_theme = 'sphinx_rtd_theme'
75 |
76 | html_theme_options = {
77 | 'canonical_url': f'https://trailofbits.github.io/graphtage/latest/',
78 | 'logo_only': False,
79 | 'display_version': False, # This is manually configured in our custom templates
80 | 'prev_next_buttons_location': 'bottom',
81 | 'style_external_links': True,
82 | #'vcs_pageview_mode': '',
83 | #'style_nav_header_background': 'white',
84 | # Toc options
85 | 'collapse_navigation': True,
86 | 'sticky_navigation': True,
87 | 'navigation_depth': 4,
88 | 'includehidden': True,
89 | 'titles_only': False
90 | }
91 |
92 | html_context = {
93 | 'github_url': github_url
94 | }
95 |
96 | # Add any paths that contain custom static files (such as style sheets) here,
97 | # relative to this directory. They are copied after the builtin static files,
98 | # so a file named "default.css" will overwrite the builtin "default.css".
99 | html_static_path = ['_static']
100 |
101 | #html_js_files = [
102 | # 'localtoc.js',
103 | #]
104 |
105 |
106 | def skip(app, what, name, obj, would_skip, options):
107 | if name == "__init__":
108 | return False
109 | return would_skip
110 |
111 |
112 | def docstring_callback(app, what, name, obj, options, lines: list):
113 | if what == 'class' or what == 'function':
114 | if lines and lines[0].strip():
115 | lines.insert(1, '')
116 | lines.insert(2, name)
117 | lines.insert(3, '*' * len(name))
118 | if len(lines) == 4:
119 | lines.append('')
120 |
121 |
122 | def setup(app):
123 | app.connect("autodoc-skip-member", skip)
124 | #app.connect('autodoc-process-docstring', docstring_callback)
125 |
126 |
127 | add_package_names = False
128 | # prefix each section label with the name of the document it is in, followed by a colon
129 | autosectionlabel_prefix_document = True
130 | intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
131 | napoleon_include_private_with_doc = True
132 | napoleon_include_special_with_doc = True
133 | todo_include_todos = True
134 |
135 | #autodoc_default_options = {
136 | # 'inherited-members': True
137 | #}
138 |
--------------------------------------------------------------------------------
/docs/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trailofbits/graphtage/23654acf488eb803a60ce27ac515ee0755feb1a7/docs/example.png
--------------------------------------------------------------------------------
/docs/extending.rst:
--------------------------------------------------------------------------------
1 | Extending Graphtage
2 | ===================
3 |
4 | Graphtage is designed to be extensible; new filetypes can easily be defined, as well as new node types, edit types,
5 | formatters, and printers. This section will give some examples on how to implement each.
6 |
7 | .. toctree::
8 | :maxdepth: 4
9 |
10 | builders
11 | filetypes
12 | printing
13 |
--------------------------------------------------------------------------------
/docs/filetypes.rst:
--------------------------------------------------------------------------------
1 | .. _Filetypes:
2 |
3 | Defining New Filetypes
4 | ======================
5 |
6 | Implementing support for a new Graphtage filetype entails extending the :class:`graphtage.Filetype` class. Subclassing :class:`graphtage.Filetype` automatically registers it with Graphtage.
7 |
8 | Filetype Matching
9 | -----------------
10 |
11 | Input files are matched to an associated :class:`graphtage.Filetype` using MIME types. Each :class:`graphtage.Filetype` registers one or more MIME types for which it will be responsible. Input file MIME types are classified using the :mod:`mimetypes` module. Sometimes a filetype does not have a standardized MIME type or is not properly classified by the :mod:`mimetypes` module. For example, Graphtage's :class:`graphtage.pickle.Pickle` filetype has neither. You can add support for such a filetype as follows:
12 |
13 | .. code-block:: python
14 |
15 | import mimetypes
16 |
17 | if '.pkl' not in mimetypes.types_map and '.pickle' not in mimetypes.types_map:
18 | mimetypes.add_type('application/x-python-pickle', '.pkl')
19 | mimetypes.suffix_map['.pickle'] = '.pkl'
20 |
21 | Implementing a New Filetype
22 | ---------------------------
23 |
24 | With the MIME type registered, here is a sketch of how one might define the Pickle filetype:
25 |
26 | .. code-block:: python
27 |
28 | from graphtage import BuildOptions, Filetype, GraphtageFormatter, TreeNode
29 |
30 | class Pickle(Filetype):
31 | def __init__(self):
32 | super().__init__(
33 | "pickle", # a unique identifier
34 | "application/python-pickle", # the primary MIME type
35 | "application/x-python-pickle" # an optional secondary MIME type
36 | )
37 |
38 | def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
39 | # return the root node of the tree built from the given pickle file
40 |
41 | def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
42 | # the same as the build_tree() function,
43 | # but on error return a string containing the error message
44 | #
45 | # for example:
46 | try:
47 | return self.build_tree(path=path, options=options)
48 | except PickleDecodeError as e:
49 | return f"Error deserializing {os.path.basename(path)}: {e!s}"
50 |
51 | def get_default_formatter(self) -> GraphtageFormatter:
52 | # return the formatter associated with this file type
53 |
--------------------------------------------------------------------------------
/docs/howitworks.rst:
--------------------------------------------------------------------------------
1 | How Graphtage Works
2 | ===================
3 |
4 | In general, optimally mapping one graph to another
5 | cannot be executed in polynomial time [#]_, and is therefore not
6 | tractable for graphs of any useful size [*]_. This is true even for restricted classes of graphs like DAGs [#]_.
7 | However, trees and forests are a special case that *can* be mapped in polynomial time, with reasonable constraints on
8 | the types of edits possible. Graphtage exploits this.
9 |
10 | Why Mapping Trees is Complex
11 | ----------------------------
12 |
13 | Ordered nodes in the tree (*e.g.*, JSON lists) and, in particular, mappings (*e.g.*, JSON dicts) are challenging. Most
14 | extant diffing algorithms and utilities assume that the structures are ordered. Take this JSON as an example:
15 |
16 | .. list-table::
17 | :class: align-center
18 |
19 | * - Original JSON
20 | - Modified JSON
21 | * - .. code-block:: json
22 |
23 | {
24 | "foo": [1, 2, 3, 4],
25 | "bar": "testing"
26 | }
27 |
28 | - .. code-block:: json
29 |
30 | {
31 | "foo": [2, 3, 4, 5],
32 | "zab": "testing",
33 | "woo": ["foobar"]
34 | }
35 |
36 | Existing tools effectively canonicalize the JSON (*e.g.*, sort dictionary elements by key and format lists with one
37 | item per line), and then perform a traditional diff:
38 |
39 | .. code-block:: console
40 |
41 | $ cat original.json | jq -M --sort-keys > original.canonical.json
42 | $ cat modified.json | jq -M --sort-keys > modified.canonical.json
43 | $ diff -u original.canonical.json modified.canonical.json
44 |
45 | .. code-block:: diff
46 | :linenos:
47 |
48 | {
49 | - "bar": "testing",
50 | "foo": [
51 | - 1,
52 | 2,
53 | 3,
54 | - 4
55 | - ]
56 | + 4,
57 | + 5
58 | + ],
59 | + "woo": [
60 | + "foobar"
61 | + ],
62 | + "zab": "testing"
63 | }
64 |
65 | Not entirely useful, particularly if the input files are large. The problem is that changing dict keys breaks the diff:
66 | Since "bar" was changed to "zab", the canonical representation changes and they are considered separate edits (lines 2
67 | and 15 of the diff).
68 |
69 | Matching Ordered Sequences
70 | --------------------------
71 |
72 | Graphtage matches ordered sequences like lists using an "online" [#]_, "constructive" [#]_ implementation of the
73 | Levenshtein distance metric [#]_, similar to the Wagner–Fischer algorithm [#]_. The algorithm starts with an
74 | unbounded mapping and iteratively improves it until the bounds converge, at which point the optimal edit sequence is
75 | discovered. This is implemented in the :mod:`graphtage.levenshtein` module.
76 |
77 | Matching Unordered Collections
78 | ------------------------------
79 |
80 | Dicts are matched by solving the minimum weight matching problem [#]_ on the complete bipartite graph from key/value
81 | pairs in the source dict to key/value pairs in the destination dict. This is implemented in the
82 | :mod:`graphtage.matching` module.
83 |
84 | Footnotes
85 | ---------
86 |
87 | .. [#] https://en.wikipedia.org/wiki/Graph_isomorphism_problem
88 | .. [#] https://en.wikipedia.org/wiki/Directed_acyclic_graph
89 | .. [#] https://en.wikipedia.org/wiki/Online_algorithm
90 | .. [#] https://en.wikipedia.org/wiki/Constructive_proof
91 | .. [#] https://en.wikipedia.org/wiki/Levenshtein_distance
92 | .. [#] https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
93 | .. [#] https://en.wikipedia.org/wiki/Assignment_problem
94 | .. [*] Unless |pvsnp|_.
95 | .. _pvsnp:
96 | https://en.wikipedia.org/wiki/P_versus_NP_problem
97 | .. |pvsnp| replace:: :math:`P = NP`
98 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Graphtage Documentation
2 | =======================
3 |
4 | Graphtage is *both* a commandline utility *and* a general purpose library for semantically comparing and merging
5 | tree-like structures, such as JSON, XML, HTML, YAML, and CSV files. Its name is a portmanteau of “graph” and
6 | “graftage”—the latter being the practice of joining two trees together such that they grow as one.
7 |
8 | There are several reasons why you might be here…
9 |
10 | .. topic:: You want to learn how to use Graphtage as a command line utility.
11 |
12 | This documentation focuses on Graphtage’s use as a library, specifically how to extend it by implementing new file
13 | formats. For instructions on using Graphtage as a utility, see the documentation in its `GitHub page`_.
14 |
15 | .. topic:: You want to programmatically interact with Graphtage as a library.
16 |
17 | You should start by reading about :doc:`Using Graphtage Programmatically <library>`.
18 |
19 | .. topic:: You want to modify or extend Graphtage.
20 |
21 | For example, you might want to implement support for a new file format or edit type. You should start by reading
22 | the :doc:`Extending Graphtage <extending>` section.
23 |
24 | .. topic:: You are already familiar with Graphtage and just need an API reference.
25 |
26 | The API documentation is :doc:`here <package>`.
27 |
28 | .. topic:: You are curious and want to learn more about how Graphtage works.
29 |
30 | Documentation on how Graphtage works is :doc:`here <howitworks>`.
31 |
32 | .. _GitHub page: https://github.com/trailofbits/graphtage
33 |
34 | .. toctree::
35 | :maxdepth: 4
36 | :caption: Contents:
37 |
38 | library
39 | extending
40 | howitworks
41 | package
42 |
43 | Indices and tables
44 | ==================
45 |
46 | * :ref:`genindex`
47 | * :ref:`modindex`
48 | * :ref:`search`
49 |
--------------------------------------------------------------------------------
/docs/library.rst:
--------------------------------------------------------------------------------
1 | Using Graphtage Programmatically
2 | ================================
3 |
4 | Graphtage is a command line utility, but it can just as easily be used as a library. This section documents how to
5 | interact with Graphtage directly from Python.
6 |
7 | The Intermediate Representation
8 | -------------------------------
9 |
10 | Graphtage's diffing algorithms operate on an
11 | `intermediate representation <https://en.wikipedia.org/wiki/Intermediate_representation>`__ rather than on the data
12 | structures of the original file format. This allows Graphtage to have generic comparison algorithms that can work on
13 | *any* input file type. The intermediate representation is a tree of :class:`graphtage.TreeNode` objects.
14 |
15 | Therefore, the first step is to convert the files being diffed into Graphtage's intermediate representation. The JSON
16 | filetype has a function to convert arbitrary Python objects (comprised of standard Python types) into Graphtage trees::
17 |
18 | >>> from graphtage import json
19 | >>> from_tree = json.build_tree({"foo": [1, 2, 3, 4]})
20 | >>> from_tree
21 | DictNode([KeyValuePairNode(key=StringNode('foo'), value=ListNode((IntegerNode(1), IntegerNode(2), IntegerNode(3), IntegerNode(4))))])
22 |
23 | Transforming Nodes with Edits
24 | -----------------------------
25 |
26 | To see the sequence of edits to transform this tree to another, we call :meth:`graphtage.TreeNode.get_all_edits`::
27 |
28 | >>> to_tree = json.build_tree({"bar": [2, 3, 4]})
29 | >>> to_tree
30 | DictNode([KeyValuePairNode(key=StringNode('bar'), value=ListNode((IntegerNode(2), IntegerNode(3), IntegerNode(4))))])
31 | >>> for edit in from_tree.get_all_edits(to_tree):
32 | ... print(edit)
33 | Remove(IntegerNode(1), remove_from=ListNode((IntegerNode(1), IntegerNode(2), IntegerNode(3), IntegerNode(4))))
34 | StringEdit(from_node=StringNode('foo'), to_node=StringNode('bar'))
35 |
36 | Applying Edits to Nodes
37 | -----------------------
38 |
39 | Both nodes and edits are immutable. We can perform a diff to apply edits to nodes, producing a new tree constructed of
40 | :class:`graphtage.EditedTreeNode` objects. Using some Python magic, the new tree's nodes maintain all of the same
41 | characteristics of the source nodes—including their source node class types—but are *also* instances of
42 | :class:`graphtage.EditedTreeNode` (as reported by :func:`isinstance`).
43 |
44 | Here is how to diff two nodes::
45 |
46 | >>> # Diff the two trees built above:
47 | >>> diff = from_tree.diff(to_tree)
48 | >>> diff
49 | EditedDictNode([EditedKeyValuePairNode(key=EditedStringNode('foo'), value=EditedListNode((EditedIntegerNode(1), EditedIntegerNode(2), EditedIntegerNode(3), EditedIntegerNode(4))))])
50 |
51 | As you can see, the tree was reconstructed with edited versions of each node. Each node will have a new member variable,
52 | :attr:`graphtage.EditedTreeNode.edit`, containing the edit that the node chose to apply to itself (or :const:`None` if the
53 | node did not need to be edited). There are also additional member variables to indicate whether the node has been
54 | removed from its parent container.
55 |
56 | Formatting and Printing Results
57 | -------------------------------
58 |
59 | There are two components to outputting a tree or diff: a :class:`graphtage.formatter.Formatter`, which is responsible
60 | for the syntax of the output, and a :class:`graphtage.printer.Printer`, which is responsible for rendering that output
61 | to a stream. For example, to print our diff in JSON format to the default printer (STDOUT), we would do::
62 |
63 | >>> from graphtage import printer
64 | >>> with printer.DEFAULT_PRINTER as p:
65 | ... json.JSONFormatter.DEFAULT_INSTANCE.print(printer.DEFAULT_PRINTER, diff)
66 | ...
67 | {
68 | "++bar++~~foo~~": [
69 | ~~1~~,
70 | 2,
71 | 3,
72 | 4
73 | ]
74 | }
75 |
76 | Since Graphtage's formatters are independent of the input format, thanks to the intermediate representation, we can
77 | just as easily output the diff in another format, like YAML::
78 |
79 | >>> from graphtage import yaml
80 | >>> with printer.DEFAULT_PRINTER as p:
81 | ... yaml.YAMLFormatter.DEFAULT_INSTANCE.print(printer.DEFAULT_PRINTER, diff)
82 | ...
83 | ++bar++~~foo~~:
84 | - ~~1~~
85 | - 2
86 | - 3
87 | - 4
88 |
89 | Diffing In-Memory Python Objects
90 | --------------------------------
91 |
92 | When used as a library, Graphtage has the ability to diff in-memory Python objects. This can be useful when debugging,
93 | for example, to quickly determine the difference between two Python objects that cause a differential::
94 |
95 | >>> from graphtage.pydiff import print_diff
96 | >>> with printer.DEFAULT_PRINTER as p:
97 | ... obj1 = [1, 2, {3: "three"}, 4]
98 | ... obj2 = [1, 2, {3: 3}, "four"]
99 | ... print_diff(obj1, obj2, printer=p)
100 | [1,2,{3: "three" -> 3},++"four"++~~4~~]
101 |
102 | Python object diffing also works with custom classes::
103 |
104 | >>> class Foo:
105 | ... def __init__(self, bar, baz):
106 | ... self.bar = bar
107 | ... self.baz = baz
108 | >>> with printer.DEFAULT_PRINTER as p:
109 | ... print_diff(Foo("bar", "baz"), Foo("bar", "bak"), printer=p)
110 | Foo(bar="bar", baz="ba++k++~~z~~")
111 |
--------------------------------------------------------------------------------
/docs/printing.rst:
--------------------------------------------------------------------------------
1 | .. _Printing Protocol:
2 |
3 | Printing Protocol
4 | =================
5 |
6 | The protocol for delegating how a :class:`graphtage.TreeNode` or :class:`graphtage.Edit` is printed in
7 | :meth:`graphtage.GraphtageFormatter.print` is as follows:
8 |
9 | #. Determine the actual object to be printed:
10 | * If ``node_or_edit`` is an :class:`graphtage.Edit`:
11 | * If ``with_edits``, then choose the edit
12 | * Otherwise, choose :attr:`node_or_edit.from_node <graphtage.Edit.from_node>`
13 | * If ``node_or_edit`` is a :class:`graphtage.TreeNode`:
14 | * If ``with_edits`` *and* the node is edited and has a non-zero cost,
15 | then choose :attr:`node_or_edit.edit <graphtage.EditedTreeNode.edit>`::
16 |
17 | node_or_edit.edit is not None and node_or_edit.edit.bounds().lower_bound > 0
18 |
19 | * Otherwise choose ``node_or_edit``
20 | #. If the chosen object is an edit:
21 | * See if there is a specialized formatter for this edit by calling
22 | :meth:`graphtage.formatter.Formatter.get_formatter`
23 | * If so, delegate to that formatter and return.
24 | * If not, try calling the edit's :func:`graphtage.Edit.print` method. If :exc:`NotImplementedError` is
25 | *not* raised, return.
26 | #. If the chosen object is a node, or if we failed to find a printer for the edit:
27 | * See if there is a specialized formatter for this node by calling
28 | :meth:`graphtage.formatter.Formatter.get_formatter`
29 | * If so, delegate to that formatter and return.
30 | * If not, print a debug warning and delegate to the node's internal print implementation
31 | :meth:`graphtage.TreeNode.print`.
32 |
33 | This is implemented in :meth:`graphtage.GraphtageFormatter.print`. See the :ref:`Formatting Protocol` for how formatters
34 | are chosen.
35 |
--------------------------------------------------------------------------------
/graphtage/__init__.py:
--------------------------------------------------------------------------------
1 | from . import graphtage
2 |
3 | from .graphtage import *
4 | from .tree import *
5 | from .edits import *
6 |
7 | from .version import __version__, VERSION_STRING
8 | from . import (
9 | ast, bounds, builder, constraints, dataclasses, edits, expressions, fibonacci, formatter, levenshtein, matching,
10 | object_set, pickle, printer, pydiff, search, sequences, tree, utils
11 | )
12 | from . import csv, json, xml, yaml, plist
13 |
14 | import inspect
15 |
# Everything defined in the SUBMODULES_TO_SUBSUME modules conceptually belongs to the top-level `graphtage`
# package; the code is split across submodules only to keep the individual source files a manageable size.
# Walk each of those submodules and re-home every member it defines by rewriting the member's `__module__`.
SUBMODULES_TO_SUBSUME = (graphtage, tree, edits)
for module_to_subsume in SUBMODULES_TO_SUBSUME:
    for name, obj in inspect.getmembers(module_to_subsume):
        # Skip members that were merely imported into the submodule from somewhere else.
        if getattr(obj, '__module__', None) != module_to_subsume.__name__:
            continue
        obj.__module__ = 'graphtage'
    del module_to_subsume

# Do not leak the helper names into the package namespace.
del SUBMODULES_TO_SUBSUME, inspect
27 |
--------------------------------------------------------------------------------
/graphtage/ast.py:
--------------------------------------------------------------------------------
1 | """
2 | Generic node types for representing abstract syntax trees.
3 | """
4 | from colorama import Fore
5 |
6 | from . import KeyValuePairNode, ListNode, Printer, TreeNode, DictNode, StringNode
7 | from .dataclasses import DataClassNode
8 | from .sequences import SequenceFormatter
9 |
10 |
class KeywordArgument(KeyValuePairNode):
    """A :class:`KeyValuePairNode` subclass that adds no behavior of its own.

    Presumably used to tag the ``name=value`` arguments of a call; confirm against the callers.
    """
13 |
14 |
class Module(ListNode):
    """A list node that prints its children through a newline-separated sequence formatter."""

    def print(self, printer: Printer):
        """Prints this node using a :class:`SequenceFormatter` built with ``('', '', '\\n')``."""
        line_formatter = SequenceFormatter('', '', '\n')
        line_formatter.print(printer, self)
18 |
19 |
class Assignment(DataClassNode):
    """A node representing the assignment of a value to one or more targets."""

    targets: ListNode
    value: TreeNode

    def print(self, printer: Printer):
        """Prints the comma-separated targets, a bright ``=``, and then the assigned value."""
        target_formatter = SequenceFormatter('', '', ', ')
        target_formatter.print(printer, self.targets)
        with printer.bright():
            printer.write(" = ")
        self.value.print(printer)

    def __str__(self):
        lhs = ', '.join(str(target) for target in self.targets.children())
        return f"{lhs} = {self.value!s}"
35 |
36 |
class CallArguments(ListNode):
    """A :class:`ListNode` subclass with no added behavior; it is the type of the ``args`` slot of :class:`Call`."""
39 |
40 |
class CallKeywords(DictNode):
    """A :class:`DictNode` subclass with no added behavior; it is the type of the ``kwargs`` slot of :class:`Call`."""
43 |
44 |
class Call(DataClassNode):
    """A node representing a function call.

    Slots:
        func: The node for the callee.
        args: The positional arguments.
        kwargs: The keyword arguments.
    """

    func: TreeNode
    args: CallArguments
    kwargs: CallKeywords

    def __init__(self, *args, **kwargs):
        """Initializes the slots via :class:`DataClassNode` and un-quotes a string callee."""
        super().__init__(*args, **kwargs)
        if isinstance(self.func, StringNode):
            # A callee given as a string is rendered bare, without surrounding quotes.
            self.func.quoted = False

    def print(self, printer: Printer):
        """Prints the call as ``func(arg, ..., key=value, ...)``.

        Every argument after the first, positional or keyword, is preceded by ``", "`` so that the rendered
        output agrees with :meth:`__str__`.
        """
        with printer.color(Fore.YELLOW):
            self.func.print(printer)
        printer.write("(")
        SequenceFormatter('', '', ', ').print(printer, self.args)
        # A separator is owed before the first keyword only if positional arguments were printed,
        # and before every subsequent keyword unconditionally.
        needs_separator = bool(self.args)
        for kvp in self.kwargs:
            if needs_separator:
                printer.write(", ")
            needs_separator = True
            with printer.color(Fore.RED):
                kvp.key.print(printer)
            with printer.bright():
                printer.write("=")
            kvp.value.print(printer)
        printer.write(")")

    def __str__(self):
        args = ", ".join([str(a) for a in self.args] + [
            f"{kvp.key!s}={kvp.value!s}"
            for kvp in self.kwargs
        ])
        return f"{self.func!s}({args})"
78 |
79 |
class Subscript(DataClassNode):
    """A node representing an object subscript (i.e., the `[]` operator)

    Slots:
        value: The node being subscripted.
        slice: The node appearing between the brackets.
    """

    value: TreeNode
    slice: TreeNode

    def print(self, printer: Printer):
        """Prints ``value[slice]`` with the brackets colored light blue."""
        self.value.print(printer)
        with printer.color(Fore.LIGHTBLUE_EX):
            printer.write("[")
        # Nodes are rendered with `print(printer)`; `write` is the printer's method, not the node's.
        self.slice.print(printer)
        with printer.color(Fore.LIGHTBLUE_EX):
            printer.write("]")
93 |
94 |
class Import(DataClassNode):
    """A node representing an ``import`` statement, optionally of the ``from ... import ...`` form."""

    names: ListNode
    from_name: StringNode

    def __init__(self, names: ListNode, from_name: StringNode):
        """Stores both slots and marks the source name and every string child of ``names`` as unquoted."""
        super().__init__(names=names, from_name=from_name)
        self.from_name.quoted = False
        for imported in self.names:
            if not isinstance(imported, StringNode):
                continue
            imported.quoted = False

    def print(self, printer: Printer):
        """Prints ``from <from_name> import <names>``; the ``from`` clause is omitted when ``from_name`` is empty."""
        has_source = self.from_name.object
        if has_source:
            with printer.color(Fore.YELLOW):
                printer.write("from ")
            self.from_name.print(printer)
            printer.write(" ")
        with printer.color(Fore.YELLOW):
            printer.write("import ")
        name_formatter = SequenceFormatter('', '', ', ')
        name_formatter.print(printer, self.names)
115 |
--------------------------------------------------------------------------------
/graphtage/builder.py:
--------------------------------------------------------------------------------
1 | """A module intended to simplify building Graphtage IR trees from other tree-like data structures."""
2 |
3 | from abc import ABC
4 | import logging
5 | from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, TypeVar
6 |
7 | from . import (
8 | BoolNode, BuildOptions, DictNode, FixedKeyDictNode, FloatNode, IntegerNode, LeafNode, ListNode, MultiSetNode,
9 | NullNode, StringNode, TreeNode
10 | )
11 | from .object_set import IdentityHash
12 |
13 | C = TypeVar("C")
14 | T = TypeVar("T")
15 |
16 | log = logging.getLogger(__name__)
17 |
18 |
class CyclicReference(LeafNode):
    """A leaf node inserted in place of a child that is already being expanded (see :meth:`Builder.build_tree`)."""

    def __init__(self, obj):
        """Wraps ``obj`` in an :class:`IdentityHash` and stores the wrapper as this leaf's object."""
        wrapped = IdentityHash(obj)
        super().__init__(wrapped)

    def __hash__(self):
        return id(self.object)

    def __eq__(self, other):
        if not isinstance(other, CyclicReference):
            return False
        return other.object is self.object
28 |
29 |
class Builder(ABC):
    """Base class for converting arbitrary tree-like objects into Graphtage trees.

    Subclasses register per-type callbacks by decorating methods with :meth:`expander` (which yields the children
    of an object) and :meth:`builder` (which turns an object plus its already-built children into a
    :class:`TreeNode`). The registrations are collected into the class-level ``EXPANDERS`` and ``BUILDERS``
    dictionaries when the subclass is created.
    """

    EXPANDERS: Dict[Type[Any], Callable[["Builder", Any], Optional[Iterable[Any]]]]
    BUILDERS: Dict[Type[Any], Callable[["Builder", Any, List[TreeNode]], TreeNode]]

    def __init__(self, options: Optional[BuildOptions] = None):
        """Initializes the builder.

        Args:
            options: The build options to use; a default :class:`BuildOptions` is created if omitted.
        """
        if options is None:
            self.options: BuildOptions = BuildOptions()
        else:
            self.options = options

    @staticmethod
    def expander(node_type: Type[T]):
        """Decorator that marks a method as the expander for objects of type ``node_type``.

        The decorator may be stacked to register the same method for several types.
        """
        def wrapper(func: Callable[[C, T], Iterable[Any]]) -> Callable[[C, T], Iterable[Any]]:
            if hasattr(func, "_visitor_expander_for_type"):
                func._visitor_expander_for_type = func._visitor_expander_for_type + (node_type,)
            else:
                setattr(func, "_visitor_expander_for_type", (node_type,))
            return func

        return wrapper

    @staticmethod
    def builder(node_type: Type[T]):
        """Decorator that marks a method as the builder for objects of type ``node_type``.

        The decorator may be stacked to register the same method for several types.
        """
        def wrapper(func: Callable[[C, T, List[TreeNode]], TreeNode]) -> Callable[[C, T, List[TreeNode]], TreeNode]:
            if hasattr(func, "_visitor_builder_for_type"):
                func._visitor_builder_for_type = func._visitor_builder_for_type + (node_type,)
            else:
                setattr(func, "_visitor_builder_for_type", (node_type,))
            return func

        return wrapper

    def __init_subclass__(cls, **kwargs):
        """Collects the decorated expanders and builders of a new subclass.

        Each subclass receives its own copies of ``EXPANDERS`` and ``BUILDERS`` (seeded from any inherited
        registrations) so that registering a callback never mutates a parent class.

        Raises:
            TypeError: If a callback is registered for something that is not a type, or for a type that already
                has a callback of the same kind.
        """
        super().__init_subclass__(**kwargs)
        if not hasattr(cls, "EXPANDERS") or cls.EXPANDERS is None:
            setattr(cls, "EXPANDERS", {})
        else:
            setattr(cls, "EXPANDERS", dict(cls.EXPANDERS))
        if not hasattr(cls, "BUILDERS") or cls.BUILDERS is None:
            setattr(cls, "BUILDERS", {})
        else:
            setattr(cls, "BUILDERS", dict(cls.BUILDERS))
        new_expanders = {}
        new_builders = {}
        for member_name, member in cls.__dict__.items():
            if hasattr(member, "_visitor_expander_for_type"):
                for expander_type in getattr(member, "_visitor_expander_for_type"):
                    if not isinstance(expander_type, type):
                        raise TypeError(f"{cls.__name__}.{member_name} was registered as an expander for "
                                        f"{expander_type!r}, which is not a type")
                    elif expander_type in cls.EXPANDERS:
                        raise TypeError(f"An expander for type {expander_type.__name__} is already registered to "
                                        f"{cls.EXPANDERS[expander_type]!r} and cannot be re-registered to "
                                        f"{cls.__name__}.{member_name}")
                    elif expander_type in new_expanders:
                        raise TypeError(f"An expander for type {expander_type.__name__} is already registered to "
                                        f"{new_expanders[expander_type]!r} and cannot be re-registered to "
                                        f"{cls.__name__}.{member_name}")
                    new_expanders[expander_type] = member
            if hasattr(member, "_visitor_builder_for_type"):
                for builder_type in getattr(member, "_visitor_builder_for_type"):
                    if not isinstance(builder_type, type):
                        raise TypeError(f"{cls.__name__}.{member_name} was registered as an builder for "
                                        f"{builder_type!r}, which is not a type")
                    # Duplicate builders must be looked up in BUILDERS (not EXPANDERS); the message below
                    # indexes cls.BUILDERS, so testing the wrong dictionary could raise a KeyError instead.
                    elif builder_type in cls.BUILDERS:
                        raise TypeError(f"A builder for type {builder_type.__name__} is already registered to "
                                        f"{cls.BUILDERS[builder_type]!r} and cannot be re-registered to "
                                        f"{cls.__name__}.{member_name}")
                    elif builder_type in new_builders:
                        raise TypeError(f"A builder for type {builder_type.__name__} is already registered to "
                                        f"{new_builders[builder_type]!r} and cannot be re-registered to "
                                        f"{cls.__name__}.{member_name}")
                    new_builders[builder_type] = member
        cls.EXPANDERS.update(new_expanders)
        cls.BUILDERS.update(new_builders)

    def default_expander(self, node: Any) -> Iterable[Any]:
        """Fallback used when no expander is registered for ``node``'s type; reports no children."""
        return ()

    def default_builder(self, node: Any, children: List[TreeNode]) -> TreeNode:
        """Fallback used when no builder is registered for ``node``'s type.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(f"A builder for type {node.__class__.__name__} is not defined for object {node!r}")

    @classmethod
    def _resolve(cls, obj_type: Type[Any], choices: Dict[Type[Any], T]) -> Optional[T]:
        """Resolves the most specialized expander or builder for `obj_type`

        The method resolution order of ``obj_type`` is walked from most to least derived, and the first class
        found in ``choices`` wins. Returns :const:`None` if nothing matches.
        """
        for t in obj_type.__mro__:
            if t in choices:
                return choices[t]
        return None

    @classmethod
    def resolve_expander(cls, obj_type: Type[Any]) -> Optional[Callable[[Any], Optional[Iterable[Any]]]]:
        """Resolves the most specialized expander for `obj_type`"""
        return cls._resolve(obj_type, cls.EXPANDERS)

    @classmethod
    def resolve_builder(cls, obj_type: Type[Any]) -> Optional[Callable[[Any, List[TreeNode]], TreeNode]]:
        """Resolves the most specialized builder for `obj_type`"""
        return cls._resolve(obj_type, cls.BUILDERS)

    def expand(self, node: Any) -> Iterable[Any]:
        """Returns the children of ``node`` using its registered expander, or :meth:`default_expander`."""
        expander = self.resolve_expander(type(node))
        if expander is None:
            return self.default_expander(node)
        return expander(self, node)

    def build(self, node: Any, children: List[TreeNode]) -> TreeNode:
        """Builds the tree node for ``node`` from its already-built ``children``.

        Raises:
            ValueError: If the selected builder returns something that is not a :class:`TreeNode`.
        """
        builder = self.resolve_builder(type(node))
        if builder is None:
            result = self.default_builder(node, children)
        else:
            result = builder(self, node, children)
        if not isinstance(result, TreeNode):
            if builder is None:
                source = f"{self.__class__.__name__}.default_builder"
            else:
                source = f"{builder!r}"
            raise ValueError(f"{source}(node={node!r}, children={children!r}) returned {result!r}; "
                             f"builders must return a graphtage.TreeNode")
        return result

    def build_tree(self, root_obj) -> TreeNode:
        """Builds the complete tree rooted at ``root_obj``.

        The traversal is iterative and depth-first, driven by an explicit stack of
        ``(object, built children, children still to visit)`` entries, so deep inputs do not recurse.

        Raises:
            ValueError: If ``options.check_for_cycles`` is set, a cycle is found, and ``options.ignore_cycles``
                is not set.
        """
        children = self.expand(root_obj)
        # Pending children are stored reversed so that `pop()` yields them in their original order.
        work: List[Tuple[Any, List[TreeNode], List[Any]]] = [(root_obj, [], list(reversed(list(children))))]
        with self.options.printer.tqdm(
                desc="Walking the Tree", leave=False, delay=2.0, unit=" nodes", total=1 + len(work[-1][-1])
        ) as t:
            while work:
                node, processed_children, unprocessed_children = work[-1]

                if unprocessed_children:
                    child = unprocessed_children.pop()
                    t.update(1)

                    grandchildren = list(self.expand(child))

                    if grandchildren and self.options.check_for_cycles:
                        # first, check if all of our grandchildren are leaves; if so, we don't need to check for a cycle
                        all_are_leaves = all(
                            all(False for _ in self.expand(grandchild))
                            for grandchild in grandchildren
                        )
                        if not all_are_leaves:
                            # make sure we aren't already in the process of expanding this child
                            is_cycle = False
                            for already_expanding, _, _ in work:
                                if already_expanding is child:
                                    if self.options.ignore_cycles:
                                        log.debug(f"Detected a cycle in {node!r} at child {child!r}; ignoring…")
                                        processed_children.append(CyclicReference(child))
                                        is_cycle = True
                                        break
                                    else:
                                        raise ValueError(f"Detected a cycle in {node!r} at child {child!r}")
                            if is_cycle:
                                continue
                    work.append((child, [], list(reversed(grandchildren))))
                    # Account for the child's own completion plus each of its children in the progress total.
                    t.total = t.total + 1 + len(grandchildren)
                    t.refresh()
                    continue

                # Every child of `node` has been built, so `node` itself can now be built.
                _ = work.pop()
                t.update(1)

                new_node = self.build(node, processed_children)
                if not work:
                    return new_node
                work[-1][1].append(new_node)

        return NullNode()
201 |
202 |
class BasicBuilder(Builder):
    """A builder that knows how to convert the basic built-in Python types"""

    @Builder.builder(int)
    def build_int(self, obj: int, _) -> IntegerNode:
        return IntegerNode(obj)

    @Builder.builder(str)
    @Builder.builder(bytes)
    def build_str(self, obj: str, _) -> StringNode:
        return StringNode(obj)

    @Builder.builder(type(None))
    def build_none(self, obj, _) -> NullNode:
        assert obj is None
        return NullNode()

    @Builder.builder(float)
    def build_float(self, obj: float, _) -> FloatNode:
        return FloatNode(obj)

    @Builder.builder(bool)
    def build_bool(self, obj: bool, _) -> BoolNode:
        return BoolNode(obj)

    @Builder.expander(list)
    @Builder.expander(tuple)
    @Builder.expander(set)
    @Builder.expander(frozenset)
    def expand_list(self, obj: list):
        """Yields the elements of a list, tuple, set, or frozenset."""
        yield from obj

    @Builder.builder(list)
    @Builder.builder(tuple)
    def build_list(self, obj, children: List[TreeNode]) -> ListNode:
        """Wraps the built children in a :class:`ListNode`, forwarding the list-edit options."""
        opts = self.options
        return ListNode(
            children,
            allow_list_edits=opts.allow_list_edits,
            allow_list_edits_when_same_length=opts.allow_list_edits_when_same_length
        )

    @Builder.builder(set)
    @Builder.builder(frozenset)
    def build_set(self, obj, children: List[TreeNode]) -> MultiSetNode:
        return MultiSetNode(children)

    @Builder.expander(dict)
    def expand_dict(self, obj: dict):
        """Yields every key of the dict followed by every value."""
        yield from obj.keys()
        yield from obj.values()

    @Builder.builder(dict)
    def build_dict(self, _, children: List[TreeNode]):
        """Pairs the first half of ``children`` (the keys) with the second half (the values)."""
        half = len(children) // 2
        dict_items = dict(zip(children[:half], children[half:]))
        if not self.options.allow_key_edits:
            return FixedKeyDictNode.from_dict(dict_items)
        dict_node = DictNode.from_dict(dict_items)
        dict_node.auto_match_keys = self.options.auto_match_keys
        return dict_node
269 |
--------------------------------------------------------------------------------
/graphtage/constraints.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | import logging
3 | from typing import Optional
4 |
5 | from .edits import Edit
6 | from . import expressions
7 | from . import graphtage
8 |
9 | log = logging.getLogger('graphtage')
10 |
11 |
class ConditionalMatcher(metaclass=ABCMeta):
    """Abstract base for callables that decide how two nodes are edited based on an expression."""

    def __init__(self, condition: expressions.Expression):
        """Stores the expression that subclasses evaluate in ``__call__``."""
        self.condition: expressions.Expression = condition

    @abstractmethod
    def __call__(self, from_node: graphtage.TreeNode, to_node: graphtage.TreeNode) -> Optional[Edit]:
        """Returns an edit for the pair of nodes, or :const:`None`."""
        raise NotImplementedError()

    @classmethod
    def apply(cls, node: graphtage.TreeNode, condition: expressions.Expression):
        """Creates a matcher of this class for ``condition`` and adds it to ``node`` as an edit modifier."""
        matcher = cls(condition)
        node.add_edit_modifier(matcher)
23 |
24 |
class MatchIf(ConditionalMatcher):
    """Forces a :class:`graphtage.Replace` of the two nodes unless the condition evaluates truthy."""

    def __call__(self, from_node: graphtage.TreeNode, to_node: graphtage.TreeNode) -> Optional[Edit]:
        """Evaluates the condition with ``from`` and ``to`` bound to the nodes' ``to_obj()`` values.

        Returns:
            :const:`None` if the condition holds; otherwise a replacement edit. Any exception raised during
            evaluation is logged at debug level and treated as the condition not holding.
        """
        try:
            # Bind the converted Python objects rather than the raw nodes, exactly as MatchUnless does, so
            # the same expression means the same thing under --match-if and --match-unless.
            if self.condition.eval(locals={'from': from_node.to_obj(), 'to': to_node.to_obj()}):
                return None
        except Exception as e:
            log.debug(f"{e!s} while evaluating --match-if for nodes {from_node} and {to_node}")
        return graphtage.Replace(from_node, to_node)
33 |
34 |
class MatchUnless(ConditionalMatcher):
    """Forces a :class:`graphtage.Replace` of the two nodes when the condition evaluates truthy."""

    def __call__(self, from_node: graphtage.TreeNode, to_node: graphtage.TreeNode) -> Optional[Edit]:
        """Returns a replacement edit if the condition holds for the nodes' ``to_obj()`` values, else :const:`None`.

        Any exception raised along the way is logged at debug level and yields :const:`None`.
        """
        try:
            scope = {'from': from_node.to_obj(), 'to': to_node.to_obj()}
            if not self.condition.eval(locals=scope):
                return None
            return graphtage.Replace(from_node, to_node)
        except Exception as e:
            log.debug(f"{e!s} while evaluating --match-unless for nodes {from_node} and {to_node}")
        return None
43 |
--------------------------------------------------------------------------------
/graphtage/csv.py:
--------------------------------------------------------------------------------
1 | """A :class:`graphtage.Filetype` for parsing, diffing, and rendering `CSV files`_.
2 |
3 | .. _CSV files:
4 | https://en.wikipedia.org/wiki/Comma-separated_values
5 |
6 | """
7 |
8 | import csv
9 | from io import StringIO
10 | from typing import Optional
11 |
12 | from . import graphtage, json
13 | from .json import JSONFormatter
14 | from .printer import Printer
15 | from .sequences import SequenceFormatter
16 | from .tree import GraphtageFormatter, TreeNode
17 |
18 |
class CSVRow(graphtage.ListNode[TreeNode]):
    """A node representing a single row of a CSV file."""

    def __bool__(self):
        """A row is truthy exactly when its children collection is truthy."""
        return True if self._children else False
23 |
24 |
class CSVNode(graphtage.ListNode[CSVRow]):
    """A node representing zero or more CSV rows."""

    def __bool__(self):
        """Truthy only when there is at least one row and at least one of the rows is itself truthy."""
        if not self._children:
            return False
        return any(self._children)

    def __eq__(self, other: 'CSVNode'):
        """Two CSV nodes are equal when their children are equal or when both nodes are falsy."""
        return self._children == other._children or not (self or other)
32 |
33 |
def build_tree(path: str, options: Optional[graphtage.BuildOptions] = None, *args, **kwargs) -> CSVNode:
    """Constructs a :class:`CSVNode` from a CSV file.

    The file is parsed using Python's :func:`csv.reader`. The elements in each row are constructed by delegating to
    :func:`graphtage.json.build_tree`::

        CSVRow([json.build_tree(i, options=options) for i in row])

    Any resulting :class:`graphtage.StringNode` column is marked as unquoted.

    Args:
        path: The path to the file to be parsed.
        options: Optional build options to pass on to :meth:`graphtage.json.build_tree`.
        *args: Any extra positional arguments are passed on to :func:`csv.reader`.
        **kwargs: Any extra keyword arguments are passed on to :func:`csv.reader`.

    Returns:
        CSVNode: The resulting CSV node object.

    """
    csv_data = []
    # The csv module requires files to be opened with newline='' so that newlines embedded in quoted fields
    # (and "\r\n" line endings) reach the reader untranslated.
    with open(path, newline='') as f:
        for row in csv.reader(f, *args, **kwargs):
            rowdata = [json.build_tree(i, options=options) for i in row]
            for col in rowdata:
                if isinstance(col, graphtage.StringNode):
                    col.quoted = False
            csv_data.append(CSVRow(rowdata))
    return CSVNode(csv_data)
61 |
62 |
class CSVRowFormatter(SequenceFormatter):
    """A formatter that renders one CSV row as comma-separated items on a single line."""
    is_partial = True

    def __init__(self):
        """Initializes the underlying sequence formatter with ``('', '', ',')``."""
        super().__init__('', '', ',')

    def print_CSVRow(self, *args, **kwargs):
        """Prints a CSV row by forwarding all arguments to ``print_SequenceNode`` of the superclass."""
        super().print_SequenceNode(*args, **kwargs)

    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
        """Intentionally does nothing, because a row must stay on a single line."""
90 |
91 |
class CSVRows(SequenceFormatter):
    """A sub formatter that prints the sequence of rows making up a CSV file."""
    is_partial = True

    sub_format_types = [CSVRowFormatter]

    def __init__(self):
        """Initializes the underlying sequence formatter with ``('', '', '')``."""
        super().__init__('', '', '')

    def print_CSVNode(self, *args, **kwargs):
        """Prints a CSV node by forwarding all arguments to ``print_SequenceNode`` of the superclass."""
        super().print_SequenceNode(*args, **kwargs)

    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
        """Prints a newline before every item except the first; ``is_last`` is not consulted."""
        if is_first:
            return
        printer.newline()

    def items_indent(self, printer: Printer):
        """Returns :obj:`printer` unchanged because CSV rows do not need to be indented."""
        return printer
126 |
127 |
class CSVFormatter(GraphtageFormatter):
    """Top-level formatter for CSV files."""
    sub_format_types = [CSVRows, JSONFormatter]

    def print_LeafNode(self, printer: Printer, node: graphtage.LeafNode):
        """Prints a leaf node, which should always be a column in a CSV row.

        An edited leaf is delegated to the second sub formatter (index 1 of ``sub_formatters``). Otherwise the
        value is escaped by writing it as a one-column row with :func:`csv.writer` and dropping the single
        trailing line terminator::

            csv.writer(...).writerow([node.object])

        """
        if node.edited and node.edit is not None:
            self.sub_formatters[1].print(printer, node.edit)
            return
        with StringIO() as buffer:
            csv.writer(buffer).writerow([node.object])
            rendered = buffer.getvalue()
        # Strip exactly one terminator, preferring the two-character form.
        for terminator in ('\r\n', '\n', '\r'):
            if rendered.endswith(terminator):
                rendered = rendered[:-len(terminator)]
                break
        printer.write(rendered)
153 |
154 |
class CSV(graphtage.Filetype):
    """The CSV filetype."""

    def __init__(self):
        """Registers the filetype under the name `csv` with the MIME type `text/csv`."""
        super().__init__('csv', 'text/csv')

    def build_tree(self, path: str, options: Optional[graphtage.BuildOptions] = None) -> TreeNode:
        """Delegates to the module-level :func:`build_tree`."""
        tree = build_tree(path, options=options)
        return tree

    def build_tree_handling_errors(self, path: str, options: Optional[graphtage.BuildOptions] = None) -> TreeNode:
        """Delegates to :meth:`build_tree` without any additional error handling."""
        return self.build_tree(path=path, options=options)

    def get_default_formatter(self) -> CSVFormatter:
        """Returns the shared default :class:`CSVFormatter` instance."""
        return CSVFormatter.DEFAULT_INSTANCE
177 |
--------------------------------------------------------------------------------
/graphtage/dataclasses.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Iterator, List, Tuple, Type
2 |
3 | from . import AbstractCompoundEdit, Edit, Range, Replace
4 | from .printer import Fore, Printer
5 | from .tree import ContainerNode, TreeNode
6 |
7 |
class DataClassEdit(AbstractCompoundEdit):
    """A compound edit between two :class:`DataClassNode` objects made of one sub-edit per slot."""

    def __init__(self, from_node: "DataClassNode", to_node: "DataClassNode"):
        """Creates the per-slot edits.

        Raises:
            ValueError: If the two nodes do not have the same slot names.
        """
        source_slots = dict(from_node.items())
        target_slots = dict(to_node.items())
        if source_slots.keys() != target_slots.keys():
            raise ValueError(f"Node {from_node!r} cannot be edited to {to_node!r} because they have incompatible slots")
        slot_edits: List[Edit] = []
        for slot_name, source_value in source_slots.items():
            slot_edits.append(source_value.edits(target_slots[slot_name]))
        self.slot_edits: List[Edit] = slot_edits
        super().__init__(from_node, to_node)

    def bounds(self) -> Range:
        """Returns the sum of the bounds of every slot edit, starting from ``Range(0, 0)``."""
        return sum((slot_edit.bounds() for slot_edit in self.slot_edits), Range(0, 0))

    def edits(self) -> Iterator[Edit]:
        """Yields the slot edits in slot order."""
        for slot_edit in self.slot_edits:
            yield slot_edit

    def tighten_bounds(self) -> bool:
        """Tightens the first slot edit that can still be tightened; returns whether one was found."""
        return any(slot_edit.tighten_bounds() for slot_edit in self.slot_edits)
34 |
35 |
class DataClassNode(ContainerNode):
    """A container node that can be initialized similar to a Python :func:`dataclasses.dataclass`

    Subclasses declare their slots as class-level annotations whose types are :class:`TreeNode` subclasses.
    The slot names are gathered in declaration order into ``_SLOTS`` when the subclass is created, and
    ``__init__`` accepts the slot values positionally or by keyword.
    """

    _SLOTS: Tuple[str, ...]
    _SLOT_ANNOTATIONS: Dict[str, Type[TreeNode]]
    _DATA_CLASS_ANCESTORS: List[Type["DataClassNode"]]

    def __init__(self, *args, **kwargs):
        """Be careful extending __init__; consider using :func:`DataClassNode.post_init` instead.

        Keyword arguments naming a slot are consumed here; all others are forwarded to the superclass. The
        trailing positional arguments fill the remaining slots in declaration order, and any leading surplus
        positional arguments are forwarded to the superclass.

        Raises:
            ValueError: If too few arguments are given, or a slot value is not an instance of its annotated type.
        """
        our_kwargs = {
            k: v
            for k, v in kwargs.items()
            if k in self._SLOTS
        }
        parent_kwargs = {
            k: v
            for k, v in kwargs.items()
            if k not in self._SLOTS
        }
        required_positional_args = len(self._SLOTS) - len(our_kwargs)
        assert required_positional_args >= 0
        if required_positional_args > len(args):
            raise ValueError(f"Not enough arguments sent to {self.__class__.__name__}.__init__: {args!r} {kwargs!r}; "
                             f"expected at least {len(self._SLOTS)}")
        start_index = len(args) - required_positional_args
        parent_args = args[:start_index]
        super().__init__(*parent_args, **parent_kwargs)
        our_args = list(args[start_index:])
        for s in self._SLOTS:
            if s in our_kwargs:
                value = our_kwargs[s]
            elif not our_args:
                raise ValueError(f"Missing argument for {self.__class__.__name__}.{s}")
            else:
                value = our_args[0]
                our_args = our_args[1:]
            expected_type = self._SLOT_ANNOTATIONS[s]
            if not isinstance(value, expected_type):
                raise ValueError(f"Expected a node of type {expected_type.__name__} for argument "
                                 f"{self.__class__.__name__}.{s} but instead got {value!r}")
            setattr(self, s, value)
        # self.__hash__ gets called so often, we cache the result:
        self.__hash = hash(tuple(self))
        # NOTE(review): _DATA_CLASS_ANCESTORS excludes the instance's own class, so only the ancestors'
        # post_init implementations are invoked here -- confirm that this is intended.
        for ancestor in self._DATA_CLASS_ANCESTORS:
            ancestor.post_init(self)

    def post_init(self):
        """Callback called after this class's members have been initialized.

        This callback should not call `super().post_init()`. Each superclass's `post_init()` will be automatically
        called in order of the `__mro__`.
        """
        pass

    def __init_subclass__(cls, **kwargs):
        """Computes ``_SLOTS``, ``_SLOT_ANNOTATIONS`` and ``_DATA_CLASS_ANCESTORS`` for a new subclass.

        Raises:
            TypeError: If the subclass re-declares a slot already defined by one of its dataclass ancestors.
        """
        super().__init_subclass__(**kwargs)
        ancestors = [
            c
            for c in cls.__mro__
            if c is not cls and issubclass(c, DataClassNode) and c is not DataClassNode
        ]
        cls._DATA_CLASS_ANCESTORS = ancestors
        ancestor_slot_names = {
            name: a
            for a in ancestors
            for name in a._SLOTS
        }
        if not hasattr(cls, "_SLOT_ANNOTATIONS") or cls._SLOT_ANNOTATIONS is None:
            cls._SLOT_ANNOTATIONS = {}
            cls._SLOTS = ()
        else:
            # Copy so that slots added below do not leak into the parent class's mapping.
            cls._SLOT_ANNOTATIONS = dict(cls._SLOT_ANNOTATIONS)
        new_slots = []
        for name, slot_type in cls.__annotations__.items():
            # Only annotations that are concrete TreeNode subclasses are treated as slots.
            if not isinstance(slot_type, type) or not issubclass(slot_type, TreeNode):
                continue
            if name in ancestor_slot_names:
                raise TypeError(f"Dataclass {cls.__name__} cannot redefine slot {name!r} because it is already "
                                f"defined in its superclass {ancestor_slot_names[name].__name__}")
            new_slots.append(name)
            cls._SLOT_ANNOTATIONS[name] = slot_type
        cls._SLOTS = cls._SLOTS + tuple(new_slots)

    def __hash__(self):
        return self.__hash

    def __iter__(self) -> Iterator[TreeNode]:
        """Iterates over the slot values in slot order."""
        for _, value in self.items():
            yield value

    def items(self) -> Iterator[Tuple[str, TreeNode]]:
        """Yields ``(slot name, slot value)`` pairs in slot order."""
        for slot in self._SLOTS:
            yield slot, getattr(self, slot)

    def to_obj(self):
        """Returns a dict mapping each slot name to the ``to_obj()`` of its value."""
        return {
            slot: getattr(self, slot).to_obj()
            for slot in self._SLOTS
        }

    def edits(self, node: TreeNode) -> Edit:
        """Returns a :class:`DataClassEdit` if ``node`` is a dataclass node with the same slot names, else a
        :class:`Replace`."""
        if isinstance(node, DataClassNode):
            our_slots = set(self._SLOTS)
            their_slots = set(node._SLOTS)
            if our_slots == their_slots:
                return DataClassEdit(self, node)
        return Replace(self, node)

    def calculate_total_size(self) -> int:
        """Returns the sum of the total sizes of all slot values."""
        return sum(s.calculate_total_size() for s in self)

    def print(self, printer: Printer):
        """Prints the node as ``ClassName(slot=value, ...)``."""
        # The color constant is spelled in upper case (Fore.YELLOW); `Fore.Yellow` does not exist.
        with printer.color(Fore.YELLOW):
            printer.write(self.__class__.__name__)
        printer.write("(")
        for i, slot in enumerate(self._SLOTS):
            if i > 0:
                printer.write(", ")
            with printer.color(Fore.RED):
                printer.write(slot)
            with printer.bright():
                printer.write("=")
            getattr(self, slot).print(printer)
        printer.write(")")

    def __len__(self):
        return len(self._SLOTS)

    def __eq__(self, other):
        return isinstance(other, DataClassNode) and dict(self.items()) == dict(other.items())

    def __repr__(self):
        attrs = ", ".join(
            f"{slot}={value!r}"
            for slot, value in self.items()
        )
        return f"{self.__class__.__name__}({attrs})"
173 |
--------------------------------------------------------------------------------
/graphtage/debug.py:
--------------------------------------------------------------------------------
1 | """
2 | Utilities to aid in debugging
3 | """
4 |
5 | from functools import partial
6 | from inspect import getmembers
7 |
DEBUG_MODE = False


if not DEBUG_MODE:
    class Debuggable:
        """Placeholder base class used when debugging is disabled; it adds no behavior."""
else:
    class Debuggable:
        """Base class that swaps in ``_debug_<name>`` members for their ``<name>`` counterparts on new instances."""
        _DEBUG_PATCHED: bool = False

        def __new__(cls, *args, **kwargs):
            instance = super().__new__(cls)
            if instance._DEBUG_PATCHED:
                return instance
            prefix = "_debug_"
            catch_all = None
            for attr_name, member in getmembers(instance):
                if not attr_name.startswith(prefix):
                    continue
                target = attr_name[len(prefix):]
                if target == "__all__":
                    # `_debug___all__` is applied to every public callable in the second pass below.
                    catch_all = member
                elif hasattr(instance, target):
                    # Keep the original reachable as `_original_<name>` before overriding it.
                    setattr(instance, f"_original_{target}", getattr(instance, target))
                    setattr(instance, target, member)
            if catch_all is not None:
                for attr_name, member in getmembers(instance):
                    if attr_name.startswith("_") or not callable(member):
                        continue
                    setattr(instance, attr_name, partial(catch_all, attr_name, member))
            instance._DEBUG_PATCHED = True
            return instance
42 |
--------------------------------------------------------------------------------
/graphtage/fibonacci.py:
--------------------------------------------------------------------------------
1 | """A pure Python implementation of a `Fibonacci Heap`_.
2 |
3 | Many of the algorithms in Graphtage only require partially sorting collections, so we can get a speedup from using a
4 | Fibonacci Heap that has amortized constant time insertion.
5 |
6 | .. _Fibonacci Heap:
7 | https://en.wikipedia.org/wiki/Fibonacci_heap
8 |
9 | """
10 |
11 | from typing import Callable, Generic, Iterator, Optional, TypeVar
12 |
T = TypeVar('T')
Key = TypeVar('Key')
# Sentinel that lets HeapNode tell "no key given" apart from an explicit key of None.
DefaultKey = object()


class HeapNode(Generic[T, Key]):
    """A node in a :class:`FibonacciHeap`."""
    def __init__(self, item: T, key: Key = DefaultKey):
        """Initializes a Fibonacci heap node.

        Args:
            item: The heap item associated with the node.
            key: An optional key to use for the item in sorting. If omitted, the item itself will be used.

        """
        self.item: T = item
        """The item associated with this heap node."""
        self.key: Key = item if key is DefaultKey else key
        """The key to be used when sorting this heap node."""
        self.parent: Optional[HeapNode[T, Key]] = None
        """The node's parent."""
        self.child: Optional[HeapNode[T, Key]] = None
        """The node's child."""
        self.left: HeapNode[T, Key] = self
        """The left sibling of this node, or :obj:`self` if it has no left sibling."""
        self.right: HeapNode[T, Key] = self
        """The right sibling of this node, or :obj:`self` if it has no right sibling."""
        self.degree: int = 0
        """The degree of this node (*i.e.*, the number of its children)."""
        self.mark: bool = False
        """The node's marked state."""
        self.deleted: bool = False
        """Whether the node has been deleted.

        This is to prevent nodes from being manipulated after they have been removed from a heap.

        Warning:
            Do not set :attr:`HeapNode.deleted` to :const:`True` unless the node has already been removed from the heap.

        """

    def add_child(self, node):
        """Adds a child to this heap node, incrementing its degree.

        The new node is spliced into the circular child list directly to the right of
        :attr:`child`. The node's :attr:`parent` is *not* set here.

        """
        assert node != self
        first = self.child
        if first is None:
            self.child = node
        else:
            successor = first.right
            node.right = successor
            node.left = first
            successor.left = node
            first.right = node
        self.degree += 1

    def remove_child(self, node):
        """Removes a child from this heap node, decrementing its degree.

        The removed node's own ``left``/``right`` references are left untouched.

        """
        assert self.child is not None
        if self.child is self.child.right:
            # The child list has a single entry, so it becomes empty.
            self.child = None
        elif self.child is node:
            self.child = node.right
            node.right.parent = self
        before, after = node.left, node.right
        before.right = after
        after.left = before
        self.degree -= 1

    @property
    def siblings(self) -> Iterator['HeapNode[T, Key]']:
        """Iterates over this node's siblings.

        Walks the circular list to the right, starting after this node and stopping when
        it comes back around; the node itself is not yielded.

        """
        current = self.right
        while current is not self:
            yield current
            current = current.right

    @property
    def children(self) -> Iterator['HeapNode[T, Key]']:
        """Iterates over this node's children.

        Yields :attr:`child` followed by that child's siblings, or nothing if the node has
        no child.

        """
        assert (self.degree == 0 and self.child is None) or (self.degree == 1 + sum(1 for _ in self.child.siblings))
        head = self.child
        if head is None:
            return
        yield head
        yield from head.siblings

    def __iter__(self) -> Iterator['HeapNode[T, Key]']:
        """Iterates over this node, its descendants, and its siblings with their descendants."""
        node = self
        while True:
            yield node
            if node.child is not None:
                yield from iter(node.child)
            node = node.right
            if node is self:
                break

    def __lt__(self, other):
        # A deleted node sorts before any node that has not been deleted.
        if self.deleted and not other.deleted:
            return True
        return self.key < other.key

    def __le__(self, other):
        smaller = self < other
        if smaller:
            return smaller
        return self.key == other.key

    def __eq__(self, other):
        # Nodes are only ever equal to themselves.
        return self is other

    def __hash__(self):
        return hash(self.item)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f"{cls_name}(item={self.item!r}, key={self.key!r})"
139 |
140 |
class FibonacciHeap(Generic[T, Key]):
    """A Fibonacci Heap.

    Items are wrapped in :class:`HeapNode` objects. The roots form a circular, doubly
    linked list reachable from ``_root``; ``_min`` references the smallest root.

    """
    def __init__(self, key: Optional[Callable[[T], Key]] = None):
        """Initializes a Fibonacci heap.

        Args:
            key: An optional function that accepts an item and returns the key to be used for comparing that item.
                If omitted, it is equivalent to::

                    lambda item: item

        """
        if key is None:
            self.key = lambda a: a
            """The function to extract comparison keys from items."""
        else:
            self.key: Callable[[T], Key] = key
        self._min: Optional[HeapNode[T, Key]] = None
        self._root: Optional[HeapNode[T, Key]] = None
        self._n: int = 0

    def clear(self):
        """Removes all items from this heap."""
        self._min = None
        self._root = None
        self._n = 0

    def peek(self) -> T:
        """Returns the smallest element of the heap without removing it.

        Returns:
            T: The smallest element of the heap.

        Raises:
            AttributeError: If the heap is empty (there is no minimum node to read from).

        """
        # Discard any minimum nodes that have been flagged as deleted.
        while self._min is not None and self._min.deleted:
            self._extract_min()
        return self._min.item

    def remove(self, node: HeapNode[T, Key]):
        """Removes the given node from this heap.

        Args:
            node: The node to be removed.

        Warning:
            This function assumes that the provided node is actually a member of this heap. It also assumes (but does
            not check) that :attr:`node.deleted <HeapNode.deleted>` is :const:`False`. If either of these assumptions
            is incorrect, it will lead to undefined behavior and corruption of the heap.

        """
        # A deleted node compares smaller than any live node, so it can be moved to the
        # root list and extracted as if it were the minimum.
        node.deleted = True
        y = node.parent
        if y is not None and node < y:
            self._cut(node, y)
            self._cascading_cut(y)
        self._min = node
        self._extract_min()

    @property
    def min_node(self) -> HeapNode[T, Key]:
        """Returns the heap node associated with the smallest item in the heap, without removing it."""
        return self._min

    @property
    def _roots(self) -> Iterator[HeapNode[T, Key]]:
        """Iterates over the nodes in the root list."""
        if self._root is not None:
            yield self._root
            yield from self._root.siblings

    def __len__(self):
        return self._n

    def __bool__(self):
        return self._n > 0

    def __iter__(self) -> Iterator[T]:
        """Iterates over every item in the heap, in no particular order."""
        # Go through nodes() so that an empty heap yields nothing; iterating ``self._root``
        # directly raised a TypeError when it was None.
        for node in self.nodes():
            yield node.item

    def nodes(self) -> Iterator[HeapNode[T, Key]]:
        """Iterates over all of the heap nodes in this heap."""
        if self._root is None:
            return
        yield from iter(self._root)

    def _extract_min(self) -> HeapNode[T, Key]:
        """Unlinks and returns the current minimum node, or :const:`None` if there is none."""
        z = self._min
        if z is not None:
            if z.child is not None:
                # Promote every child of the minimum to the root list. The children are
                # copied to a list first because appending rewires their sibling links.
                for child in list(z.children):
                    self._append_root(child)
                    child.parent = None
            self._remove_root(z)
            if z == z.right:
                # z was the only root (its own links are untouched by _remove_root).
                self._min = self._root = None
            else:
                self._min = z.right
                self._consolidate()
            self._n -= 1
        return z

    def push(self, item: T) -> HeapNode[T, Key]:
        """Adds a new item to this heap.

        Returns:
            HeapNode[T, Key]: The heap node created to store the new item.

        """
        node = HeapNode(item=item, key=self.key(item))
        node.left = node.right = node
        self._append_root(node)
        if self._min is None or node < self._min:
            self._min = node
        self._n += 1
        return node

    def decrease_key(self, x: HeapNode[T, Key], k: Key):
        """Decreases the key value associated with the given node.

        Args:
            x: The node to modify.
            k: The new key value.

        Raises:
            ValueError: If :attr:`x.key <HeapNode.key>` is less than :obj:`k`.

        """
        if x.key < k:
            raise ValueError(f"The key can only decrease! New key {k!r} > old key {x.key!r}.")
        x.key = k
        y = x.parent
        if y is not None and x < y:
            # Heap order is violated: move x to the root list.
            self._cut(x, y)
            self._cascading_cut(y)
        if x < self._min:
            self._min = x

    def __add__(self, other):
        """Returns the union of this heap and ``other``.

        If either heap is empty the other one is returned as-is. Otherwise the two root
        lists are spliced together in place, so neither operand should be used afterwards.

        """
        if not other:
            return self
        elif not self:
            return other
        merged = FibonacciHeap(key=self.key)
        merged._root, merged._min = self._root, self._min
        merged.key = self.key
        # Concatenate the two circular root lists.
        last = other._root.left
        other._root.left = merged._root.left
        merged._root.left.right = other._root
        merged._root.left = last
        merged._root.left.right = merged._root
        if other._min < merged._min:
            merged._min = other._min
        merged._n = self._n + other._n
        return merged

    def _cut(self, x: HeapNode[T, Key], y: HeapNode[T, Key]):
        """Detaches ``x`` from its parent ``y`` and makes it an unmarked root."""
        y.remove_child(x)
        self._append_root(x)
        x.parent = None
        x.mark = False

    def _cascading_cut(self, y: HeapNode[T, Key]):
        """Marks ``y`` after losing a child, or cuts it (and recurses) if already marked."""
        z = y.parent
        if z is not None:
            if y.mark is False:
                y.mark = True
            else:
                self._cut(y, z)
                self._cascading_cut(z)

    def _consolidate(self):
        """Links roots of equal degree until all root degrees are distinct, then resets ``_min``."""
        # Table indexed by degree. It is grown on demand instead of being pre-allocated
        # with one slot per heap item, which made every extraction linear in the heap size.
        by_degree = []
        for x in list(self._roots):
            d = x.degree
            while d < len(by_degree) and by_degree[d] is not None:
                y = by_degree[d]
                if y < x:
                    x, y = y, x
                # x is now the smaller of the two; y becomes its child.
                self._link(y, x)
                by_degree[d] = None
                d += 1
            while len(by_degree) <= d:
                by_degree.append(None)
            by_degree[d] = x
        for node in by_degree:
            if node is not None and node <= self._min:
                self._min = node

    def _link(self, y: HeapNode[T, Key], x: HeapNode[T, Key]):
        """Removes root ``y`` from the root list and makes it an unmarked child of ``x``."""
        self._remove_root(y)
        y.left = y.right = y
        x.add_child(y)
        y.parent = x
        y.mark = False

    def _append_root(self, node: HeapNode[T, Key]):
        """Inserts ``node`` into the root list, directly to the right of ``_root``."""
        if self._root is None:
            self._root = node
        else:
            node.right = self._root.right
            node.left = self._root
            self._root.right.left = node
            self._root.right = node

    def _remove_root(self, node: HeapNode[T, Key]):
        """Unlinks ``node`` from the root list; the node's own links are left unchanged."""
        if node == self._root:
            self._root = node.right
        node.left.right = node.right
        node.right.left = node.left

    def pop(self) -> T:
        """Returns and removes the smallest item from this heap.

        Raises:
            AttributeError: If the heap is empty (no node is extracted).

        """
        while self._min is not None and self._min.deleted:
            self._extract_min()
        return self._extract_min().item
355 |
356 |
class ReversedComparator(Generic[Key]):
    """Wraps a key so that ordering comparisons between wrappers are inverted.

    Equality and hashing are delegated to the wrapped key unchanged.

    """
    def __init__(self, key: Key):
        self.key = key

    def __lt__(self, other):
        mine, theirs = self.key, other.key
        return mine > theirs

    def __le__(self, other):
        mine, theirs = self.key, other.key
        return mine >= theirs

    def __eq__(self, other):
        mine, theirs = self.key, other.key
        return mine == theirs

    def __hash__(self):
        wrapped = self.key
        return hash(wrapped)
373 |
374 |
class MaxFibonacciHeap(Generic[T, Key], FibonacciHeap[T, ReversedComparator[Key]]):
    """A Fibonacci Heap that yields items in decreasing order, using a :class:`ReversedComparator`."""
    def __init__(self, key: Optional[Callable[[T], Key]] = None):
        """Initializes the heap.

        Args:
            key: An optional function mapping an item to its comparison key. If omitted, the
                item itself is used. The result is wrapped in a :class:`ReversedComparator`.

        """
        extract = key if key is not None else (lambda n: n)

        def reversed_key(item):
            return ReversedComparator(extract(item))

        super().__init__(key=reversed_key)
382 |
--------------------------------------------------------------------------------
/graphtage/json.py:
--------------------------------------------------------------------------------
1 | """A :class:`graphtage.Filetype` for parsing, diffing, and rendering `JSON files`_.
2 |
3 | .. _JSON files:
4 | https://tools.ietf.org/html/std90
5 |
6 | """
7 |
8 | import json
9 | import json5
10 | import os
11 | from typing import Optional, Union
12 |
13 | from .graphtage import BoolNode, BuildOptions, DictNode, Filetype, FixedKeyDictNode, \
14 | FloatNode, IntegerNode, KeyValuePairNode, LeafNode, ListNode, NullNode, StringFormatter, StringNode
15 | from .printer import DEFAULT_PRINTER, Fore, Printer
16 | from .sequences import SequenceFormatter
17 | from .tree import ContainerNode, GraphtageFormatter, TreeNode
18 |
19 |
def build_tree(
        python_obj: Union[int, float, bool, str, bytes, list, dict],
        options: Optional[BuildOptions] = None,
        force_leaf_node: bool = False) -> TreeNode:
    """Converts a Python object into the corresponding Graphtage tree.

    Args:
        python_obj: The object from which to build the tree.
        options: An optional set of options for building the tree. A default
            :class:`BuildOptions` is created if omitted.
        force_leaf_node: If :const:`True`, assume that :obj:`python_obj` is *not* a :func:`list` or :func:`dict`.

    Returns:
        TreeNode: The resulting tree.

    Raises:
        ValueError: If :obj:`force_leaf_node` is :const:`True` and :obj:`python_obj` is *not* one of :class:`int`,
            :class:`float`, :class:`bool`, :class:`str`, or :class:`bytes`.
        ValueError: If the object is of an unsupported type.

    """
    if options is None:
        options = BuildOptions()

    # Leaf types first. bool is tested before int because bool is a subclass of int.
    if isinstance(python_obj, bool):
        return BoolNode(python_obj)
    if isinstance(python_obj, int):
        return IntegerNode(python_obj)
    if isinstance(python_obj, float):
        return FloatNode(python_obj)
    if isinstance(python_obj, str):
        return StringNode(python_obj)
    if isinstance(python_obj, bytes):
        return StringNode(python_obj.decode('utf-8'))

    if force_leaf_node:
        raise ValueError(f"{python_obj!r} was expected to be an int or string, but was instead a {type(python_obj)}")

    if isinstance(python_obj, (list, tuple)):
        elements = [
            build_tree(element, options=options)
            for element in DEFAULT_PRINTER.tqdm(python_obj, delay=2.0, desc="Loading JSON List", leave=False)
        ]
        return ListNode(
            elements,
            allow_list_edits=options.allow_list_edits,
            allow_list_edits_when_same_length=options.allow_list_edits_when_same_length
        )

    if isinstance(python_obj, dict):
        pairs = DEFAULT_PRINTER.tqdm(python_obj.items(), delay=2.0, desc="Loading JSON Dict", leave=False)
        # Keys must be leaves; values may be arbitrary subtrees.
        dict_items = {
            build_tree(key, options=options, force_leaf_node=True): build_tree(value, options=options)
            for key, value in pairs
        }
        if not options.allow_key_edits:
            return FixedKeyDictNode.from_dict(dict_items)
        dict_node = DictNode.from_dict(dict_items)
        dict_node.auto_match_keys = options.auto_match_keys
        return dict_node

    if python_obj is None:
        return NullNode()

    raise ValueError(f"Unsupported Python object {python_obj!r} of type {type(python_obj)}")
77 |
78 |
class JSONListFormatter(SequenceFormatter):
    """A sub-formatter for JSON lists."""
    is_partial = True

    def __init__(self):
        """Initializes the formatter with ``[`` and ``]`` as delimiters and ``,`` as separator."""
        super().__init__('[', ']', ',')

    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
        """Writes a newline unless the printer has a truthy ``join_lists`` attribute."""
        if getattr(printer, 'join_lists', False):
            return
        printer.newline()

    def print_ListNode(self, *args, **kwargs):
        """Prints a :class:`graphtage.ListNode` using the inherited sequence printing."""
        super().print_SequenceNode(*args, **kwargs)

    def print_SequenceNode(self, *args, **kwargs):
        """Prints a non-List sequence.

        The call is handed to the parent formatter's ``print``, which should invoke
        :meth:`JSONFormatter.print` and thereby reach the :class:`JSONDictFormatter` in
        instances where a list contains a dict.

        """
        parent = self.parent
        parent.print(*args, **kwargs)
119 |
120 |
class JSONDictFormatter(SequenceFormatter):
    """A sub-formatter for JSON dicts."""
    is_partial = True

    def __init__(self):
        """Initializes the formatter with ``{`` and ``}`` as delimiters and ``,`` as separator."""
        super().__init__('{', '}', ',')

    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
        """Writes a newline unless the printer has a truthy ``join_dict_items`` attribute."""
        if getattr(printer, 'join_dict_items', False):
            return
        printer.newline()

    def print_MultiSetNode(self, *args, **kwargs):
        """Prints a :class:`graphtage.MultiSetNode` using the inherited sequence printing."""
        super().print_SequenceNode(*args, **kwargs)

    def print_MappingNode(self, *args, **kwargs):
        """Prints a :class:`graphtage.MappingNode` using the inherited sequence printing."""
        super().print_SequenceNode(*args, **kwargs)

    def print_SequenceNode(self, *args, **kwargs):
        """Prints a non-Dict sequence.

        The call is handed to the parent formatter's ``print``, which should invoke
        :meth:`JSONFormatter.print` and thereby reach the :class:`JSONListFormatter` in
        instances where a dict contains a list.

        """
        parent = self.parent
        parent.print(*args, **kwargs)
164 |
165 |
class JSONStringFormatter(StringFormatter):
    """A JSON formatter for strings."""
    is_partial = True

    def write_start_quote(self, printer: Printer, _):
        """Writes the opening ``"`` and records that the string is quoted."""
        # JSON strings are always quoted
        self.is_quoted = True
        printer.write('"')

    def write_end_quote(self, printer: Printer, _):
        """Writes the closing ``"`` and records that the string is quoted."""
        # JSON strings are always quoted
        self.is_quoted = True
        printer.write('"')

    def escape(self, c: str) -> str:
        """Escapes a piece of a string for inclusion in JSON output.

        Returns:
            str: ``json.dumps(c)`` with its first and last characters (the enclosing quotes) removed.

        """
        encoded = json.dumps(c)
        # Drop the quotes that json.dumps puts around the string.
        return encoded[1:-1]
197 |
198 |
class JSONFormatter(GraphtageFormatter):
    """The default JSON formatter."""
    sub_format_types = [JSONStringFormatter, JSONListFormatter, JSONDictFormatter]

    def print_LeafNode(self, printer: Printer, node: LeafNode):
        """Prints a :class:`graphtage.LeafNode` as the JSON encoding of ``node.object``."""
        encoded = json.dumps(node.object)
        printer.write(encoded)

    def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode):
        """Prints a :class:`graphtage.KeyValuePairNode`.

        The key is printed in blue, followed by a bright ": ", followed by the value.

        """
        with printer.color(Fore.BLUE):
            self.print(printer, node.key)
        with printer.bright():
            printer.write(": ")
        self.print(printer, node.value)

    def print_ContainerNode(self, printer: Printer, node: ContainerNode):
        """Prints a :class:`graphtage.ContainerNode`.

        This is a fallback to permit the printing of custom containers, like :class:`graphtage.xml.XMLElement`.
        Copies of the container's children are wrapped in a :class:`ListNode`, which is then printed.

        """
        copies = (child.copy() for child in node.children())
        self.print(printer, ListNode(copies))
234 |
235 |
class JSON(Filetype):
    """The JSON file type."""
    def __init__(self):
        """Initializes the JSON file type.

        By default, JSON associates itself with the "json", "application/json", "application/x-javascript",
        "text/javascript", "text/x-javascript", and "text/x-json" MIME types.

        """
        names = (
            'json',
            'application/json',
            'application/x-javascript',
            'text/javascript',
            'text/x-javascript',
            'text/x-json',
        )
        super().__init__(*names)

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Parses the file at ``path`` with :func:`json.load` and builds a tree from the result."""
        with open(path) as f:
            parsed = json.load(f)
            return build_tree(parsed, options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Like :meth:`build_tree`, but returns a message string if the JSON cannot be decoded."""
        try:
            return self.build_tree(path=path, options=options)
        except json.decoder.JSONDecodeError as de:
            filename = os.path.basename(path)
            return f'Error parsing {filename}: {de.msg}: line {de.lineno}, column {de.colno} ' \
                   f'(char {de.pos})'

    def get_default_formatter(self) -> JSONFormatter:
        return JSONFormatter.DEFAULT_INSTANCE
267 |
268 |
class JSON5(Filetype):
    """The JSON5 file type."""
    def __init__(self):
        """Initializes the JSON5 file type.

        By default, JSON5 associates itself with the "json5", "application/json5", and "text/x-json5" MIME types.

        """
        super().__init__(
            'json5',
            'application/json5',
            'text/x-json5'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Parses the file at ``path`` with :func:`json5.load` and builds a tree from the result."""
        with open(path) as f:
            return build_tree(json5.load(f), options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Like :meth:`build_tree`, but returns a message string if a :exc:`ValueError` is raised.

        Returns:
            The tree on success, otherwise a string of the form ``Error parsing <file name>: <error>``.

        """
        try:
            return self.build_tree(path=path, options=options)
        except ValueError as ve:
            # ``!s`` is a conversion and must not follow a colon: ``{ve:!s}`` passed "!s" as a
            # format spec, which exceptions reject with a TypeError, hiding the parse error.
            return f'Error parsing {os.path.basename(path)}: {ve!s}'

    def get_default_formatter(self) -> JSONFormatter:
        return JSONFormatter.DEFAULT_INSTANCE
295 |
--------------------------------------------------------------------------------
/graphtage/multiset.py:
--------------------------------------------------------------------------------
1 | """A module for representing an edit on a multiset.
2 |
3 | This is used by :class:`graphtage.MultiSetNode` and :class:`graphtage.DictNode`, since the latter is a multiset containing
4 | :class:`graphtage.KeyValuePairNode` objects.
5 |
6 | """
7 |
8 | from typing import Iterator, List
9 |
10 | import graphtage
11 | from .bounds import Range
12 | from .edits import Insert, Match, Remove
13 | from .matching import WeightedBipartiteMatcher
14 | from .sequences import SequenceEdit, SequenceNode
15 | from .tree import Edit, TreeNode
16 | from .utils import HashableCounter, largest
17 |
18 |
class MultiSetEdit(SequenceEdit):
    """An edit matching one unordered collection of items to another.

    It works by using a :class:`graphtage.matching.WeightedBipartiteMatcher` to find the minimum cost matching from
    the elements of one collection to the elements of the other.

    """
    def __init__(
            self,
            from_node: SequenceNode,
            to_node: SequenceNode,
            from_set: HashableCounter[TreeNode],
            to_set: HashableCounter[TreeNode],
            auto_match_keys: bool = True
    ):
        """Initializes the edit.

        Args:
            from_node: Any sequence node from which to match.
            to_node: Any sequence node to which to match.
            from_set: The set of nodes from which to match. These should typically be children of :obj:`from_node`, but
                this is neither checked nor enforced.
            to_set: The set of nodes to which to match. These should typically be children of :obj:`to_node`, but this
                is neither checked nor enforced.
            auto_match_keys: If `True`, any :class:`graphtage.KeyValuePairNode`s in :obj:`from_set` that have keys
                equal to :class:`graphtage.KeyValuePairNode`s in :obj:`to_set` will automatically be matched. Setting
                this to `False` will require a significant amount more computation for larger dictionaries.

        """
        self._matched_kvp_edits: List[Edit] = []
        if auto_match_keys:
            # Re-wrap both counters before their counts are decremented below
            # (presumably so that the caller's objects are left untouched).
            to_set = HashableCounter(to_set)
            from_set = HashableCounter(from_set)
            to_remove_from = []
            for f in from_set.keys():
                if not isinstance(f, graphtage.KeyValuePairNode):
                    continue
                for t in to_set.keys():
                    # Only key/value pairs can be matched by key. This used to re-test ``f``,
                    # so a non-pair ``t`` fell through to the ``t.key`` access below.
                    if not isinstance(t, graphtage.KeyValuePairNode):
                        continue
                    if f.key == t.key:
                        num_matched = min(from_set[f], to_set[t])
                        for _ in range(num_matched):
                            self._matched_kvp_edits.append(f.edits(t))
                        to_remove_from.append((f, num_matched))
                        break
                else:
                    # No pair in to_set shares this key.
                    continue
                to_set[t] -= num_matched
            # from_set is only updated once its iteration above has finished.
            for f, num_matched in to_remove_from:
                from_set[f] -= num_matched
        self.to_insert = to_set - from_set
        """The set of nodes in :obj:`to_set` that do not exist in :obj:`from_set`."""
        self.to_remove = from_set - to_set
        """The set of nodes in :obj:`from_set` that do not exist in :obj:`to_set`."""
        to_match = from_set & to_set
        self._edits: List[Edit] = [Match(n, n, 0) for n in to_match.elements()]
        self._matcher = WeightedBipartiteMatcher(
            from_nodes=self.to_remove.elements(),
            to_nodes=self.to_insert.elements(),
            get_edge=lambda f, t: f.edits(t)
        )
        super().__init__(
            from_node=from_node,
            to_node=to_node
        )

    def is_complete(self) -> bool:
        """Delegates to the matcher's ``is_complete``."""
        return self._matcher.is_complete()

    def edits(self) -> Iterator[Edit]:
        """Yields the zero-cost matches, the key-matched pair edits, and the matcher's edits.

        Nodes that the matcher left unmatched are yielded as :class:`Remove` or
        :class:`Insert` edits against :attr:`from_node`.

        """
        yield from self._edits
        yield from self._matched_kvp_edits
        remove_matched: HashableCounter[TreeNode] = HashableCounter()
        insert_matched: HashableCounter[TreeNode] = HashableCounter()
        for (rem, (ins, edit)) in self._matcher.matching.items():
            yield edit
            remove_matched[rem] += 1
            insert_matched[ins] += 1
        for rm in (self.to_remove - remove_matched).elements():
            yield Remove(to_remove=rm, remove_from=self.from_node)
        for ins in (self.to_insert - insert_matched).elements():
            yield Insert(to_insert=ins, insert_into=self.from_node)

    def tighten_bounds(self) -> bool:
        """Tightens the first key-matched edit that can still be tightened, else delegates to the matcher."""
        for kvp_edit in self._matched_kvp_edits:
            if kvp_edit.tighten_bounds():
                return True
        return self._matcher.tighten_bounds()

    def bounds(self) -> Range:
        """Returns the matcher's bounds plus those of the key-matched edits and of the surplus nodes.

        When one side has more nodes than the other, the bounds of the surplus
        removals/insertions selected by :func:`largest` are added as well.

        """
        b = self._matcher.bounds()
        for kvp_edit in self._matched_kvp_edits:
            b = b + kvp_edit.bounds()
        if len(self.to_remove) > len(self.to_insert):
            for edit in largest(
                    *(Remove(to_remove=r, remove_from=self.from_node) for r in self.to_remove),
                    n=len(self.to_remove) - len(self.to_insert),
                    key=lambda e: e.bounds()
            ):
                b = b + edit.bounds()
        elif len(self.to_remove) < len(self.to_insert):
            for edit in largest(
                    *(Insert(to_insert=i, insert_into=self.from_node) for i in self.to_insert),
                    n=len(self.to_insert) - len(self.to_remove),
                    key=lambda e: e.bounds()
            ):
                b = b + edit.bounds()
        return b
129 |
--------------------------------------------------------------------------------
/graphtage/object_set.py:
--------------------------------------------------------------------------------
1 | """
2 | A data structure that can hold a set of unique Python objects, even if those objects are not hashable.
3 | Uniqueness is determined based upon identity.
4 | """
5 |
6 | from collections.abc import MutableSet
7 | from typing import Any, Iterable, Set
8 |
9 |
class IdentityHash:
    """Wraps an object so that it hashes and compares by identity rather than by value.

    The wrapper holds a reference to the object, so the object stays alive for as long
    as the wrapper exists.

    """
    def __init__(self, obj):
        self.obj = obj

    def __hash__(self):
        return id(self.obj)

    def __eq__(self, other):
        if not isinstance(other, IdentityHash):
            return False
        return id(self.obj) == id(other.obj)


class ObjectSet(MutableSet):
    """A set that can hold unhashable Python objects

    Uniqueness is determined based upon identity.

    """
    def __init__(self, initial_objs: Iterable[Any] = ()):
        """Initializes the set, adding every object in ``initial_objs``."""
        self.objs: Set[IdentityHash] = set()
        for obj in initial_objs:
            self.add(obj)

    def add(self, value):
        """Adds ``value`` to the set; adding the same object again has no effect."""
        self.objs.add(IdentityHash(value))

    def discard(self, value):
        """Removes ``value`` from the set if it is present; otherwise does nothing."""
        # set.discard rather than set.remove: MutableSet.discard must not raise for absent
        # items, and mixins such as ``-=`` call it for items that may not be in the set.
        self.objs.discard(IdentityHash(value))

    def __contains__(self, x):
        return IdentityHash(x) in self.objs

    def __len__(self):
        return len(self.objs)

    def __iter__(self):
        for obj in self.objs:
            yield obj.obj

    def __str__(self):
        # Render the stored objects themselves, not their IdentityHash wrappers.
        return f"{{{', '.join(str(obj) for obj in self)}}}"
54 |
--------------------------------------------------------------------------------
/graphtage/pickle.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Optional, Union
3 |
4 | from fickling.fickle import Interpreter, Pickled, PickleDecodeError
5 |
6 | from .graphtage import BuildOptions, Filetype, TreeNode
7 | from .pydiff import ast_to_tree, PyDiffFormatter
8 |
9 |
class Pickle(Filetype):
    """The Python Pickle file type."""
    def __init__(self):
        """Initializes the Pickle file type.

        By default, Pickle associates itself with the "pickle", "application/python-pickle",
        and "application/x-python-pickle" MIME types.

        """
        names = ('pickle', 'application/python-pickle', 'application/x-python-pickle')
        super().__init__(*names)

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Loads the pickle at ``path``, converts it to an AST, and builds a tree from that AST."""
        with open(path, "rb") as stream:
            pickled = Pickled.load(stream)
            ast = Interpreter(pickled).to_ast()
            return ast_to_tree(ast, options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Like :meth:`build_tree`, but returns a message string on a :exc:`PickleDecodeError`."""
        try:
            tree = self.build_tree(path=path, options=options)
        except PickleDecodeError as e:
            return f'Error deserializing {os.path.basename(path)}: {e!s}'
        return tree

    def get_default_formatter(self) -> PyDiffFormatter:
        return PyDiffFormatter.DEFAULT_INSTANCE
40 |
--------------------------------------------------------------------------------
/graphtage/plist.py:
--------------------------------------------------------------------------------
1 | """A :class:`graphtage.Filetype` for parsing, diffing, and rendering Apple plist files."""
2 | import os
3 | from xml.parsers.expat import ExpatError
4 | from typing import Optional, Tuple, Union
5 |
6 | from plistlib import dumps, load
7 |
8 | from . import json
9 | from .edits import Edit, EditCollection, Match
10 | from .graphtage import BoolNode, BuildOptions, Filetype, FloatNode, KeyValuePairNode, IntegerNode, LeafNode, StringNode
11 | from .printer import Printer
12 | from .sequences import SequenceFormatter, SequenceNode
13 | from .tree import ContainerNode, GraphtageFormatter, TreeNode
14 |
15 |
class PLISTNode(ContainerNode):
    """A container holding the single root node of a plist document."""
    def __init__(self, root: TreeNode):
        self.root: TreeNode = root

    def to_obj(self):
        """Returns the Python object produced by the root node."""
        return self.root.to_obj()

    def edits(self, node: 'TreeNode') -> Edit:
        """Returns the edit from this document to ``node``.

        Against another :class:`PLISTNode`, the result is a collection holding a zero-cost
        match of the two wrappers followed by the edit between their roots. Against any
        other node, the root is diffed against that node directly.

        """
        if not isinstance(node, PLISTNode):
            return self.root.edits(node)
        sub_edits = (
            Match(self, node, 0),
            self.root.edits(node.root)
        )
        return EditCollection(
            from_node=self,
            to_node=node,
            edits=iter(sub_edits),
            collection=list,
            add_to_collection=list.append,
            explode_edits=False
        )

    def calculate_total_size(self) -> int:
        return self.root.calculate_total_size()

    def print(self, printer: Printer):
        """Writes the plist header, the root node, and the plist footer."""
        printer.write(PLIST_HEADER)
        self.root.print(printer)
        printer.write(PLIST_FOOTER)

    def __iter__(self):
        # The root is the only child.
        yield self.root

    def __len__(self) -> int:
        return 1
51 |
52 |
def build_tree(path: str, options: Optional[BuildOptions] = None, *args, **kwargs) -> PLISTNode:
    """Constructs a PLIST tree from a PLIST file.

    The file is parsed with :func:`plistlib.load`; the resulting object is converted with
    :func:`json.build_tree` (which also receives any extra arguments) and wrapped in a
    :class:`PLISTNode`.

    """
    with open(path, "rb") as stream:
        parsed = load(stream)
    root = json.build_tree(parsed, *args, options=options, **kwargs)
    return PLISTNode(root)
58 |
59 |
class PLISTSequenceFormatter(SequenceFormatter):
    """A sub-formatter for plist sequences (``<array>`` and ``<dict>`` elements).

    The base formatter is configured with empty delimiters and an empty separator because
    the XML element tags written here take their place.

    """
    is_partial = True

    def __init__(self):
        super().__init__('', '', '')

    def print_SequenceNode(self, printer: Printer, node: SequenceNode):
        """Hands any other kind of sequence back to the parent formatter."""
        self.parent.print(printer, node)

    def print_ListNode(self, printer: Printer, *args, **kwargs):
        """Prints a list as a plist ``<array>`` element."""
        # The element tags had degenerated into empty strings, so nothing was written
        # around the items; the same applies to <dict> and <key> below.
        printer.write("<array>")
        super().print_SequenceNode(printer, *args, **kwargs)
        printer.write("</array>")

    def print_MultiSetNode(self, printer: Printer, *args, **kwargs):
        """Prints a multiset (or mapping) as a plist ``<dict>`` element."""
        printer.write("<dict>")
        super().print_SequenceNode(printer, *args, **kwargs)
        printer.write("</dict>")

    def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode):
        """Prints a key/value pair as a ``<key>`` element, a newline, and then the value."""
        printer.write("<key>")
        if isinstance(node.key, StringNode):
            # String keys are written as bare text instead of through the string formatter.
            printer.write(node.key.object)
        else:
            self.print(printer, node.key)
        printer.write("</key>")
        printer.newline()
        self.print(printer, node.value)

    print_MappingNode = print_MultiSetNode
90 |
91 |
92 | def _plist_header_footer() -> Tuple[str, str]:
93 | string = "1234567890"
94 | encoded = dumps(string).decode("utf-8")
95 | expected = f"{string}"
96 | body_offset = encoded.find(expected)
97 | if body_offset <= 0:
98 | raise ValueError("Unexpected plist encoding!")
99 | return encoded[:body_offset], encoded[body_offset+len(expected):]
100 |
101 |
102 | PLIST_HEADER: str
103 | PLIST_FOOTER: str
104 | PLIST_HEADER, PLIST_FOOTER = _plist_header_footer()
105 |
106 |
class PLISTFormatter(GraphtageFormatter):
    """The formatter for rendering trees as Apple plist XML."""
    sub_format_types = [PLISTSequenceFormatter]

    def print(self, printer: Printer, *args, **kwargs):
        # PLIST uses an eight-space indent
        printer.indent_str = " " * 8
        super().print(printer, *args, **kwargs)

    @staticmethod
    def write_obj(printer: Printer, obj):
        """Serializes :obj:`obj` with :func:`plistlib.dumps` and writes it without the document header/footer."""
        encoded = dumps(obj).decode("utf-8")
        printer.write(encoded[len(PLIST_HEADER):-len(PLIST_FOOTER)])

    def print_StringNode(self, printer: Printer, node: StringNode):
        """Writes the string inside a ``<string>`` element."""
        printer.write(f"<string>{node.object}</string>")

    def print_IntegerNode(self, printer: Printer, node: IntegerNode):
        """Writes the integer inside an ``<integer>`` element."""
        printer.write(f"<integer>{node.object}</integer>")

    def print_FloatNode(self, printer: Printer, node: FloatNode):
        """Writes the float inside a ``<real>`` element, the plist element for floating point numbers."""
        printer.write(f"<real>{node.object}</real>")

    def print_BoolNode(self, printer, node: BoolNode):
        """Writes ``<true/>`` or ``<false/>``; previously both branches wrote an empty string."""
        if node.object:
            printer.write("<true/>")
        else:
            printer.write("<false/>")

    def print_LeafNode(self, printer: Printer, node: LeafNode):
        """Fallback for other leaf types: lets :mod:`plistlib` serialize the value."""
        self.write_obj(printer, node.object)

    def print_PLISTNode(self, printer: Printer, node: PLISTNode):
        """Writes the plist header, the wrapped root node, and then the plist footer."""
        printer.write(PLIST_HEADER)
        self.print(printer, node.root)
        printer.write(PLIST_FOOTER)
142 |
143 |
class PLIST(Filetype):
    """The Apple PLIST filetype."""
    def __init__(self):
        """Initializes the PLIST file type.

        By default, PLIST associates itself with the "plist" and "application/x-plist" MIME types.

        """
        super().__init__(
            'plist',
            'application/x-plist'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Builds the tree for the plist file at :obj:`path` and marks every string node as unquoted."""
        tree = build_tree(path=path, options=options)
        for node in tree.dfs():
            if isinstance(node, StringNode):
                node.quoted = False
        return tree

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Same as :meth:`PLIST.build_tree`, but returns an error message instead of raising on a parse failure.

        Returns:
            Union[str, TreeNode]: The tree, or a string describing why the file could not be parsed.

        """
        # Imported here so this method is self-contained; plistlib raises InvalidFileException for
        # files it does not recognize as a plist and for corrupt binary plists.
        from plistlib import InvalidFileException
        try:
            return self.build_tree(path=path, options=options)
        except (ExpatError, InvalidFileException) as ee:
            return f'Error parsing {os.path.basename(path)}: {ee}'

    def get_default_formatter(self) -> PLISTFormatter:
        """Returns the default :class:`PLISTFormatter` instance."""
        return PLISTFormatter.DEFAULT_INSTANCE
172 |
--------------------------------------------------------------------------------
/graphtage/progress.py:
--------------------------------------------------------------------------------
1 | """A module for printing status messages and progress bars to the command line."""
2 |
3 | import io
4 | import sys
5 | from types import TracebackType
6 | from typing import AnyStr, Iterable, Iterator, IO, List, Optional, TextIO, Type
7 |
8 | from tqdm import tqdm, trange
9 |
10 |
class StatusWriter(IO[str]):
    """A writer compatible with the :class:`graphtage.printer.Writer` protocol that can print status.

    See :meth:`StatusWriter.tqdm` and :meth:`StatusWriter.trange`. If :attr:`StatusWriter.status_stream` is either
    :attr:`sys.stdout` or :attr:`sys.stderr`, then bytes printed to this writer will be buffered. For each full line
    buffered, a call to :func:`tqdm.write` will be made.

    A status writer whose lifetime is not controlled by instantiation in a ``with`` block must be manually flushed
    with :meth:`StatusWriter.flush(final=True)` after its final write, or else the last line
    written may be lost.

    """
    def __init__(self, out_stream: Optional[TextIO] = None, quiet: bool = False):
        """Initializes a status writer.

        Args:
            out_stream: An optional stream to which to write. If omitted this defaults to :attr:`sys.stdout`.
            quiet: Whether or not :mod:`tqdm` status messages and progress should be suppressed.

        """
        self.quiet = quiet
        """Whether or not :mod:`tqdm` status messages and progress should be suppressed."""
        self._reentries: int = 0
        if out_stream is None:
            out_stream = sys.stdout
        self.status_stream: TextIO = out_stream
        """The status stream to which to print."""
        self._buffer: List[str] = []
        try:
            self.write_raw = self.quiet or (
                out_stream.fileno() != sys.stderr.fileno() and out_stream.fileno() != sys.stdout.fileno()
            )
            """If :const:`True`, this writer *will not* buffer output and use :func:`tqdm.write`.

            This defaults to::

                self.write_raw = self.quiet or (
                    out_stream.fileno() != sys.stderr.fileno() and out_stream.fileno() != sys.stdout.fileno()
                )

            """
        except io.UnsupportedOperation:
            # A stream without a file descriptor (e.g. an in-memory stream) is written to directly.
            self.write_raw = True

    def tqdm(self, *args, **kwargs) -> tqdm:
        """Returns a :class:`tqdm.tqdm` object."""
        # A quiet writer always disables the bar; otherwise an explicit `disable` from the caller wins.
        if self.quiet or 'disable' not in kwargs:
            kwargs['disable'] = self.quiet
        return tqdm(*args, **kwargs)

    def trange(self, *args, **kwargs) -> trange:
        """Returns a :class:`tqdm.trange` object."""
        if self.quiet or 'disable' not in kwargs:
            kwargs['disable'] = self.quiet
        return trange(*args, **kwargs)

    def flush(self, final=False):
        """Flushes this writer.

        If :obj:`final` is :const:`True`, any extra bytes will be flushed along with a final newline.

        """
        if final and self._buffer and not self._buffer[-1].endswith('\n'):
            self._buffer.append('\n')
        while self._buffer:
            if '\n' in self._buffer[0]:
                trailing_newline = self._buffer[0].endswith('\n')
                lines = self._buffer[0].split('\n')
                if not trailing_newline:
                    # Carry the incomplete last line over into the next buffer entry.
                    if len(self._buffer) == 1:
                        self._buffer.append(lines[-1])
                    else:
                        self._buffer[1] = f"{lines[-1]}{self._buffer[1]}"
                for line in lines[:-1]:
                    tqdm.write(line, file=self.status_stream)
                self._buffer = self._buffer[1:]
            elif len(self._buffer) == 1:
                # Only a partial line remains; keep it buffered until its newline arrives.
                break
            else:
                # Merge the fragments so that a newline in a later fragment is found on the next pass.
                self._buffer = [''.join(self._buffer)]
        return self.status_stream.flush()

    def write(self, text: str) -> int:
        """Writes :obj:`text`, either directly to the stream or into the line buffer."""
        if self.write_raw:
            return self.status_stream.write(text)
        self._buffer.append(text)
        if '\n' in text:
            self.flush()
        return len(text)

    def close(self) -> None:
        """Flushes any buffered text and then closes the underlying stream."""
        self.flush(final=True)
        return self.status_stream.close()

    def fileno(self) -> int:
        return self.status_stream.fileno()

    def isatty(self) -> bool:
        return self.status_stream.isatty()

    # The defaults below mirror the ones documented for the `io` classes. The previous `...` defaults
    # were forwarded verbatim to the underlying stream and raised a TypeError there.

    def read(self, n: int = -1) -> AnyStr:
        return self.status_stream.read(n)

    def readable(self) -> bool:
        return self.status_stream.readable()

    def readline(self, limit: int = -1) -> AnyStr:
        return self.status_stream.readline(limit)

    def readlines(self, hint: int = -1) -> List[AnyStr]:
        return self.status_stream.readlines(hint)

    def seek(self, offset: int, whence: int = 0) -> int:
        return self.status_stream.seek(offset, whence)

    def seekable(self) -> bool:
        return self.status_stream.seekable()

    def tell(self) -> int:
        return self.status_stream.tell()

    def truncate(self, size: Optional[int] = None) -> int:
        return self.status_stream.truncate(size)

    def writable(self) -> bool:
        return self.status_stream.writable()

    def writelines(self, lines: Iterable[AnyStr]) -> None:
        # NOTE(review): this writes straight to the stream and bypasses the line buffer used by write().
        return self.status_stream.writelines(lines)

    @property
    def closed(self) -> bool:
        return self.status_stream.closed

    @property
    def mode(self) -> str:
        return self.status_stream.mode

    @property
    def name(self) -> str:
        return self.status_stream.name

    def __next__(self) -> AnyStr:
        return next(self.status_stream)

    def __iter__(self) -> Iterator[AnyStr]:
        return iter(self.status_stream)

    def __enter__(self) -> IO[AnyStr]:
        # Count nested `with` blocks so that only the outermost exit performs the final flush.
        self._reentries += 1
        return self

    def __exit__(self, t: Optional[Type[BaseException]], value: Optional[BaseException],
                 traceback: Optional[TracebackType]) -> Optional[bool]:
        self._reentries -= 1
        if self._reentries == 0:
            self.flush(final=True)

    def __delete__(self, instance):
        # NOTE(review): `__delete__` is the descriptor-protocol hook, not a finalizer; it is not invoked
        # when a writer is garbage collected. If a finalizer was intended this should be `__del__` - confirm.
        self.flush(final=True)
171 |
--------------------------------------------------------------------------------
/graphtage/search.py:
--------------------------------------------------------------------------------
1 | """A module for solving a search problem in an iteratively revealed search space.
2 |
3 | **Given:** an iterator that yields an unknown but finite number of integer range objects, *e.g.*, ``[100, 200]``,
4 | ``[50, 1000]``, ``[60, 500]``, …. Each integer range object has a member function that is guaranteed to tighten the
5 | bounds of the range, such that the range monotonically shrinks and converges toward a specific number (*i.e.*, it
6 | conforms to the :class:`graphtage.bounds.Bounded` protocol). For example, ``[100, 200].tighten()`` →
7 | ``[150, 160].tighten()`` → ``[150, 155].tighten()`` → ``[153, 153]`` → ``153``. Each object might have a different
8 | tighten function; we cannot make any assumptions about the rate of convergence, other than that the bounds are
9 | guaranteed to shrink with each call to ``tighten()``.
10 |
11 | **Goal:** Create the most computationally efficient algorithm to determine the range object that converges to the
12 | smallest integer (*i.e.*, with the fewest possible tightenings).
13 |
14 | """
15 |
16 | from typing import Generic, Iterator, Optional, TypeVar
17 |
18 | from .bounds import Bounded, NEGATIVE_INFINITY, POSITIVE_INFINITY, Range
19 | from .fibonacci import FibonacciHeap, HeapNode
20 |
# Type variable for the items being searched; constrained to `Bounded` implementations.
B = TypeVar('B', bound=Bounded)
22 |
23 |
class IterativeTighteningSearch(Bounded, Generic[B]):
    """Implementation of iterative tightening search on a given sequence of :class:`graphtage.bounds.Bounded` objects.

    The search class itself is :class:`graphtage.bounds.Bounded`, with bounds on the value of the optimal solution.
    Each call to :meth:`IterativeTighteningSearch.tighten_bounds` will improve these bounds, if possible.

    """
    def __init__(self,
                 possibilities: Iterator[B],
                 initial_bounds: Optional[Range] = None):
        """Initializes the search.

        Args:
            possibilities: An iterator yielding :class:`graphtage.bounds.Bounded` objects over which to search.
            initial_bounds: Bounds on the optimal solution, if known. Having good initial bounds can greatly speed up
                the search. However, if the initial bounds are incorrect (*i.e.*, if the true optimal solution lies
                outside of :obj:`initial_bounds`), then the resulting solution may be incorrect.

        """

        def get_range(bounded: Bounded) -> Range:
            return bounded.bounds()

        self._unprocessed: Iterator[B] = possibilities

        # Heap to track the ranges with the lowest upper bound
        self._untightened: FibonacciHeap[B, Range] = FibonacciHeap(key=get_range)

        # Fully tightened (`definitive`) ranges, sorted by increasing bound
        self._tightened: FibonacciHeap[B, Range] = FibonacciHeap(key=get_range)

        if initial_bounds is None:
            self.initial_bounds = Range(NEGATIVE_INFINITY, POSITIVE_INFINITY)
        else:
            self.initial_bounds = initial_bounds

    def __bool__(self):
        """Returns whether the search may still have work to do.

        This is truthy while the input iterator is still held and truthy, or while candidates remain and this
        search's bounds are not yet :meth:`definitive`.

        """
        return bool(self._unprocessed or ((self._untightened or self._tightened) and not self.bounds().definitive()))

    @property
    def best_match(self) -> Optional[B]:
        """Returns the best solution the search has thus found.

        Returns:
            Optional[B]: The best solution the search has thus found, or :const:`None` if it has not yet found a
            feasible solution.

        """
        # No answer is reported until the input iterator has been exhausted.
        if self._unprocessed is not None or not (self._untightened or self._tightened):
            return None
        elif self._tightened and self._untightened:
            if self._untightened.peek().bounds() < self._tightened.peek().bounds():
                return self._untightened.peek()
            else:
                return self._tightened.peek()
        elif self._tightened:
            return self._tightened.peek()
        else:
            return self._untightened.peek()

    def remove_best(self) -> Optional[B]:
        """Removes and returns the current best solution found by the search, if one exists.

        This enables one to iteratively sort the input sequence. However, this function is only guaranteed to return
        the globally optimal item if :meth:`IterativeTighteningSearch.goal_test` returns :const:`True`. Therefore,
        to generate a total ordering over the input sequence, you should tighten bounds until the goal is reached before
        each call to this function::

            while search.tighten_bounds():
                while not search.goal_test() and search.tighten_bounds():
                    pass
                if search.goal_test():
                    yield search.remove_best()
            while search.goal_test():
                yield search.remove_best()

        However, if your goal is to produce a total ordering, :func:`graphtage.bounds.sort` is more efficient.

        """
        if self._unprocessed is not None or not (self._untightened or self._tightened):
            return None
        elif self._tightened and self._untightened:
            if self._untightened.peek().bounds() < self._tightened.peek().bounds():
                heap = self._untightened
            else:
                heap = self._tightened
        elif self._tightened:
            heap = self._tightened
        else:
            heap = self._untightened
        return heap.pop()

    def search(self) -> B:
        """Finds and returns the smallest item, fully tightened.

        This is equivalent to::

            while self.tighten_bounds():
                pass
            return self.best_match

        """
        while self.tighten_bounds():
            pass
        return self.best_match

    def _nodes(self) -> Iterator[HeapNode[B, Range]]:
        """Yields the nodes of the untightened heap followed by those of the tightened heap."""
        yield from self._untightened.nodes()
        yield from self._tightened.nodes()

    def bounds(self) -> Range:
        """Returns the current bounds on the value of the optimal solution.

        Until a best match exists this is :attr:`initial_bounds`. Afterwards, the upper bound is the best match's
        upper bound and the lower bound is the smallest lower bound among the live heap keys, never below the
        initial lower bound and never above the upper bound.

        """
        if self.best_match is None:
            return self.initial_bounds
        else:
            if self._unprocessed is None and (self._untightened or self._tightened):
                lb = POSITIVE_INFINITY
                for node in self._nodes():
                    if not node.deleted:
                        lb = min(node.key.lower_bound, lb)
                if lb == POSITIVE_INFINITY or lb < self.initial_bounds.lower_bound:
                    lb = self.initial_bounds.lower_bound
            else:
                lb = self.initial_bounds.lower_bound
            return Range(min(lb, self.best_match.bounds().upper_bound), self.best_match.bounds().upper_bound)

    def _delete_node(self, node: HeapNode[B, Range]):
        """Removes :obj:`node` from the untightened heap and marks it as deleted."""
        # Force the node to the top of the heap so that pop() removes exactly this node.
        self._untightened.decrease_key(node, Range(NEGATIVE_INFINITY, NEGATIVE_INFINITY))
        self._untightened.pop()
        node.deleted = True

    def _update_bounds(self, node: HeapNode[B, Range]):
        """Re-files :obj:`node` after its item's bounds were tightened.

        The node is dropped if it is dominated by the best match or by the initial bounds, moved to the tightened
        heap if its bounds became definitive, and re-inserted if its lower bound grew.

        """
        if self.best_match is not None \
                and self.best_match != node.item \
                and self.best_match.bounds().dominates(node.item.bounds()):
            self._delete_node(node)
            return
        elif self.initial_bounds.dominates(node.item.bounds()):
            self._delete_node(node)
            return
        bounds: Range = node.item.bounds()
        if bounds.definitive():
            self._delete_node(node)
            self._tightened.push(node.item)
        elif bounds.lower_bound > node.key.lower_bound:
            # The lower bound increased, so we need to remove and re-add the node
            # because the Fibonacci heap only permits making keys smaller
            self._untightened.decrease_key(node, Range(NEGATIVE_INFINITY, NEGATIVE_INFINITY))
            self._untightened.pop()
            self._untightened.push(node.item)

    def goal_test(self) -> bool:
        """Returns whether :meth:`best_match` is the optimal solution."""
        if self._unprocessed is not None:
            return False
        best = self.best_match
        return best is not None and best.bounds().dominates(self.bounds())

    def tighten_bounds(self) -> bool:
        """Performs one step of the search.

        Each pass consumes at most one item from the input iterator and tightens at most one untightened
        candidate. Passes repeat until this search's own bounds have improved.

        Returns:
            bool: :const:`True` if the bounds improved (or a candidate that cannot be beaten given the initial
            lower bound was found), and :const:`False` once the input is exhausted and nothing could be tightened.

        """
        starting_bounds = self.bounds()
        while True:
            if self._unprocessed is not None:
                try:
                    next_best: B = next(self._unprocessed)
                    if self.initial_bounds.lower_bound > NEGATIVE_INFINITY and \
                            self.initial_bounds.lower_bound >= next_best.bounds().upper_bound:
                        # We can't do any better than this choice!
                        self._unprocessed = None
                        self._untightened.clear()
                        self._tightened.clear()
                        if next_best.bounds().definitive():
                            self._tightened.push(next_best)
                        else:
                            self._untightened.push(next_best)
                        return True
                    if starting_bounds.dominates(next_best.bounds()) or \
                            (self.best_match is not None
                             and self.best_match.bounds().dominates(next_best.bounds())) or \
                            self.initial_bounds.dominates(next_best.bounds()):
                        # No need to add this new edit if it is strictly worse than the current best!
                        pass
                    # This must be `elif`: with a fresh `if`, dominated candidates were pushed anyway.
                    elif next_best.bounds().definitive():
                        self._tightened.push(next_best)
                    else:
                        self._untightened.push(next_best)
                except StopIteration:
                    self._unprocessed = None
            tightened = False
            if self._untightened:
                if self._unprocessed is None:
                    if len(self._untightened) == 1:
                        untightened = self._untightened.peek()
                        if untightened.tighten_bounds() and untightened.bounds().definitive():
                            self._untightened.clear()
                            self._tightened.push(untightened)
                    if self.goal_test():
                        # The winner is known: discard every other candidate and keep tightening only it.
                        best = self.best_match
                        self._untightened.clear()
                        self._tightened.clear()
                        ret = best.tighten_bounds()
                        if best.bounds().definitive():
                            self._tightened.push(best)
                        else:
                            self._untightened.push(best)
                        assert self.best_match == best
                        return ret
                for node in list(self._untightened.min_node):
                    if node.deleted:
                        continue
                    tightened = node.item.tighten_bounds()
                    if tightened:
                        self._update_bounds(node)
                        break
            if starting_bounds.lower_bound < self.bounds().lower_bound \
                    or starting_bounds.upper_bound > self.bounds().upper_bound:
                return True
            elif self._unprocessed is None and not tightened:
                return False
--------------------------------------------------------------------------------
/graphtage/version.py:
--------------------------------------------------------------------------------
1 | """A module that centralizes the version information for Graphtage.
2 |
3 | Changing the version here not only affects the version printed with the ``--version`` command line option, but it also
4 | automatically updates the version used in ``setup.py`` and rendered in the documentation.
5 |
6 | Attributes:
7 | DEV_BUILD (bool): Sets whether this build is a development build.
8 | This should only be set to :const:`False` to coincide with a release. It should *always* be :const:`False` before
9 | deploying to PyPI.
10 |
11 | If :const:`True`, the git branch will be included in :attr:`graphtage.version.__version__`.
12 |
13 | __version__ (Tuple[Union[int, str], ...]): The version of Graphtage. This tuple can contain any sequence of ints and
14 | strings. Typically this will be three ints: major/minor/revision number. However, it can contain additional
15 | ints and strings. If :attr:`graphtage.version.DEV_BUILD`, then `("git", git_branch())` will be appended to the
16 | version.
17 |
18 | VERSION_STRING (str): A rendered string containing the version of Graphtage. Each element of
19 | :attr:`graphtage.version.__version__` is appended to the string, delimited by a "." if the element is an ``int``
20 | or a "-" if the element is a string.
21 |
22 | """
23 |
24 | import os
25 | import subprocess
26 | from typing import Optional, Tuple, Union
27 |
28 |
def git_branch() -> Optional[str]:
    """Returns the git branch for the codebase, or :const:`None` if it could not be determined.

    The git branch is determined by running

    .. code-block:: console

        $ git symbolic-ref -q HEAD

    in the directory containing this module; the last ``/``-separated component of the output is returned.

    """
    try:
        # Everything stays inside the `try` so that any failure, including resolving the
        # module directory, results in None.
        module_dir = os.path.dirname(os.path.realpath(__file__))
        symbolic_ref = subprocess.check_output(
            ['git', 'symbolic-ref', '-q', 'HEAD'],
            cwd=module_dir,
            stderr=subprocess.DEVNULL
        )
        return symbolic_ref.decode('utf-8').strip().rsplit('/', 1)[-1]
    except Exception:
        return None
49 |
50 |
51 | DEV_BUILD = False
52 | """Sets whether this build is a development build.
53 |
54 | This should only be set to :const:`False` to coincide with a release. It should *always* be :const:`False` before
55 | deploying to PyPI.
56 |
57 | If :const:`True`, the git branch will be included in the version string.
58 |
59 | """
60 |
61 |
62 | __version__: Tuple[Union[int, str], ...] = (0, 3, 1)
63 |
64 | if DEV_BUILD:
65 | branch_name = git_branch()
66 | if branch_name is None:
67 | __version__ = __version__ + ('git',)
68 | else:
69 | __version__ = __version__ + ('git', branch_name)
70 |
71 | VERSION_STRING = ''
72 |
73 | for element in __version__:
74 | if isinstance(element, int):
75 | if VERSION_STRING:
76 | VERSION_STRING += f'.{element}'
77 | else:
78 | VERSION_STRING = str(element)
79 | else:
80 | if VERSION_STRING:
81 | VERSION_STRING += f'-{element!s}'
82 | else:
83 | VERSION_STRING += str(element)
84 |
85 |
86 | if __name__ == '__main__':
87 | print(VERSION_STRING)
88 |
--------------------------------------------------------------------------------
/graphtage/yaml.py:
--------------------------------------------------------------------------------
1 | """A :class:`graphtage.Filetype` for parsing, diffing, and rendering YAML files."""
2 | import os
3 | from io import StringIO
4 | from typing import Optional, Union
5 |
6 | from yaml import dump, load_all, YAMLError
7 | try:
8 | from yaml import CLoader as Loader, CDumper as Dumper
9 | except ImportError:
10 | from yaml import Loader, Dumper
11 |
12 | from . import json
13 | from .edits import Insert, Match
14 | from .graphtage import BuildOptions, Filetype, KeyValuePairNode, LeafNode, ListNode, MappingNode, StringNode, \
15 | StringEdit, StringFormatter
16 | from .printer import Fore, Printer
17 | from .sequences import SequenceFormatter, SequenceNode
18 | from .tree import ContainerNode, Edit, GraphtageFormatter, TreeNode
19 |
20 |
def build_tree(path: str, options: Optional[BuildOptions] = None, *args, **kwargs) -> TreeNode:
    """Constructs a YAML tree from a YAML file.

    A file with no documents is built from :const:`None`, a file with exactly one document is built from that
    document, and a file with several documents is built from the list of all of them.

    Args:
        path: Path of the YAML file to parse.
        options: Build options forwarded to :func:`graphtage.json.build_tree`.
        *args: Extra positional arguments forwarded to :func:`graphtage.json.build_tree`.
        **kwargs: Extra keyword arguments forwarded to :func:`graphtage.json.build_tree`.

    """
    with open(path, 'rb') as yaml_file:
        # NOTE(review): `Loader` is not PyYAML's safe loader, so a crafted document could construct
        # arbitrary Python objects. Consider the SafeLoader variants if input files may be untrusted.
        documents = list(load_all(yaml_file, Loader=Loader))
    if not documents:
        content = None
    elif len(documents) == 1:
        content = documents[0]
    else:
        content = documents
    return json.build_tree(content, options=options, *args, **kwargs)
33 |
34 |
class YAMLListFormatter(SequenceFormatter):
    """A partial formatter that prints list items one per line, each prefixed with ``- ``."""
    is_partial = True

    def __init__(self):
        # YAML block sequences have no start symbol, end symbol, or delimiter.
        super().__init__('', '', '')

    def print_SequenceNode(self, printer: Printer, node: SequenceNode):
        """Hands generic sequence nodes back to the parent formatter."""
        self.parent.print(printer, node)

    def print_ListNode(self, printer: Printer, *args, **kwargs):
        """Starts a new line and then prints the list with the sequence-printing logic of the base class."""
        printer.newline()
        super().print_SequenceNode(printer, *args, **kwargs)

    def edit_print(self, printer: Printer, edit: Edit):
        """Prints :obj:`edit` one indentation level deeper than the current one."""
        printer.indents += 1
        self.print(printer, edit)
        printer.indents -= 1

    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
        """Writes the ``- `` marker before an item, preceded by a newline for all but the first item."""
        if is_last:
            # Nothing is written after the final item.
            return
        if not is_first:
            printer.newline()
        with printer.bright().color(Fore.WHITE):
            printer.write('- ')

    def items_indent(self, printer: Printer):
        """Returns :obj:`printer` unchanged, adding no indentation for the items."""
        return printer
62 |
63 |
class YAMLKeyValuePairFormatter(GraphtageFormatter):
    """A partial formatter that prints ``key: value`` pairs."""
    is_partial = True

    def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode):
        """Prints the key, a ``": "`` separator, and then the value.

        The key is colored blue unless a foreground color is already active. Mapping values start on a new line
        with one extra level of indentation, sequence values are delegated two formatter levels up, and every
        other value is printed inline.

        """
        if printer.context().fore is not None:
            self.print(printer, node.key)
        else:
            with printer.color(Fore.BLUE) as p:
                self.print(p, node.key)
        with printer.bright().color(Fore.CYAN):
            printer.write(": ")
        if isinstance(node.value, MappingNode):
            printer.newline()
            printer.indents += 1
            self.parent.print(printer, node.value)
            printer.indents -= 1
            return
        if isinstance(node.value, SequenceNode):
            self.parent.parent.print(printer, node.value)
            return
        self.print(printer, node.value)
84 |
85 |
class YAMLDictFormatter(SequenceFormatter):
    """A partial formatter that prints mappings and multisets with one entry per line."""
    is_partial = True
    sub_format_types = [YAMLKeyValuePairFormatter]

    def __init__(self):
        # YAML block mappings have no start symbol, end symbol, or delimiter.
        super().__init__('', '', '')

    def print_MultiSetNode(self, *args, **kwargs):
        """Prints a multiset with the sequence-printing logic of the base class."""
        super().print_SequenceNode(*args, **kwargs)

    def print_MappingNode(self, *args, **kwargs):
        """Prints a mapping with the sequence-printing logic of the base class."""
        super().print_SequenceNode(*args, **kwargs)

    def print_SequenceNode(self, *args, **kwargs):
        """Hands generic sequence nodes back to the parent formatter."""
        self.parent.print(*args, **kwargs)

    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
        """Writes a newline between entries, but not before the first entry or after the last."""
        if is_first or is_last:
            return
        printer.newline()

    def items_indent(self, printer: Printer):
        """Returns :obj:`printer` unchanged, adding no indentation for the entries."""
        return printer
108 |
109 |
class YAMLStringFormatter(StringFormatter):
    """A partial formatter that prints strings, using a ``|`` block for strings containing newlines."""
    is_partial = True
    has_newline = False

    def write_start_quote(self, printer: Printer, edit: StringEdit):
        """Opens a ``|`` block if any matched or inserted sub-edit of :obj:`edit` contains a newline.

        The decision is stored in :attr:`has_newline` so that :meth:`write_end_quote` can undo the indentation.

        """
        self.has_newline = any(
            isinstance(sub_edit, (Match, Insert)) and '\n' in sub_edit.from_node.object
            for sub_edit in edit.edit_distance.edits()
        )
        if not self.has_newline:
            return
        printer.write('|')
        printer.indents += 1
        printer.newline()

    def context(self, printer: Printer):
        """Returns :obj:`printer` unchanged."""
        return printer

    def write_end_quote(self, printer: Printer, edit: StringEdit):
        """Removes the indentation level added by :meth:`write_start_quote`, if one was added."""
        if self.has_newline:
            printer.indents -= 1

    def print_StringNode(self, printer: Printer, node: 'StringNode'):
        """Prints a string node, one line at a time inside a ``|`` block if it contains newlines."""
        text = node.object
        if '\n' not in text:
            self.parent.write_obj(printer, text)
            return
        # Color the block cyan unless a foreground color is already active.
        block_context = printer.color(Fore.CYAN) if printer.context().fore is None else printer
        with block_context as c:
            c.write('|')
            with c.indent():
                block_lines = text.split('\n')
                if block_lines[-1] == '':
                    # Remove trailing newline
                    block_lines = block_lines[:-1]
                for block_line in block_lines:
                    c.newline()
                    self.parent.write_obj(c, block_line)

    def write_char(self, printer: Printer, c: str, index: int, num_edits: int, removed=False, inserted=False):
        """Writes one character of a string edit.

        A removed or inserted newline is shown as the return symbol; a newline that is not removed and is not the
        final edit also starts a new line.

        """
        if c != '\n':
            super().write_char(printer, c, index, num_edits, removed, inserted)
            return
        if removed or inserted:
            super().write_char(printer, '\u23CE', index, num_edits, removed, inserted)
        if not removed and index < num_edits - 1:
            # Do not print a trailing newline
            printer.newline()
165 |
166 |
class YAMLFormatter(GraphtageFormatter):
    """The formatter for rendering trees as YAML."""
    sub_format_types = [YAMLStringFormatter, YAMLDictFormatter, YAMLListFormatter]

    def print(self, printer: Printer, *args, **kwargs):
        # YAML only gets a two-space indent
        printer.indent_str = ' '
        super().print(printer, *args, **kwargs)

    @staticmethod
    def write_obj(printer: Printer, obj):
        """Writes the YAML serialization of :obj:`obj`, without the dumper's trailing newline or ``...`` marker.

        Nothing is written for an empty string. For strings whose first non-blank character is ``#``, one leading
        single quote, one trailing newline, and one trailing single quote are removed from the dumped text.

        """
        if obj == '':
            return
        buffer = StringIO()
        dump(obj, stream=buffer, Dumper=Dumper)
        rendered = buffer.getvalue()
        looks_like_comment = isinstance(obj, str) and obj.strip().startswith('#')
        if looks_like_comment:
            if rendered.startswith("'"):
                rendered = rendered[1:]
            if rendered.endswith("\n"):
                rendered = rendered[:-1]
            if rendered.endswith("'"):
                rendered = rendered[:-1]
        document_end = '\n...\n'
        if rendered.endswith(document_end):
            rendered = rendered[:-len(document_end)]
        elif rendered.endswith('\n'):
            rendered = rendered[:-1]
        printer.write(rendered)

    def print_LeafNode(self, printer: Printer, node: LeafNode):
        """Writes the node's object with :meth:`YAMLFormatter.write_obj`."""
        self.write_obj(printer, node.object)

    def print_ContainerNode(self, printer: Printer, node: ContainerNode):
        """Prints a :class:`graphtage.ContainerNode`.

        This is a fallback to permit the printing of custom containers, like :class:`graphtage.xml.XMLElement`.

        """
        # Treat the container like a list
        as_list = ListNode(node.children())
        self.print(printer, as_list)
207 |
208 |
class YAML(Filetype):
    """The YAML filetype."""
    def __init__(self):
        """Initializes the YAML file type.

        By default, YAML associates itself with the "yaml", "application/x-yaml", "application/yaml", "text/yaml",
        "text/x-yaml", and "text/vnd.yaml" MIME types.

        """
        super().__init__(
            'yaml',
            'application/x-yaml',
            'application/yaml',
            'text/yaml',
            'text/x-yaml',
            'text/vnd.yaml'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Builds the tree for the YAML file at :obj:`path` and marks every string node as unquoted."""
        tree = build_tree(path=path, options=options)
        for node in tree.dfs():
            if isinstance(node, StringNode):
                node.quoted = False
        return tree

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Same as :meth:`YAML.build_tree`, but returns an error message instead of raising on a YAML error.

        Returns:
            Union[str, TreeNode]: The tree, or a string describing why the file could not be parsed.

        """
        try:
            return self.build_tree(path=path, options=options)
        except YAMLError as ye:
            # The message previously ended with a stray, unbalanced ")".
            return f'Error parsing {os.path.basename(path)}: {ye}'

    def get_default_formatter(self) -> YAMLFormatter:
        """Returns the default :class:`YAMLFormatter` instance."""
        return YAMLFormatter.DEFAULT_INSTANCE
242 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup, find_packages
3 |
# Directory containing this setup script, with symlinks resolved.
HERE = os.path.realpath(os.path.dirname(__file__))

# Files read at build time: the module that defines VERSION_STRING, and the long description.
VERSION_MODULE_PATH = os.path.join(HERE, "graphtage", "version.py")
README_PATH = os.path.join(HERE, "README.md")
8 |
9 |
def get_version_string():
    """Returns ``VERSION_STRING`` as defined by the version module.

    The module's source is executed in a fresh namespace rather than imported, so the version can be read
    without importing the package itself.

    """
    version = {}
    # Read explicitly as UTF-8, like the README, instead of relying on the locale's default encoding.
    with open(VERSION_MODULE_PATH, encoding='utf-8') as f:
        exec(f.read(), version)
    return version['VERSION_STRING']
15 |
16 |
def get_readme():
    """Returns the full text of the README, decoded as UTF-8."""
    with open(README_PATH, encoding='utf-8') as readme_file:
        readme_text = readme_file.read()
    return readme_text
20 |
21 |
# Package metadata. The version and long description are read from files at build time.
setup(
    name='graphtage',
    description='A utility to diff tree-like files such as JSON and XML.',
    license="LGPL-3.0-or-later",
    long_description=get_readme(),
    long_description_content_type="text/markdown",
    url='https://github.com/trailofbits/graphtage',
    project_urls={
        'Documentation': 'https://trailofbits.github.io/graphtage',
        'Source': 'https://github.com/trailofbits/graphtage',
        'Tracker': 'https://github.com/trailofbits/graphtage/issues',
    },
    author='Trail of Bits',
    version=get_version_string(),
    # The test package is not shipped.
    packages=find_packages(exclude=['test']),
    python_requires='>=3.8',
    install_requires=[
        "colorama",
        "fickling>=0.0.8",
        "intervaltree",
        "json5==0.9.5",
        "numpy>=1.19.4",
        "PyYAML",
        "scipy>=1.4.0",
        "tqdm",
        "typing_extensions>=3.7.4.3"
    ],
    # Installs the `graphtage` command, which runs graphtage.__main__:main.
    entry_points={
        'console_scripts': [
            'graphtage = graphtage.__main__:main'
        ]
    },
    extras_require={
        "dev": [
            "flake8",
            "Sphinx",
            "pytest",
            "sphinx_rtd_theme==1.2.2",
            "twine",
            # workaround for https://github.com/python/importlib_metadata/issues/406:
            # NOTE(review): this marker only matches Python 3.7, which python_requires ('>=3.8') already
            # excludes, so the pin can never apply - confirm whether it can be dropped.
            "importlib_metadata<5; python_version == '3.7'"
        ]
    },
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)',
        'Programming Language :: Python :: 3 :: Only',
        'Topic :: Utilities'
    ],
    include_package_data=True
)
75 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trailofbits/graphtage/23654acf488eb803a60ce27ac515ee0755feb1a7/test/__init__.py
--------------------------------------------------------------------------------
/test/test_bounds.py:
--------------------------------------------------------------------------------
1 | import random
2 | from typing import Optional
3 | from unittest import TestCase
4 |
5 | from tqdm import trange
6 |
7 | from graphtage.bounds import Bounded, make_distinct, Range, sort
8 |
9 |
class RandomDecreasingRange(Bounded):
    """A :class:`Bounded` test helper whose bounds shrink randomly toward ``final_value``.

    The initial lower and upper bounds are drawn at random on either side of
    ``final_value``. Each call to :meth:`tighten_bounds` moves them closer, and
    ``tightenings`` counts the calls that actually changed a bound.
    """

    def __init__(self, fixed_lb: int = 0, fixed_ub: int = 2000000, final_value: Optional[int] = None):
        if final_value is None:
            # No target given: draw one from the lower half of [fixed_lb, fixed_ub].
            final_value = random.randint(fixed_lb, fixed_lb + (fixed_ub - fixed_lb) // 2)
        elif final_value < fixed_lb:
            raise ValueError(f"final_value of {final_value} < fixed lower bound of {fixed_lb}")
        elif final_value > fixed_ub:
            raise ValueError(f"final_value of {final_value} > fixed upper bound of {fixed_ub}")
        self.final_value = final_value
        self._lb = random.randint(fixed_lb, self.final_value)
        self._ub = random.randint(self.final_value, fixed_ub)
        self.tightenings: int = 0

    def bounds(self) -> Range:
        """Return the current bounds as a :class:`Range`."""
        return Range(self._lb, self._ub)

    def tighten_bounds(self) -> bool:
        """Move each bound toward ``final_value``; return whether either bound changed."""
        previous = self.bounds()
        gap_below = self.final_value - self._lb
        gap_above = self._ub - self.final_value
        if gap_below == 0 and gap_above == 0:
            # Both bounds already sit on the final value.
            return False
        if gap_below > 1:
            # Close at least half of the remaining gap (and at least 1).
            self._lb += random.randint(max(int(0.5 * gap_below), 1), gap_below)
        else:
            self._lb = self.final_value
        if gap_above > 1:
            self._ub -= random.randint(max(int(0.5 * gap_above), 1), gap_above)
        else:
            self._ub = self.final_value
        tightened = previous.lower_bound < self._lb or previous.upper_bound > self._ub
        if tightened:
            self.tightenings += 1
        return tightened

    def __repr__(self):
        return repr(self.bounds())
49 |
50 |
class TestBounds(TestCase):
    """Randomized tests for ``graphtage.bounds`` using :class:`RandomDecreasingRange`."""

    def test_random_decreasing_range(self):
        """Each successful tightening must shrink the range and never widen it."""
        for _ in range(1000):
            r = RandomDecreasingRange()
            last_range = r.bounds()
            while r.tighten_bounds():
                next_range = r.bounds()
                self.assertTrue(next_range.lower_bound >= last_range.lower_bound
                                and next_range.upper_bound <= last_range.upper_bound
                                and (
                                    next_range.lower_bound > last_range.lower_bound or
                                    next_range.upper_bound < last_range.upper_bound
                                ))
                last_range = next_range

    def test_sort(self):
        """``sort`` must yield the ranges in the same order as sorting by ``final_value``."""
        for _ in trange(100):
            ranges = [RandomDecreasingRange() for _ in range(100)]
            sorted_ranges = sorted(ranges, key=lambda r: r.final_value)
            for expected, actual in zip(sorted_ranges, sort(ranges)):
                self.assertEqual(expected.final_value, actual.final_value)

    def test_make_distinct(self):
        """After ``make_distinct``, consecutive sorted ranges are identical definitive ranges or do not overlap.

        Also reports how many tightenings were used compared with fully
        tightening every range, shown as a "speedup" in the progress bar.
        """
        speedups = 0
        tests = 0
        try:
            with trange(0, 100) as t:
                for i in t:
                    ranges = [RandomDecreasingRange() for _ in range(i)]
                    make_distinct(*ranges)
                    last_range = None
                    for r in sort(ranges):
                        rbounds = r.bounds()
                        if last_range is not None:
                            self.assertTrue((last_range.definitive() and rbounds.definitive() and last_range == rbounds) or
                                            last_range.upper_bound < rbounds.lower_bound,
                                            f"{last_range!r} was followed by {rbounds!r}")
                        last_range = rbounds
                    tightenings = sum(r.tightenings for r in ranges)
                    if tightenings > 0:
                        untightened = 0
                        for r in ranges:
                            t_before = r.tightenings
                            # Finish tightening to count the work make_distinct avoided.
                            while r.tighten_bounds():
                                pass
                            untightened += r.tightenings - t_before
                        t.desc = f"{(untightened + tightenings) / tightenings:.01f}x Speedup"
                        speedups += (untightened + tightenings) / tightenings
                        tests += 1
        finally:
            # `tests` is still 0 if a failure happened before any sample was
            # recorded; skip the report rather than raise ZeroDivisionError on
            # top of the real failure.
            if tests > 0:
                print(f"Average speedup: {speedups / tests:.01f}x")
102 |
--------------------------------------------------------------------------------
/test/test_builder.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | from unittest import TestCase
3 |
4 | from graphtage import IntegerNode, ListNode, TreeNode
5 | from graphtage.builder import BasicBuilder, Builder
6 |
7 |
class TestBuilder(TestCase):
    """Tests for :class:`BasicBuilder` and for user-defined builder subclasses."""

    def test_basic_builder(self):
        """A six-element Python list builds into a ``ListNode`` with six children."""
        source = [1, "a", (2, "b"), {1, 2}, {"a": "b"}, None]
        tree = BasicBuilder().build_tree(source)
        self.assertIsInstance(tree, ListNode)
        self.assertEqual(6, len(tree.children()))

    def test_custom_builder(self):
        """A subclass can register an expander and a builder for its own type."""
        test_case = self

        class Foo:
            def __init__(self, bar):
                self.bar = bar

        class Tester(BasicBuilder):
            @Builder.expander(Foo)
            def expand_foo(self, obj: Foo):
                # A Foo expands to its single wrapped value.
                yield obj.bar

            @Builder.builder(Foo)
            def build_foo(self, obj: Foo, children: List[TreeNode]):
                test_case.assertEqual(1, len(children))
                return children[0]

        built = Tester().build_tree(Foo(10))
        self.assertIsInstance(built, IntegerNode)
        self.assertEqual(10, built.object)
34 |
--------------------------------------------------------------------------------
/test/test_constraints.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | import graphtage
4 | from graphtage.constraints import MatchIf, MatchUnless
5 | from graphtage.json import build_tree
6 | from graphtage import expressions
7 |
8 |
class TestConstraints(TestCase):
    """Checks how ``MatchIf`` and ``MatchUnless`` change the top-level edit of a diff."""

    def _diff_with_constraint(self, constraint):
        """Apply ``constraint`` to every node of the "from" tree, then diff it against the "to" tree."""
        expr = expressions.parse("from.key == 'foo' && to.key == 'bar'")
        from_tree = build_tree({
            "foo": [1, 2, 3]
        })
        for node in from_tree.dfs():
            constraint.apply(node, expr)
        to_tree = build_tree({
            "bar": [1, 2, 4]
        })
        return from_tree.diff(to_tree)

    def test_match_if(self):
        """With ``MatchIf`` applied, the root edit is expected to be a ``Replace``."""
        diff = self._diff_with_constraint(MatchIf)
        self.assertIsInstance(diff.edit, graphtage.Replace)

    def test_match_unless(self):
        """With ``MatchUnless`` applied, the root edit is expected to be a ``MultiSetEdit``."""
        diff = self._diff_with_constraint(MatchUnless)
        self.assertIsInstance(diff.edit, graphtage.MultiSetEdit)
35 |
--------------------------------------------------------------------------------
/test/test_dataclasses.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from graphtage import IntegerNode, Replace, StringNode
4 | from graphtage.dataclasses import DataClassEdit, DataClassNode
5 |
6 |
class TestDataclasses(TestCase):
    """Tests for ``DataClassNode`` slots, inheritance, construction and diffing."""

    def test_inheritance(self):
        """Subclasses extend the parent's slots, and ``post_init`` runs on construction."""
        class Foo(DataClassNode):
            foo: IntegerNode
            initialized = False

            def post_init(self):
                self.initialized = True

        class Bar(Foo):
            bar: StringNode
            initialized = False

            def post_init(self):
                self.initialized = True

        self.assertEqual(("foo",), Foo._SLOTS)
        self.assertEqual(0, len(Foo._DATA_CLASS_ANCESTORS))
        self.assertEqual(("foo", "bar",), Bar._SLOTS)
        self.assertEqual(1, len(Bar._DATA_CLASS_ANCESTORS))

        # construction with keyword arguments only
        from_keywords = Bar(foo=IntegerNode(10), bar=StringNode("bar"))
        self.assertEqual(10, from_keywords.foo.object)
        self.assertEqual("bar", from_keywords.bar.object)
        self.assertTrue(from_keywords.initialized)

        # construction with a mixture of positional and keyword arguments
        mixed = Bar(StringNode("bar"), foo=IntegerNode(10))
        self.assertEqual(10, mixed.foo.object)
        self.assertEqual("bar", mixed.bar.object)
        self.assertTrue(mixed.initialized)

        # equality
        self.assertEqual(Bar(IntegerNode(10), StringNode("bar")), mixed)
        self.assertNotEqual(Bar(IntegerNode(11), StringNode("bar")), mixed)

        # diffing a Foo against a Bar is asserted to give a Replace ...
        base = Foo(IntegerNode(10))
        self.assertIsInstance(base.edits(mixed), Replace)
        # ... and diffing two Foo instances a DataClassEdit
        other_base = Foo(IntegerNode(12))
        self.assertIsInstance(base.edits(other_base), DataClassEdit)

    def test_inheritance_with_duplicate(self):
        """Redeclaring an inherited slot in a subclass raises ``TypeError``."""
        with self.assertRaises(TypeError):
            class BaseFoo(DataClassNode):
                foo: StringNode

            class DuplicateFoo(BaseFoo):
                bar: IntegerNode
                foo: IntegerNode

    def test_runtime_type_checking(self):
        """Passing a node of the wrong type for a slot raises ``ValueError``."""
        class Foo(DataClassNode):
            foo: IntegerNode

        with self.assertRaises(ValueError):
            Foo(StringNode("foo"))
70 |
--------------------------------------------------------------------------------
/test/test_expressions.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from graphtage.expressions import parse, ParseError, StringToken
4 |
5 |
class TestExpressions(TestCase):
    """Tests for parsing and evaluating ``graphtage.expressions``."""

    def test_string_parsing(self):
        """A double-quoted literal evaluates to a ``StringToken`` holding its contents."""
        input_str = 'This is a test'
        result = parse(f'"{input_str}"').eval()
        self.assertIsInstance(result, StringToken)
        self.assertEqual(input_str, str(result))

    def test_string_escaping(self):
        """An escaped quote is accepted inside a string; an unquoted stray quote is a parse error."""
        input_str = 'foo " bar'
        escaped_input = input_str.replace('"', '\\"')
        result = parse(f'"{escaped_input}"').eval()
        self.assertIsInstance(result, StringToken)
        self.assertEqual(input_str, str(result))
        with self.assertRaises(ParseError):
            parse(input_str)

    def test_getitem(self):
        """An index expression is evaluated before the lookup: (10 + 10) * 2 == 40."""
        assignments = {
            'foo': {
                40: 1234
            },
            'bar': 10
        }
        self.assertEqual(1234, parse('foo[(bar + 10) * 2]').eval(assignments))

    def test_bracket_parsing(self):
        """Mismatched brackets and parentheses are rejected."""
        for malformed in ('foo[bar(])', '(bar[)]'):
            with self.assertRaises(ParseError):
                parse(malformed)

    def test_evaluation(self):
        """Bitwise and arithmetic operators evaluate against variable assignments."""
        assignments = {
            'sampling_factors': 1234,
            'thumbnail_x': 5,
            'thumbnail_y': 7
        }
        self.assertEqual(65, parse('(sampling_factors & -0xf0) >> 4').eval(assignments))
        self.assertEqual(105, parse('thumbnail_x * thumbnail_y * 3').eval(assignments))

    def test_functions(self):
        """Function calls and method calls on literals are evaluated."""
        self.assertEqual(sum([1, 2, 3, 4]), parse('sum([1, 2, 3, 4])').eval())
        self.assertEqual('a, b, c, d', parse('", ".join(["a", "b", "c", "d"])').eval())

    def test_member_access(self):
        """Ordinary attributes can be read; ``__dict__`` access raises ``ParseError``."""
        class Foo:
            def __init__(self, bar):
                self.bar = bar

        assignments = {
            'foo': Foo(1234)
        }

        self.assertEqual(1234, parse('foo.bar').eval(assignments))
        with self.assertRaises(ParseError):
            parse('foo.__dict__').eval(assignments)

    def test_containers(self):
        """List and tuple literals, including nested ones, evaluate to Python containers."""
        cases = (
            ([[1, (3,)]], '[[1, (3,)]]'),
            ([1, 2, 3, 4], '[1, 2, 3, 4]'),
            ((1, 2, 3, 4), '(1, 2, 3, 4)'),
            ([[1, 2, [3], 4]], '[[1, 2, [3], 4]]'),
            ((1,), '(1,)'),
            ([1], '[1]'),
        )
        for expected, expression in cases:
            self.assertEqual(expected, parse(expression).eval())
        # A trailing comma inside a list literal is expected to be a parse error.
        with self.assertRaises(ParseError):
            self.assertEqual([1], parse('[1,]').eval())
71 |
--------------------------------------------------------------------------------
/test/test_fibonacci.py:
--------------------------------------------------------------------------------
1 | import random
2 | from collections import defaultdict
3 | from typing import Callable, Dict, List, Optional, Set
4 | from unittest import TestCase
5 |
6 | from tqdm import tqdm, trange
7 |
8 | from graphtage.fibonacci import FibonacciHeap, HeapNode, MaxFibonacciHeap
9 |
10 |
class TestFibonacciHeap(TestCase):
    """Randomized tests for ``FibonacciHeap`` and ``MaxFibonacciHeap``."""

    @classmethod
    def setUpClass(cls):
        # One shared random data set (and its sorted form) for every test in the class.
        cls.random_list: List[int] = [random.randint(0, 10000) for _ in range(10000)]
        cls.sorted_list: List[int] = sorted(cls.random_list)

    def test_duplicate_items(self):
        """Equal items can be pushed more than once and are all popped, in sorted order."""
        heap = FibonacciHeap()
        items = [2, 1, 2]
        for item in items:
            heap.push(item)
        popped = [heap.pop() for _ in range(len(items))]
        self.assertEqual(sorted(items), popped)

    def random_heap(self) -> FibonacciHeap[int, int]:
        """Return a new min-heap holding every element of ``random_list``."""
        heap: FibonacciHeap[int, int] = FibonacciHeap()
        for value in self.random_list:
            heap.push(value)
        return heap

    def random_max_heap(self, key: Optional[Callable[[int], int]] = None) -> MaxFibonacciHeap[int, int]:
        """Return a new max-heap, built with ``key``, holding every element of ``random_list``."""
        heap: FibonacciHeap[int, int] = MaxFibonacciHeap(key=key)
        for value in self.random_list:
            heap.push(value)
        return heap

    def test_fibonacci_heap(self):
        """Popping everything from a min-heap yields ascending order."""
        heap = self.random_heap()
        popped = [heap.pop() for _ in range(len(self.random_list))]
        self.assertEqual(self.sorted_list, popped)

    def test_max_fibonacci_heap(self):
        """Popping everything from a max-heap yields descending order."""
        heap = self.random_max_heap()
        popped = [heap.pop() for _ in range(len(self.random_list))]
        self.assertEqual(list(reversed(self.sorted_list)), popped)

    def test_max_fibonacci_heap_with_key(self):
        """A negating key makes the max-heap pop in ascending order."""
        heap = self.random_max_heap(key=lambda i: -i)
        popped = [heap.pop() for _ in range(len(self.random_list))]
        self.assertEqual(self.sorted_list, popped)

    def test_node_traversal(self):
        """``nodes()`` yields exactly ``len(heap)`` nodes."""
        heap = self.random_heap()
        node_count = sum(1 for _ in heap.nodes())
        self.assertEqual(node_count, len(heap))

    def test_manual_node_deletion(self):
        """Delete random nodes by decreasing their key to -1 and popping them."""
        heap = self.random_heap()
        total = len(self.random_list)
        for i in trange(total // 20):
            victim: HeapNode[int, int] = random.choice(list(heap.nodes()))
            # All stored values are >= 0, so a key of -1 becomes the minimum.
            heap.decrease_key(victim, -1)
            heap.pop()
            victim.deleted = True
            self.assertEqual(len(heap), total - i - 1)

    def test_node_deletion(self):
        """``remove`` shrinks the heap by one node per call."""
        heap = self.random_heap()
        total = len(self.random_list)
        for i in trange(total // 20):
            victim: HeapNode[int, int] = random.choice(list(heap.nodes()))
            heap.remove(victim)
            self.assertEqual(len(heap), total - i - 1)

    def test_decrease_key(self):
        """After random key decreases, the heap still pops its keys in ascending order."""
        heap = self.random_heap()
        nodes_by_value: Dict[int, Set[HeapNode[int, int]]] = defaultdict(set)
        for node in heap.nodes():
            nodes_by_value[node.key].add(node)
        # Maps an index into sorted_list to the new key assigned for that position.
        changes: Dict[int, int] = {}
        for _ in trange(len(self.random_list)//20):
            # Pick a position in sorted_list that has not been changed yet.
            while True:
                random_sorted_index = random.randint(0, len(self.random_list) - 1)
                if random_sorted_index not in changes:
                    break
            original_value = self.sorted_list[random_sorted_index]
            random_node: HeapNode[int, int] = next(iter(nodes_by_value[original_value]))
            self.assertEqual(random_node.key, original_value)
            if random_node.key <= 0:
                # No smaller non-negative key is available to decrease to.
                continue
            new_key = random.randint(0, random_node.key - 1)
            nodes_by_value[random_node.key].remove(random_node)
            nodes_by_value[new_key].add(random_node)
            changes[random_sorted_index] = new_key
            heap.decrease_key(random_node, new_key)
        updated_list = [
            changes[i] if i in changes else expected
            for i, expected in enumerate(self.sorted_list)
        ]
        expected_list = sorted(updated_list)
        for expected in tqdm(expected_list):
            node = heap.min_node
            heap.pop()
            self.assertEqual(node.key, expected)
103 |
--------------------------------------------------------------------------------
/test/test_formatting.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import json
3 | import plistlib
4 | import random
5 | from functools import partial, wraps
6 | from io import StringIO
7 | from typing import FrozenSet, Optional, Tuple, Type, Union
8 | from unittest import TestCase
9 |
10 | import yaml
11 | from tqdm import trange
12 |
13 | import graphtage
14 | from graphtage import xml
15 |
16 |
# Characters used for random test strings: printable ASCII (codes 32-126)
# plus newline, tab and carriage return.
STR_BYTES: FrozenSet[str] = frozenset([
    chr(i) for i in range(32, 127)
] + ['\n', '\t', '\r'])
# ASCII letters, a-z followed by A-Z. range() excludes its stop value, so the
# stops are ord(...) + 1; without the + 1, 'z' and 'Z' were never generated.
LETTERS: Tuple[str, ...] = tuple(
    chr(i) for i in range(ord('a'), ord('z') + 1)
) + tuple(
    chr(i) for i in range(ord('A'), ord('Z') + 1)
)

# A @filetype_test function must be named test_<filetype>_formatting.
FILETYPE_TEST_PREFIX = 'test_'
FILETYPE_TEST_SUFFIX = '_formatting'
28 |
29 |
def filetype_test(test_func=None, *, test_equality: bool = True, iterations: int = 1000):
    """Decorator turning a sample generator into a formatter round-trip test.

    The decorated method must be named ``test_<filetype>_formatting`` and
    return ``(original_object, serialized_representation)``. On each iteration
    the representation is parsed with the named filetype, printed with that
    filetype's default formatter, and the printed text is parsed again.

    Args:
        test_func: the method being decorated; ``None`` when the decorator is
            used with arguments, in which case a partially applied decorator
            is returned.
        test_equality: also assert that the re-parsed tree equals the first tree.
        iterations: number of random samples to run.
    """
    if test_func is None:
        return partial(filetype_test, test_equality=test_equality, iterations=iterations)

    @wraps(test_func)
    def wrapper(self: 'TestFormatting'):
        name = test_func.__name__
        if not name.startswith(FILETYPE_TEST_PREFIX):
            raise ValueError(f'@filetype_test {name} must start with "{FILETYPE_TEST_PREFIX}"')
        if not name.endswith(FILETYPE_TEST_SUFFIX):
            raise ValueError(f'@filetype_test {name} must end with "{FILETYPE_TEST_SUFFIX}"')
        # The filetype name is whatever sits between the prefix and the suffix.
        filetype_name = name[len(FILETYPE_TEST_PREFIX):-len(FILETYPE_TEST_SUFFIX)]
        if filetype_name not in graphtage.FILETYPES_BY_TYPENAME:
            raise ValueError(f'Filetype "{filetype_name}" for @filetype_test {name} not found in graphtage.FILETYPES_BY_TYPENAME')
        filetype = graphtage.FILETYPES_BY_TYPENAME[filetype_name]
        formatter = filetype.get_default_formatter()

        for _ in trange(iterations):
            orig_obj, representation = test_func(self)
            if isinstance(representation, str):
                representation = representation.encode("utf-8")
            # First pass: parse the reference serialization and print it with the formatter.
            with graphtage.utils.Tempfile(representation) as source_path:
                tree = filetype.build_tree(source_path)
                stream = StringIO()
                printer = graphtage.printer.Printer(out_stream=stream, ansi_color=False)
                formatter.print(printer, tree)
                formatted_str = stream.getvalue()
            # Second pass: the formatter's own output must parse again.
            with graphtage.utils.Tempfile(formatted_str.encode('utf-8')) as formatted_path:
                try:
                    new_obj = filetype.build_tree(formatted_path)
                except Exception as e:
                    self.fail(f"""{filetype_name.upper()} decode error {e}: Original object:
{orig_obj!r}
Expected format:
{representation.decode("utf-8")}
Actual format:
{formatted_str!s}""")
                if test_equality:
                    self.assertEqual(tree, new_obj)

    return wrapper
71 |
72 |
class TestFormatting(TestCase):
    """Round-trip formatting tests, one ``test_<filetype>_formatting`` method per filetype.

    The static ``make_random_*`` helpers generate random scalars, containers
    and XML elements that the individual tests serialize.
    """

    @staticmethod
    def make_random_int() -> int:
        return random.randint(-1000000, 1000000)

    @staticmethod
    def make_random_float() -> float:
        return random.random()

    @staticmethod
    def make_random_bool() -> bool:
        return random.choice([True, False])

    @staticmethod
    def make_random_str(exclude_bytes: FrozenSet[str] = frozenset(), allow_empty_strings: bool = True) -> str:
        """Return a random string of at most 128 characters drawn from ``STR_BYTES - exclude_bytes``."""
        min_length = 0 if allow_empty_strings else 1
        alphabet = list(STR_BYTES - exclude_bytes)
        length = random.randint(min_length, 128)
        return ''.join(random.choices(alphabet, k=length))

    @staticmethod
    def make_random_non_container(exclude_bytes: FrozenSet[str] = frozenset(), allow_empty_strings: bool = True):
        """Return a random int, bool, float or string, each kind being equally likely."""
        generators = [
            TestFormatting.make_random_int,
            TestFormatting.make_random_bool,
            TestFormatting.make_random_float,
            partial(
                TestFormatting.make_random_str, exclude_bytes=exclude_bytes, allow_empty_strings=allow_empty_strings
            )
        ]
        chosen = random.choice(generators)
        return chosen()

    @staticmethod
    def _make_random_obj(obj_stack, force_container_type: Optional[Type[Union[dict, list]]] = None, *args, **kwargs):
        """Return a random scalar or a new empty container.

        A new container is also pushed onto ``obj_stack`` so the caller can
        fill it later. When ``force_container_type`` is given, any container
        created is of that type.
        """
        r = random.random()
        NON_CONTAINER_PROB = 0.1
        CONTAINER_PROB = (1.0 - NON_CONTAINER_PROB) / 2.0
        if r <= NON_CONTAINER_PROB:
            return TestFormatting.make_random_non_container(*args, **kwargs)
        if force_container_type is not None:
            container = force_container_type()
        elif r <= NON_CONTAINER_PROB + CONTAINER_PROB:
            container = []
        else:
            container = {}
        obj_stack.append(container)
        return container

    @staticmethod
    def make_random_obj(
            force_string_keys: bool = False,
            allow_empty_containers: bool = True,
            alternate_containers: bool = False,
            *args, **kwargs):
        """Return a random nested structure of dicts, lists and scalars.

        Args:
            force_string_keys: generate dict keys with ``make_random_str``
                instead of ``make_random_non_container``.
            allow_empty_containers: when false, a container that would have
                been empty receives a single scalar entry.
            alternate_containers: when true, dicts only contain lists as
                nested containers and lists only contain dicts.
            *args, **kwargs: forwarded to the scalar and string generators.
        """
        if force_string_keys:
            make_key = TestFormatting.make_random_str
        else:
            make_key = TestFormatting.make_random_non_container

        obj_stack = []
        ret = TestFormatting._make_random_obj(obj_stack, *args, **kwargs)

        # Fill pending containers until none are left; no recursion is used.
        while obj_stack:
            expanding = obj_stack.pop()
            size = int(random.betavariate(0.75, 5) * 10)
            needs_filler = size == 0 and not allow_empty_containers
            if isinstance(expanding, dict):
                if needs_filler:
                    expanding[make_key(*args, **kwargs)] = \
                        TestFormatting.make_random_non_container(*args, **kwargs)
                else:
                    force_container_type = list if alternate_containers else None
                    for _ in range(size):
                        expanding[make_key(*args, **kwargs)] = \
                            TestFormatting._make_random_obj(
                                obj_stack, force_container_type=force_container_type, *args, **kwargs
                            )
            else:
                if needs_filler:
                    expanding.append(TestFormatting.make_random_non_container(*args, **kwargs))
                else:
                    force_container_type = dict if alternate_containers else None
                    for _ in range(size):
                        expanding.append(TestFormatting._make_random_obj(
                            obj_stack, force_container_type=force_container_type, *args, **kwargs
                        ))
        return ret

    def test_formatter_coverage(self):
        """Every registered filetype must have a ``test_<name>_formatting`` method on this class."""
        for name in graphtage.FILETYPES_BY_TYPENAME.keys():
            if not hasattr(self, f'test_{name}_formatting'):
                self.fail(f"Filetype {name} is missing a `test_{name}_formatting` test function")

    @filetype_test
    def test_json_formatting(self):
        orig_obj = TestFormatting.make_random_obj(force_string_keys=True)
        return orig_obj, json.dumps(orig_obj)

    @filetype_test
    def test_csv_formatting(self):
        # Up to 10 rows of up to 10 scalar cells; strings avoid CSV delimiters and quotes.
        orig_obj = []
        for _ in range(random.randint(0, 10)):
            orig_obj.append([
                TestFormatting.make_random_non_container(
                    exclude_bytes=frozenset('\n\r\t,"\'')
                ) for _ in range(random.randint(0, 10))
            ])
        s = StringIO()
        csv.writer(s).writerows(orig_obj)
        return orig_obj, s.getvalue()

    @staticmethod
    def make_random_xml() -> xml.XMLElementObj:
        """Return a random XML element tree with random tags, attributes, text and children."""
        ret = xml.XMLElementObj('', {})
        pending = [ret]
        while pending:
            elem = pending.pop()
            elem.tag = ''.join(random.choices(LETTERS, k=random.randint(1, 20)))
            elem.attrib = {
                ''.join(random.choices(LETTERS, k=random.randint(1, 10))): TestFormatting.make_random_str()
                for _ in range(int(random.betavariate(0.75, 5) * 10))
            }
            if random.random() <= 0.5:
                elem.text = TestFormatting.make_random_str()
            child_count = int(random.betavariate(0.75, 5) * 10)
            elem.children = [xml.XMLElementObj('', {}) for _ in range(child_count)]
            pending.extend(elem.children)
        return ret

    # Do not test equality for XML because the XMLFormatter auto-indents and thereby adds extra spaces to element text
    @filetype_test(test_equality=False, iterations=250)
    def test_xml_formatting(self):
        orig_obj = self.make_random_xml()
        return orig_obj, str(orig_obj)

    def test_html_formatting(self):
        # For now, HTML support is implemented through XML, so we don't need a separate test.
        # However, test_formatter_coverage will complain unless this function is here!
        pass

    def test_json5_formatting(self):
        # For now, JSON5 support is implemented using the regular JSON formatter, so we don't need a separate test.
        # However, test_formatter_coverage will complain unless this function is here!
        pass

    def test_pickle_formatting(self):
        # test_formatter_coverage will complain unless this function is here!
        # TODO: Implement a Pickle formatting test
        pass

    @filetype_test
    def test_yaml_formatting(self):
        orig_obj = TestFormatting.make_random_obj(
            allow_empty_containers=False,
            # The YAML formatter doesn't properly handle certain special characters
            # TODO: Relax the excluded bytes in the following argument once the formatter properly handles special chars
            exclude_bytes=frozenset('\t \\\'"\r:[]{}&\n()`|+%<>#*^%$@!~_+-=.,;\n?/'),
            # The YAML formatter doesn't properly handle nested lists yet
            # TODO: Remove the next argument once the formatter properly formats nested lists
            alternate_containers=True,
            # The YAML formatter also doesn't properly handle empty strings that are dict keys:
            # TODO: Remove the next argument once the formatter properly formats empty strings as dict keys
            allow_empty_strings=False
        )

        s = StringIO()
        yaml.dump(orig_obj, s, Dumper=graphtage.yaml.Dumper)
        return orig_obj, s.getvalue()

    @filetype_test(test_equality=False)
    def test_plist_formatting(self):
        orig_obj = TestFormatting.make_random_obj(force_string_keys=True, exclude_bytes=frozenset('<>/\n&?|@{}[]'))
        return orig_obj, plistlib.dumps(orig_obj)
261 |
--------------------------------------------------------------------------------
/test/test_graphtage.py:
--------------------------------------------------------------------------------
1 | from io import StringIO
2 | from unittest import TestCase
3 |
4 | import graphtage
5 | import graphtage.json
6 | import graphtage.multiset
7 | import graphtage.tree
8 |
9 | from graphtage.printer import Printer
10 |
11 |
12 | class TestGraphtage(TestCase):
13 | @classmethod
14 | def setUpClass(cls):
15 | cls.small_from = graphtage.json.build_tree({
16 | "test": "foo",
17 | "baz": 1
18 | })
19 | cls.small_to = graphtage.json.build_tree({
20 | "test": "bar",
21 | "baz": 2
22 | })
23 | cls.list_from = graphtage.json.build_tree([0, 1, 2, 3, 4, 5])
24 | cls.list_to = graphtage.json.build_tree([1, 2, 3, 4, 5])
25 |
26 | def test_string_diff_printing(self):
27 | s1 = graphtage.StringNode("abcdef")
28 | s2 = graphtage.StringNode("azced")
29 | diff = s1.diff(s2)
30 | out_stream = StringIO()
31 | p = Printer(ansi_color=True, out_stream=out_stream)
32 | diff.print(p)
33 | self.assertEqual(diff.edited_cost(), 5)
34 | self.assertEqual('\x1b[32m"\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32ma\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mz̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1mb̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mc\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1md̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32me\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1md̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1mf̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m"\x1b[39m', out_stream.getvalue())
35 |
36 | def test_string_diff_remove_insert_reordering(self):
37 | s1 = graphtage.StringNode('abcdefg')
38 | s2 = graphtage.StringNode('abhijfg')
39 | diff = s1.diff(s2)
40 | out_stream = StringIO()
41 | p = Printer(ansi_color=True, out_stream=out_stream)
42 | diff.print(p)
43 | self.assertEqual('\x1b[32m"\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32ma\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mb\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mh̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mi̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1mj̟\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1mc̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1md̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[41m\x1b[1me̶\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mf\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32mg\x1b[37m\x1b[41m\x1b[1m\x1b[0m\x1b[49m\x1b[32m\x1b[37m\x1b[42m\x1b[1m\x1b[0m\x1b[49m\x1b[32m"\x1b[39m', out_stream.getvalue())
44 |
45 | def test_small_diff(self):
46 | diff = self.small_from.diff(self.small_to)
47 | self.assertIsInstance(diff, graphtage.DictNode)
48 | self.assertIsInstance(diff, graphtage.tree.EditedTreeNode)
49 | self.assertEqual(1, len(diff.edit_list))
50 | self.assertIsInstance(diff.edit_list[0], graphtage.multiset.MultiSetEdit)
51 | has_test_match = False
52 | has_baz_match = False
53 | for edit in diff.edit_list[0].edits():
54 | if edit.bounds().upper_bound > 0:
55 | self.assertIsInstance(edit, graphtage.KeyValuePairEdit)
56 | key_edit = edit.key_edit
57 | value_edit = edit.value_edit
58 | if isinstance(value_edit.from_node, graphtage.StringNode):
59 | self.assertIsInstance(key_edit.to_node, graphtage.StringNode)
60 | self.assertEqual(key_edit.from_node.object, 'test')
61 | self.assertEqual(value_edit.from_node.object, 'foo')
62 | self.assertEqual(value_edit.to_node.object, 'bar')
63 | self.assertEqual(edit.bounds().upper_bound, 6)
64 | self.assertFalse(has_test_match)
65 | has_test_match = True
66 | elif isinstance(value_edit.from_node, graphtage.IntegerNode):
67 | self.assertIsInstance(value_edit.to_node, graphtage.IntegerNode)
68 | self.assertEqual(value_edit.from_node.object, 1)
69 | self.assertEqual(value_edit.to_node.object, 2)
70 | self.assertEqual(value_edit.bounds().upper_bound, 1)
71 | self.assertFalse(has_baz_match)
72 | has_baz_match = True
73 | else:
74 | self.fail()
75 | self.assertTrue(has_test_match)
76 | self.assertTrue(has_baz_match)
77 |
def test_list_diff(self):
    """Diff ``self.list_from`` against ``self.list_to`` and inspect the edits.

    The result must be an edited ``ListNode`` carrying exactly one
    ``EditDistance`` edit. Each of its sub-edits with a positive upper bound
    must be a ``Remove`` of the integer ``0`` from ``self.list_from``; every
    other sub-edit must be a ``Match``.
    """
    result = self.list_from.diff(self.list_to)
    for expected_type in (graphtage.ListNode, graphtage.tree.EditedTreeNode):
        self.assertIsInstance(result, expected_type)
    self.assertEqual(1, len(result.edit_list))
    sequence_edit = result.edit_list[0]
    self.assertIsInstance(sequence_edit, graphtage.EditDistance)
    for sub_edit in sequence_edit.edits():
        has_cost = sub_edit.bounds().upper_bound > 0
        if not has_cost:
            # Zero-cost sub-edits are expected to be plain matches.
            self.assertIsInstance(sub_edit, graphtage.Match)
            continue
        self.assertIsInstance(sub_edit, graphtage.Remove)
        removed = sub_edit.from_node
        self.assertIsInstance(removed, graphtage.IntegerNode)
        self.assertEqual(removed.object, 0)
        container = sub_edit.to_node
        self.assertIsInstance(container, graphtage.ListNode)
        self.assertEqual(container, self.list_from)
93 |
def test_single_element_list(self):
    """Diffing ``[1]`` against ``[2]`` must yield one ``FixedLengthSequenceEdit``."""
    before = graphtage.json.build_tree([1])
    after = graphtage.json.build_tree([2])
    result = before.diff(after)
    for expected_type in (graphtage.ListNode, graphtage.tree.EditedTreeNode):
        self.assertIsInstance(result, expected_type)
    edit_list = result.edit_list
    self.assertEqual(1, len(edit_list))
    self.assertIsInstance(edit_list[0], graphtage.FixedLengthSequenceEdit)
100 |
def test_empty_list(self):
    """Diffing two empty ``ListNode`` objects must yield a single zero-cost ``Match``."""
    empty_from = graphtage.ListNode(())
    empty_to = graphtage.ListNode(())
    result = empty_from.diff(empty_to)
    self.assertEqual(1, len(result.edit_list))
    only_edit = result.edit_list[0]
    self.assertIsInstance(only_edit, graphtage.Match)
    self.assertEqual(0, only_edit.bounds().upper_bound)
106 |
def test_null_json(self):
    """Diffing ``[None]`` against ``[1]`` must yield one ``FixedLengthSequenceEdit``."""
    with_null = graphtage.json.build_tree([None])
    with_int = graphtage.json.build_tree([1])
    result = with_null.diff(with_int)
    for expected_type in (graphtage.ListNode, graphtage.tree.EditedTreeNode):
        self.assertIsInstance(result, expected_type)
    edit_list = result.edit_list
    self.assertEqual(1, len(edit_list))
    self.assertIsInstance(edit_list[0], graphtage.FixedLengthSequenceEdit)
113 |
--------------------------------------------------------------------------------
/test/test_levenshtein.py:
--------------------------------------------------------------------------------
1 | import random
2 | from typing import List
3 | from unittest import TestCase
4 |
5 | from tqdm import trange
6 |
7 | from graphtage.edits import Edit, Insert, Match, Remove
8 | from graphtage import EditDistance, string_edit_distance
9 |
10 |
11 | LETTERS: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
12 |
13 |
class TestEditDistance(TestCase):
    """Randomized and edge-case checks for ``string_edit_distance``."""

    def test_string_edit_distance_reconstruction(self):
        """Both input strings must be recoverable from the emitted edit sequence."""
        for _ in trange(200):
            len_from = random.randint(10, 30)
            len_to = random.randint(10, 30)
            str_from = ''.join(random.choices(LETTERS, k=len_from))
            str_to = ''.join(random.choices(LETTERS, k=len_to))
            distance: EditDistance = string_edit_distance(str_from, str_to)
            edits: List[Edit] = list(distance.edits())
            from_pieces = []
            to_pieces = []
            for edit in edits:
                if isinstance(edit, Match):
                    # A match contributes one character to each side.
                    from_pieces.append(edit.from_node.object)
                    to_pieces.append(edit.to_node.object)
                elif isinstance(edit, Remove):
                    from_pieces.append(edit.from_node.object)
                elif isinstance(edit, Insert):
                    to_pieces.append(edit.from_node.object)
                else:
                    self.fail()
            self.assertEqual(str_from, ''.join(from_pieces))
            self.assertEqual(str_to, ''.join(to_pieces))

    def test_string_edit_distance_optimality(self):
        """The edit sequence must be no longer than the script used to derive the target."""
        for _ in trange(200):
            str_len = random.randint(10, 30)
            str_from = ''.join(random.choices(LETTERS, k=str_len))
            num_ground_truth_edits: int = 0
            str_to = ''
            for original_char in str_from:
                while random.random() < 0.2:
                    # 20% chance of inserting a new character
                    str_to += random.choice(LETTERS)
                    num_ground_truth_edits += 1
                # The original character counts once, whether kept or removed.
                num_ground_truth_edits += 1
                # 20% chance of removing the original character
                keep_original = not (random.random() < 0.2)
                if keep_original:
                    str_to += original_char
            distance: EditDistance = string_edit_distance(str_from, str_to)
            edits: List[Edit] = list(distance.edits())
            num_edits = len(edits)
            if num_ground_truth_edits < num_edits:
                # Dump diagnostics before the assertion below fails.
                print()
                print('\n'.join(e.__class__.__name__ for e in edits))
                print(str_from, str_to)
            self.assertGreaterEqual(num_ground_truth_edits, num_edits)

    def test_empty_string_edit_distance(self):
        """Empty inputs yield no edits; one empty side yields one edit per character."""
        no_edits = string_edit_distance('', '').edits()
        with self.assertRaises(StopIteration):
            next(no_edits)
        removals = list(string_edit_distance('foo', '').edits())
        self.assertEqual(3, len(removals))
        insertions = list(string_edit_distance('', 'foo').edits())
        self.assertEqual(3, len(insertions))
75 |
--------------------------------------------------------------------------------
/test/test_matching.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import random
3 | from unittest import TestCase
4 |
5 | import numpy as np
6 | from tqdm import tqdm, trange
7 |
8 | from graphtage.matching import get_dtype, min_weight_bipartite_matching, WeightedBipartiteMatcher
9 |
10 | from .test_bounds import RandomDecreasingRange
11 |
12 |
class TestWeightedBipartiteMatcher(TestCase):
    """Tests for the bipartite matching helpers in ``graphtage.matching``."""

    def test_weighted_bipartite_matching(self):
        """Each tightening step must shrink the bounds until they settle at zero."""
        for n in trange(1, 25, 3):
            from_nodes = list(range(n))
            to_nodes = list(range(n))
            edges = []
            for _ in range(len(from_nodes)):
                edges.append([RandomDecreasingRange() for _ in range(len(to_nodes))])
            # Give every diagonal edge a final value of zero.
            for diagonal in range(min(len(from_nodes), len(to_nodes))):
                edges[diagonal][diagonal] = RandomDecreasingRange(fixed_lb=0, fixed_ub=100000, final_value=0)

            def lookup_edge(n1, n2):
                return edges[n1][n2]

            matcher = WeightedBipartiteMatcher(
                from_nodes=from_nodes,
                to_nodes=to_nodes,
                get_edge=lookup_edge
            )
            initial_bounds = matcher.bounds()
            prev_diff = initial_bounds.upper_bound - initial_bounds.lower_bound
            with tqdm(leave=False, total=prev_diff) as progress:
                progress.update(0)
                while matcher.tighten_bounds():
                    current = matcher.bounds()
                    new_diff = current.upper_bound - current.lower_bound
                    self.assertLess(new_diff, prev_diff)
                    progress.update(prev_diff - new_diff)
                    prev_diff = new_diff
            self.assertTrue(matcher.bounds().definitive())
            self.assertEqual(0, matcher.bounds().upper_bound)

    def test_min_weight_bipartite_matching(self):
        """A planted zero-weight diagonal must be returned as the matching."""
        for _ in trange(50):
            num_from = random.randint(1, 500)
            num_to = random.randint(1, 500)
            from_nodes = [f'f{i}' for i in range(num_from)]
            to_nodes = [f't{i}' for i in range(num_to)]
            matched_count = min(num_from, num_to)
            # Force an optimal, zero-value matching:
            expected_matching = {}
            edges = {}
            for i in range(matched_count):
                expected_matching[i] = (i, 0)
            for i in range(matched_count):
                edges[(from_nodes[i], to_nodes[i])] = 0
            edge_probability = 0.9
            # Every other pair gets a strictly positive weight with probability 0.9.
            random_edges = {}
            for pair in itertools.product(from_nodes, to_nodes):
                if pair not in edges and random.random() < edge_probability:
                    random_edges[pair] = random.randint(1, 2**16)
            edges.update(random_edges)

            def get_edge(f, t):
                # Pairs without an edge map to None.
                return edges.get((f, t))

            matching = min_weight_bipartite_matching(from_nodes=from_nodes, to_nodes=to_nodes, get_edges=get_edge)

            self.assertEqual(expected_matching, matching)

    def test_get_dtype(self):
        """``get_dtype`` must return the expected dtype for each value range."""
        cases = (
            (0, 255, np.uint8),
            (-1, 127, np.int8),
            (-128, 255, np.int16),
            (0, 2**64 - 1, np.uint64),
            (0, 2**64, int)
        )
        for case in cases:
            min_range, max_range, expected = case
            actual = get_dtype(min_range, max_range)
            self.assertEqual(np.dtype(expected), actual)
80 |
--------------------------------------------------------------------------------
/test/test_object_set.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from graphtage.object_set import ObjectSet
4 |
5 |
class UnhashableWithBrokenEquality:
    """Test fixture whose equality check always raises.

    Defining ``__eq__`` without ``__hash__`` also makes instances unhashable.
    """

    def __init__(self, value):
        # Payload used by subclasses that implement a working comparison.
        self.value = value

    def __eq__(self, other):
        # Any comparison attempt fails, regardless of the operand.
        raise ValueError()
12 |
13 |
class Unhashable(UnhashableWithBrokenEquality):
    """Unhashable test fixture that compares equal by ``value``."""

    def __eq__(self, other):
        # Only another Unhashable can be equal; then the values decide.
        if not isinstance(other, Unhashable):
            return False
        return self.value == other.value
17 |
18 |
class TestObjectSet(TestCase):
    """Tests ``ObjectSet`` with hashable and unhashable members."""

    def test_unhashability(self):
        """Sanity check: the fixture class really is unhashable."""
        with self.assertRaises(TypeError):
            hash(Unhashable(10))

    def test_object_set(self):
        """Membership, length, and removal work with unhashable members."""
        first = Unhashable(10)
        second = Unhashable(11)
        objs = ObjectSet((10, first, second))
        for member in (10, first, second):
            self.assertIn(member, objs)
        self.assertEqual(3, len(objs))
        objs.remove(first)
        self.assertIn(10, objs)
        self.assertNotIn(first, objs)
        self.assertIn(second, objs)
        self.assertEqual(2, len(objs))

    def test_broken_equality(self):
        """Members whose ``__eq__`` raises are still stored and found."""
        first = UnhashableWithBrokenEquality(10)
        second = UnhashableWithBrokenEquality(10)
        # this will default to uniqueness by identity
        objs = ObjectSet((10, first, second))
        for member in (10, first, second):
            self.assertIn(member, objs)
        self.assertEqual(3, len(objs))
46 |
--------------------------------------------------------------------------------
/test/test_pydiff.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | from unittest import TestCase
3 |
4 | import graphtage
5 | from graphtage.pydiff import build_tree, print_diff, PyDiffFormatter
6 |
7 | from .timing import run_with_time_limit
8 |
9 |
class TestPyDiff(TestCase):
    """Tests for diffing native Python objects through ``graphtage.pydiff``."""

    def test_build_tree(self):
        """Lists build ``ListNode`` trees and dicts build ``DictNode`` trees."""
        cases = (
            ([1, 2, 3, 4], graphtage.ListNode),
            ({1: 2, 'a': 'b'}, graphtage.DictNode),
        )
        for python_obj, node_type in cases:
            self.assertIsInstance(build_tree(python_obj), node_type)

    def test_diff(self):
        """Printing a diff of two nested lists must not raise."""
        before = [1, 2, {3: "three"}, 4]
        after = [1, 2, {3: 3}, "four"]
        color_printer = graphtage.printer.Printer(ansi_color=True)
        print_diff(before, after, printer=color_printer)

    def test_custom_class(self):
        """Printing a diff of two instances of a plain class must not raise."""
        class Foo:
            def __init__(self, bar, baz):
                self.bar = bar
                self.baz = baz

        color_printer = graphtage.printer.Printer(ansi_color=True)
        before = Foo("bar", "baz")
        after = Foo("bar", "bak")
        print_diff(before, after, printer=color_printer)

    def test_nested_tuple_diff(self):
        """A tuple nested in a dict value is built as a ``ListNode``."""
        root = build_tree({"a": (1, 2)})
        self.assertIsInstance(root, graphtage.DictNode)
        pairs = root.children()
        self.assertEqual(1, len(pairs))
        only_pair = pairs[0]
        self.assertIsInstance(only_pair, graphtage.KeyValuePairNode)
        self.assertIsInstance(only_pair.key, graphtage.StringNode)
        self.assertIsInstance(only_pair.value, graphtage.ListNode)

    def test_infinite_loop(self):
        """Reproduces https://github.com/trailofbits/graphtage/issues/82"""

        @dataclasses.dataclass
        class Thing:
            foo: str

        # The diff has to finish within the time limit.
        before = [Thing("ok")]
        after = [Thing("bad")]
        with run_with_time_limit(60):
            _ = graphtage.pydiff.diff(before, after)
49 |
--------------------------------------------------------------------------------
/test/test_search.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from tqdm import trange
4 |
5 | from graphtage.search import IterativeTighteningSearch
6 | from .test_bounds import RandomDecreasingRange
7 |
8 |
class TestIterativeTighteningSearch(TestCase):
    """Checks that ``IterativeTighteningSearch`` selects the range with the smallest final value."""

    def test_iterative_tightening_search(self):
        """Run the search over random ranges and compare it with the known best one.

        Each round also counts how many tightening steps the search performed
        versus how many were left undone, and reports the ratio as a "speedup".
        """
        speedups = 0
        tests = 0
        try:
            t = trange(100)
            for _ in t:
                ranges = [RandomDecreasingRange() for _ in range(100)]
                # min() keeps the first of equal minima, like a strict "<" scan.
                best_range: RandomDecreasingRange = min(ranges, key=lambda r: r.final_value)
                search = IterativeTighteningSearch(iter(ranges))
                while search.tighten_bounds():
                    pass
                result = search.best_match
                # Tightening steps performed while the search was running.
                tightenings = sum(r.tightenings for r in ranges)
                # Steps the search skipped: finish every range and count the extra work.
                untightened = 0
                for r in ranges:
                    t_before = r.tightenings
                    while r.tighten_bounds():
                        pass
                    untightened += r.tightenings - t_before
                speedup = (untightened + tightenings) / tightenings
                t.desc = f"{speedup:.01f}x Speedup"
                speedups += speedup
                tests += 1
                self.assertEqual(best_range.final_value, result.final_value)
        finally:
            # Report only if a round completed; otherwise the division by zero
            # would replace the exception that aborted the loop.
            if tests:
                print(f"Average speedup: {speedups / tests:.01f}x")
38 |
--------------------------------------------------------------------------------
/test/test_timing.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from .timing import run_with_time_limit
4 |
5 |
def infinite_loop():
    """Spin forever; only an exception raised into the loop ends it."""
    while True:
        continue
9 |
10 |
def limited_infinite_loop():
    """Run ``infinite_loop`` under a one-second ``run_with_time_limit`` guard."""
    time_limit = run_with_time_limit(seconds=1)
    with time_limit:
        infinite_loop()
14 |
15 |
class TestTiming(TestCase):
    """Tests for the ``run_with_time_limit`` helper."""

    def test_time_limit(self):
        """A never-ending body must be cut short with ``TimeoutError``."""
        with self.assertRaises(TimeoutError):
            limited_infinite_loop()

    def test_non_infinite_loop(self):
        """A body that finishes in time must exit the guard without an error."""
        time_limit = run_with_time_limit(seconds=60)
        with time_limit:
            _ = 10
23 |
--------------------------------------------------------------------------------
/test/test_utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from unittest import TestCase
3 |
4 | from graphtage.utils import largest, smallest, SparseMatrix
5 |
6 |
class TestSparseMatrix(TestCase):
    """Tests for ``SparseMatrix`` and the ``smallest``/``largest`` helpers."""

    def test_matrix_bounds(self):
        """Indexing one past the declared row or column count raises ``IndexError``."""
        matrix: SparseMatrix[int] = SparseMatrix(num_rows=10, num_cols=10, default_value=None)
        self.assertRaises(IndexError, lambda: matrix[matrix.num_rows])
        self.assertRaises(IndexError, lambda: matrix[0][matrix.num_cols])

    def test_matrix_default_value(self):
        """Unset cells read as the default value until they are assigned."""
        default = 10
        matrix: SparseMatrix[int] = SparseMatrix(default_value=default)
        self.assertEqual(matrix[0][0], default)
        matrix[0][0] = 11
        self.assertEqual(matrix[0][0], 11)

    def test_matrix_getsizeof(self):
        """The reported size must grow by at least the size of the stored ints."""
        matrix: SparseMatrix[int] = SparseMatrix()
        size_before = matrix.getsizeof()
        dim = 1000
        int_sizes = 0
        for row in range(dim):
            for col in range(dim):
                matrix[row][col] = row * dim + col
                int_sizes += sys.getsizeof(matrix[row][col])
        growth = matrix.getsizeof() - size_before
        self.assertGreaterEqual(growth, int_sizes)

    def test_matrix_shape(self):
        """The shape follows the highest written index, or the declared dimensions."""
        matrix: SparseMatrix[int] = SparseMatrix()
        self.assertEqual((0, 0), matrix.shape())
        matrix[10][20] = 1
        self.assertEqual((11, 21), matrix.shape())
        sized = SparseMatrix(num_rows=10, num_cols=10)
        self.assertEqual((10, 10), sized.shape())

    def test_smallest(self):
        """Every value returned for ``n=10`` over ``range(1000)`` is below 10."""
        # Input passed as a single iterable.
        for value in smallest(range(1000), n=10):
            self.assertGreater(10, value)
        # Input passed as separate positional arguments.
        for value in smallest(*list(range(1000)), n=10):
            self.assertGreater(10, value)

    def test_largest(self):
        """Every value returned for ``n=10`` over ``range(1000)`` exceeds 989."""
        # Input passed as a single iterable.
        for value in largest(range(1000), n=10):
            self.assertLess(1000 - 11, value)
        # Input passed as separate positional arguments.
        for value in largest(*list(range(1000)), n=10):
            self.assertLess(1000 - 11, value)
52 |
--------------------------------------------------------------------------------
/test/test_xml.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 |
4 | from graphtage.utils import Tempfile
5 | from graphtage.xml import XML
6 |
7 |
class TestXML(unittest.TestCase):
    # Regression test for the XML filetype support.
    def test_infinite_loop(self):
        """Reproduces https://github.com/trailofbits/graphtage/issues/32"""
        xml = XML.default_instance
        # NOTE(review): as shown here, both payloads contain only the text
        # "child1"/"child2" and no element tags. The markup may have been
        # stripped when this listing was produced -- confirm against the
        # upstream issue before relying on these literals.
        one_xml = b"""


child1
child2


"""
        two_xml = b"""


child1
child2


"""
        # Presumably Tempfile exposes each payload as a temporary file path;
        # verify against graphtage.utils.
        with Tempfile(one_xml) as one, Tempfile(two_xml) as two:
            t1 = xml.build_tree(one)
            t2 = xml.build_tree(two)
            # No assertions: the test only needs this iteration to terminate.
            for edit in t1.get_all_edits(t2):
                print(edit)
33 |
--------------------------------------------------------------------------------
/test/timing.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import _thread
3 | from contextlib import contextmanager
4 |
5 |
@contextmanager
def run_with_time_limit(seconds: int):
    """Context manager that aborts the managed block after ``seconds`` seconds.

    A background ``threading.Timer`` calls ``_thread.interrupt_main`` when the
    limit expires, which raises ``KeyboardInterrupt`` in the main thread. That
    interrupt is converted into ``TimeoutError``. Any other exception raised by
    the managed block propagates unchanged, so real failures are not reported
    as timeouts.

    Args:
        seconds: Time limit for the managed block, in seconds.

    Raises:
        TimeoutError: If the timer fired while the managed block was running.
    """
    timed_out = threading.Event()

    def _interrupt():
        # Record that the interrupt came from the timer, so a genuine Ctrl-C
        # can be told apart from a timeout.
        timed_out.set()
        _thread.interrupt_main()

    timer = threading.Timer(seconds, _interrupt)
    timer.start()

    try:
        yield
    except KeyboardInterrupt:
        if not timed_out.is_set():
            # Not ours: let a user-initiated interrupt through untouched.
            raise
        raise TimeoutError(f"timeout after {seconds} seconds") from None
    finally:
        # Always stop the timer so it cannot fire after the block has exited.
        timer.cancel()
19 |
--------------------------------------------------------------------------------