├── .bzrignore ├── .codespellrc ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── auto-merge.yaml │ ├── auto-merge.yml │ ├── disperse.yml │ ├── pythonpackage.yml │ └── wheels.yaml ├── .gitignore ├── AUTHORS ├── CLAUDE.md ├── CODE_OF_CONDUCT.md ├── COPYING ├── Cargo.lock ├── Cargo.toml ├── MANIFEST.in ├── README.rst ├── build.cmd ├── disperse.toml ├── patiencediff ├── __init__.py ├── __main__.py ├── _patiencediff_c.c ├── _patiencediff_c.pyi ├── _patiencediff_py.py ├── _patiencediff_rs.pyi ├── py.typed └── test_patiencediff.py ├── pyproject.toml ├── setup.py └── src └── lib.rs /.bzrignore: -------------------------------------------------------------------------------- 1 | build 2 | patiencediff.egg-info 3 | dist/ 4 | *~ 5 | -------------------------------------------------------------------------------- /.codespellrc: -------------------------------------------------------------------------------- 1 | [codespell] 2 | ignore-words-list = alo 3 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @jelmer 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Keep GitHub Actions up to date with GitHub's Dependabot... 2 | # https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot 3 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem 4 | version: 2 5 | updates: 6 | - package-ecosystem: "github-actions" 7 | directory: "/" 8 | schedule: 9 | interval: weekly 10 | - package-ecosystem: "pip" 11 | directory: "/" 12 | schedule: 13 | interval: weekly 14 | -------------------------------------------------------------------------------- /.github/workflows/auto-merge.yaml: -------------------------------------------------------------------------------- 1 | name: Dependabot auto-merge 2 | on: pull_request_target 3 | 4 | permissions: 5 | pull-requests: write 6 | contents: write 7 | 8 | jobs: 9 | dependabot: 10 | runs-on: ubuntu-latest 11 | if: ${{ github.actor == 'dependabot[bot]' }} 12 | steps: 13 | - name: Dependabot metadata 14 | id: metadata 15 | uses: dependabot/fetch-metadata@v2 16 | with: 17 | github-token: "${{ secrets.GITHUB_TOKEN }}" 18 | - name: Enable auto-merge for Dependabot PRs 19 | run: gh pr merge --auto --squash "$PR_URL" 20 | env: 21 | PR_URL: ${{github.event.pull_request.html_url}} 22 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 23 | -------------------------------------------------------------------------------- /.github/workflows/auto-merge.yml: -------------------------------------------------------------------------------- 1 | name: Dependabot auto-merge 2 | on: pull_request_target 3 | 4 | permissions: 5 | pull-requests: write 6 | contents: write 7 | 8 | jobs: 9 | dependabot: 10 | runs-on: ubuntu-latest 11 | if: ${{ github.actor == 'dependabot[bot]' }} 12 | steps: 13 | - name: Dependabot metadata 14 | id: metadata 15 | uses: dependabot/fetch-metadata@v2 16 | with: 17 | github-token: "${{ secrets.GITHUB_TOKEN }}" 18 | - name: Enable auto-merge for Dependabot PRs 19 | run: gh pr merge --auto --squash "$PR_URL" 20 | env: 21 | PR_URL: ${{github.event.pull_request.html_url}} 22 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 23 | 
-------------------------------------------------------------------------------- /.github/workflows/disperse.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Disperse configuration 3 | 4 | "on": 5 | - push 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: jelmer/action-disperse-validate@v2 -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | matrix: 11 | os: [ubuntu-latest, macos-latest, windows-latest] 12 | python-version: 13 | - '3.13' 14 | - '3.12' 15 | - '3.11' 16 | - '3.10' 17 | - '3.9' 18 | fail-fast: false 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip mypy setuptools setuptools-rust 29 | pip install -U pip ".[dev]" 30 | - name: Style checks 31 | run: | 32 | python -m ruff check . 33 | python -m ruff format --check . 34 | - name: Typing checks 35 | run: python -m mypy patiencediff 36 | - name: Build 37 | run: python setup.py build_ext -i 38 | - name: Test suite run 39 | run: python -m unittest patiencediff.test_patiencediff 40 | env: 41 | PYTHONHASHSEED: random 42 | -------------------------------------------------------------------------------- /.github/workflows/wheels.yaml: -------------------------------------------------------------------------------- 1 | name: Build Python distributions 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 6 * * *" # Daily 6AM UTC build 8 | 9 | jobs: 10 | build-wheels: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest, macos-latest, windows-latest] 15 | fail-fast: true 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-python@v5 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install setuptools wheel cibuildwheel 24 | - name: Set up QEMU 25 | uses: docker/setup-qemu-action@v3 26 | if: "matrix.os == 'ubuntu-latest'" 27 | - name: Build wheels 28 | run: python -m cibuildwheel --output-dir wheelhouse 29 | - name: Upload wheels 30 | uses: actions/upload-artifact@v4 31 | with: 32 | name: artifact-${{ matrix.os }} 33 | path: ./wheelhouse/*.whl 34 | 35 | build-sdist: 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: actions/setup-python@v5 40 | - name: Install dependencies 41 | run: | 42 | python -m pip install --upgrade pip 43 | pip install build 44 | - name: Build sdist 45 | run: python -m build --sdist 46 | - name: Upload sdist 47 | uses: actions/upload-artifact@v4 48 | with: 49 | name: artifact-source 50 | path: ./dist/*.tar.gz 51 | 52 | test-sdist: 53 | needs: 54 | - build-sdist 55 | runs-on: ubuntu-latest 56 | steps: 57 | - uses: actions/setup-python@v5 58 | - name: Install dependencies 59 | run: | 60 | python -m pip install --upgrade pip 61 | # Upgrade packaging to avoid a bug in twine.
62 | # See https://github.com/pypa/twine/issues/1216 63 | pip install "twine>=6.1.0" "packaging>=24.2" 64 | - name: Download sdist 65 | uses: actions/download-artifact@v4 66 | with: 67 | name: artifact-source 68 | path: dist 69 | - name: Test sdist 70 | run: twine check dist/* 71 | - name: Test installation from sdist 72 | run: pip install dist/*.tar.gz 73 | 74 | publish: 75 | runs-on: ubuntu-latest 76 | needs: 77 | - build-wheels 78 | - build-sdist 79 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 80 | permissions: 81 | id-token: write 82 | environment: 83 | name: pypi 84 | url: https://pypi.org/p/patiencediff 85 | steps: 86 | - name: Download distributions 87 | uses: actions/download-artifact@v4 88 | with: 89 | merge-multiple: true 90 | pattern: artifact-* 91 | path: dist 92 | - name: Publish package distributions to PyPI 93 | uses: pypa/gh-action-pypi-publish@release/v1 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | build 3 | __pycache__ 4 | *~ 5 | *.so 6 | *.pyc 7 | patiencediff.egg-info 8 | target/ 9 | 10 | **/.claude/settings.local.json -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | John Arbash Meinel 2 | Lukáš Lalinský 3 | Martin Pool 4 | Jelmer Vernooij -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Repository Overview 6 | 7 | patiencediff is a Python implementation of the "patience diff" algorithm first described by Bram Cohen. The package contains a pure-Python implementation of the algorithm as well as faster C and Rust implementations. 8 | 9 | Similar to Python's `difflib`, this module provides: 10 | - A `unified_diff` function for generating unified diffs of text files 11 | - A `SequenceMatcher` that can be used on arbitrary lists 12 | 13 | The package was originally extracted from the Bazaar codebase and is now maintained by the Breezy team. 14 | 15 | ## Building and Installation 16 | 17 | To build the package: 18 | 19 | ```bash 20 | # Build the package (including C extension) 21 | pip3 install -e . 22 | 23 | # Build without C extension 24 | CIBUILDWHEEL=1 pip install -e . 25 | ``` 26 | 27 | ## Running Tests 28 | 29 | Tests use Python's built-in unittest framework: 30 | 31 | ```bash 32 | # Run all tests 33 | python3 -m unittest discover patiencediff 34 | 35 | # Run a specific test class 36 | python3 -m unittest patiencediff.test_patiencediff.TestPatienceDiffLib 37 | 38 | # Run a specific test method 39 | python3 -m unittest patiencediff.test_patiencediff.TestPatienceDiffLib.test_unique_lcs 40 | ``` 41 | 42 | ## Code Linting 43 | 44 | The project uses ruff for linting: 45 | 46 | ```bash 47 | # Install development dependencies (includes ruff) 48 | pip install -e ".[dev]" 49 | 50 | # Run linting 51 | ruff check .
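ruff format --check .  # CI runs this formatting check as well (see pythonpackage.yml)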
52 | ``` 53 | 54 | ## Using patiencediff 55 | 56 | To use the patiencediff module from the command line: 57 | 58 | ```bash 59 | python3 -m patiencediff file_a file_b 60 | 61 | # Use standard difflib algorithm instead of patience 62 | python3 -m patiencediff --difflib file_a file_b 63 | ``` 64 | 65 | From Python: 66 | 67 | ```python 68 | import patiencediff 69 | 70 | # Generate unified diff 71 | diff = patiencediff.unified_diff( 72 | ['a\n', 'b\n', 'c\n'], 73 | ['a\n', 'x\n', 'c\n'] 74 | ) 75 | print(''.join(diff)) 76 | 77 | # Use SequenceMatcher for custom diff operations 78 | matcher = patiencediff.PatienceSequenceMatcher(None, a_list, b_list) 79 | ``` 80 | 81 | ## Code Architecture 82 | 83 | The package consists of three implementations: 84 | 85 | 1. **Python implementation** (`_patiencediff_py.py`): Pure Python implementation of the algorithm, more readable but slower. 86 | 87 | 2. **C implementation** (`_patiencediff_c.c`): Faster implementation in C, requires a C compiler to build. 3. **Rust implementation** (`src/lib.rs`): Faster implementation built with PyO3, requires a Rust toolchain to build. 88 | 89 | The entry point (`__init__.py`) tries to load the Rust implementation first, and falls back to the Python implementation if the Rust extension isn't available. 90 | 91 | Key components: 92 | - `unique_lcs`: Finds the longest common subsequence of unique lines between two sequences 93 | - `recurse_matches`: Recursively finds matches between two sequences 94 | - `PatienceSequenceMatcher`: Main implementation of the diff algorithm, similar interface to `difflib.SequenceMatcher` 95 | - `unified_diff`: Creates a unified diff from two sequences 96 | - `unified_diff_files`: Reads two files and returns a unified diff 97 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socioeconomic status, 10 | nationality, personal appearance, race, religion, or sexual identity 11 | and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community.
15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the 27 | overall community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or 32 | advances of any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official email address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | [INSERT CONTACT METHOD]. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series 87 | of actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. 
Violating these terms may lead to a temporary or 94 | permanent ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within 114 | the community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.0, available at 120 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available 127 | at [https://www.contributor-covenant.org/translations][translations]. 128 | 129 | [homepage]: https://www.contributor-covenant.org 130 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 131 | [Mozilla CoC]: https://github.com/mozilla/diversity 132 | [FAQ]: https://www.contributor-covenant.org/faq 133 | [translations]: https://www.contributor-covenant.org/translations 134 | 135 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 
27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. 
You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. 
If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail.
311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing.
3 | version = 4 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.4.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 10 | 11 | [[package]] 12 | name = "cfg-if" 13 | version = "1.0.0" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 16 | 17 | [[package]] 18 | name = "heck" 19 | version = "0.5.0" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 22 | 23 | [[package]] 24 | name = "indoc" 25 | version = "2.0.6" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" 28 | 29 | [[package]] 30 | name = "libc" 31 | version = "0.2.172" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" 34 | 35 | [[package]] 36 | name = "memoffset" 37 | version = "0.9.1" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" 40 | dependencies = [ 41 | "autocfg", 42 | ] 43 | 44 | [[package]] 45 | name = "once_cell" 46 | version = "1.21.3" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 49 | 50 | [[package]] 51 | name = "patiencediff" 52 | version = "0.2.1" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "2c707262dd66fabcb1b2b79f2d65e2a1f8abb7e31005e882504c99680e009225" 55 | 56 | [[package]] 57 | name = "patiencediff-rs" 58 | version = "0.2.15" 59 | dependencies = [ 60 | "patiencediff", 61 | "pyo3", 62 | ] 63 | 64 | [[package]] 65 | name = "portable-atomic" 66 | version = "1.11.0" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" 69 | 70 | [[package]] 71 | name = "proc-macro2" 72 | version = "1.0.95" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" 75 | dependencies = [ 76 | "unicode-ident", 77 | ] 78 | 79 | [[package]] 80 | name = "pyo3" 81 | version = "0.24.2" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" 84 | dependencies = [ 85 | "cfg-if", 86 | "indoc", 87 | "libc", 88 | "memoffset", 89 | "once_cell", 90 | "portable-atomic", 91 | "pyo3-build-config", 92 | "pyo3-ffi", 93 | "pyo3-macros", 94 | "unindent", 95 | ] 96 | 97 | [[package]] 98 | name = "pyo3-build-config" 99 | version = "0.24.2" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" 102 | dependencies = [ 103 | "once_cell", 104 | "target-lexicon", 105 | ] 106 | 107 | [[package]] 108 | name = "pyo3-ffi" 109 | version = "0.24.2" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" 112 | dependencies = [ 113 | "libc", 114 | "pyo3-build-config", 115 | ] 116 | 117 | [[package]] 118 | name = "pyo3-macros" 119 | 
version = "0.24.2" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" 122 | dependencies = [ 123 | "proc-macro2", 124 | "pyo3-macros-backend", 125 | "quote", 126 | "syn", 127 | ] 128 | 129 | [[package]] 130 | name = "pyo3-macros-backend" 131 | version = "0.24.2" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" 134 | dependencies = [ 135 | "heck", 136 | "proc-macro2", 137 | "pyo3-build-config", 138 | "quote", 139 | "syn", 140 | ] 141 | 142 | [[package]] 143 | name = "quote" 144 | version = "1.0.40" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 146 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 147 | dependencies = [ 148 | "proc-macro2", 149 | ] 150 | 151 | [[package]] 152 | name = "syn" 153 | version = "2.0.101" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" 156 | dependencies = [ 157 | "proc-macro2", 158 | "quote", 159 | "unicode-ident", 160 | ] 161 | 162 | [[package]] 163 | name = "target-lexicon" 164 | version = "0.13.2" 165 | source = "registry+https://github.com/rust-lang/crates.io-index" 166 | checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" 167 | 168 | [[package]] 169 | name = "unicode-ident" 170 | version = "1.0.18" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 173 | 174 | [[package]] 175 | name = "unindent" 176 | version = "0.2.4" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "patiencediff-rs" 3 | version = "0.2.15" 4 | edition = "2021" 5 | authors = ["Breezy Developers "] 6 | description = "Python bindings for patiencediff algorithm" 7 | license = "GPL-2.0-or-later" 8 | repository = "https://github.com/breezy-team/patiencediff" 9 | 10 | [lib] 11 | name = "_patiencediff_rs" 12 | crate-type = ["cdylib"] 13 | 14 | [dependencies] 15 | patiencediff = { version = "0.2.1", default-features = false } 16 | pyo3 = { version = "0.24.0", features = ["extension-module"] } -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS 2 | include README.rst 3 | include COPYING 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | patiencediff 2 | ############ 3 | 4 | This package contains the implementation of the ``patiencediff`` algorithm, as 5 | first described by Bram Cohen. 6 | 7 | Like Python's ``difflib``, this module provides both a convenience ``unified_diff`` 8 | function for generating unified diffs of text files 9 | and a ``SequenceMatcher`` that can be used on arbitrary lists. 10 | 11 | Patiencediff provides a good balance of performance, nice output for humans, 12 | and implementation simplicity.
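For a quick comparison of the two algorithms, both matchers can be passed to ``unified_diff`` via its ``sequencematcher`` argument (a minimal runnable sketch; the sample inputs are arbitrary)::

    import difflib
    import patiencediff

    a = ['a\n', 'b\n', 'c\n', 'd\n']
    b = ['a\n', 'd\n', 'b\n']

    # Patience matching anchors on lines that occur exactly once in both inputs.
    print(''.join(patiencediff.unified_diff(
        a, b, sequencematcher=patiencediff.PatienceSequenceMatcher)))

    # The same diff computed with Python's default matcher, for comparison.
    print(''.join(patiencediff.unified_diff(
        a, b, sequencematcher=difflib.SequenceMatcher)))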
13 | 14 | The code in this package was extracted from the Bazaar 15 | code base. 16 | 17 | The package comes with three implementations: 18 | 19 | * A Python implementation (_patiencediff_py.py); this implementation only 20 | requires a Python interpreter and is the most readable version of the three 21 | 22 | * A C implementation (_patiencediff_c.c); this implementation 23 | is faster, but requires a C compiler and is less readable * A Rust implementation (src/lib.rs); this implementation is also faster than the pure Python version and is tried first when it is available 24 | 25 | Usage 26 | ===== 27 | 28 | To invoke patiencediff from the command-line:: 29 | 30 | python -m patiencediff file_a file_b 31 | 32 | Or from Python:: 33 | 34 | >>> import patiencediff 35 | >>> print(''.join(patiencediff.unified_diff( 36 | ... ['a\n', 'b\n', 'b\n', 'c\n'], 37 | ... ['a\n', 'c\n', 'b\n']))) 38 | --- 39 | +++ 40 | @@ -1,4 +1,3 @@ 41 | a 42 | +c 43 | b 44 | -b 45 | -c 46 | -------------------------------------------------------------------------------- /build.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | :: To build extensions for 64 bit Python 3, we need to configure environment 3 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 4 | :: MS Windows SDK for Windows 7 and .NET Framework 4 5 | :: 6 | :: More details at: 7 | :: https://github.com/cython/cython/wiki/CythonExtensionsOnWindows 8 | 9 | IF "%DISTUTILS_USE_SDK%"=="1" ( 10 | ECHO Configuring environment to build with MSVC on a 64bit architecture 11 | ECHO Using Windows SDK 7.1 12 | "C:\Program Files\Microsoft SDKs\Windows\v7.1\Setup\WindowsSdkVer.exe" -q -version:v7.1 13 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release 14 | SET MSSdk=1 15 | REM Need the following to allow tox to see the SDK compiler 16 | SET TOX_TESTENV_PASSENV=DISTUTILS_USE_SDK MSSdk INCLUDE LIB 17 | ) ELSE ( 18 | ECHO Using default MSVC build environment 19 | ) 20 | 21 | CALL %* -------------------------------------------------------------------------------- /disperse.toml: -------------------------------------------------------------------------------- 1 | name = "patiencediff" 2 | tag-name = "v$VERSION" 3 | verify-command = "python3 -m unittest patiencediff.test_patiencediff" 4 | tarball-location = [] 5 | release-timeout = 5 6 | 7 | [[update_version]] 8 | path = "patiencediff/__init__.py" 9 | match = "^__version__ = ((.*))$" 10 | new-line = "__version__ = $TUPLED_VERSION" -------------------------------------------------------------------------------- /patiencediff/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2005, 2006, 2007 Canonical Ltd 2 | # Copyright (C) 2021-2023 Jelmer Vernooij 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details.
13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program; if not, write to the Free Software 16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | 18 | import difflib 19 | import os 20 | import sys 21 | import time 22 | from typing import Type 23 | 24 | __all__ = [ 25 | "PatienceSequenceMatcher", 26 | "unified_diff", 27 | "unified_diff_files", 28 | "recurse_matches", 29 | "unique_lcs", 30 | ] 31 | 32 | __version__ = (0, 2, 15) 33 | 34 | 35 | # This is a version of unified_diff which only adds a factory parameter 36 | # so that you can override the default SequenceMatcher. 37 | # This has been submitted as a patch to Python. 38 | def unified_diff( 39 | a, 40 | b, 41 | fromfile="", 42 | tofile="", 43 | fromfiledate="", 44 | tofiledate="", 45 | n=3, 46 | lineterm="\n", 47 | sequencematcher=None, 48 | ): 49 | r"""Compare two sequences of lines; generate the delta as a unified diff. 50 | 51 | Unified diffs are a compact way of showing line changes and a few 52 | lines of context. The number of context lines is set by 'n' which 53 | defaults to three. 54 | 55 | By default, the diff control lines (those with ---, +++, or @@) are 56 | created with a trailing newline. This is helpful so that inputs 57 | created from file.readlines() result in diffs that are suitable for 58 | file.writelines() since both the inputs and outputs have trailing 59 | newlines. 60 | 61 | For inputs that do not have trailing newlines, set the lineterm 62 | argument to "" so that the output will be uniformly newline free. 63 | 64 | The unidiff format normally has a header for filenames and modification 65 | times. Any or all of these may be specified using strings for 66 | 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification 67 | times are normally expressed in the format returned by time.ctime(). 68 | 69 | Example: 70 | >>> for line in unified_diff('one two three four'.split(), 71 | ... 'zero one tree four'.split(), 'Original', 'Current', 72 | ... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003', 73 | ... lineterm=''): 74 | ... print(line) 75 | --- Original Sat Jan 26 23:30:50 1991 76 | +++ Current Fri Jun 06 10:20:52 2003 77 | @@ -1,4 +1,4 @@ 78 | +zero 79 | one 80 | -two 81 | -three 82 | +tree 83 | four 84 | """ 85 | if sequencematcher is None: 86 | sequencematcher = difflib.SequenceMatcher 87 | 88 | if fromfiledate: 89 | fromfiledate = "\t" + str(fromfiledate) 90 | if tofiledate: 91 | tofiledate = "\t" + str(tofiledate) 92 | 93 | started = False 94 | for group in sequencematcher(None, a, b).get_grouped_opcodes(n): 95 | if not started: 96 | yield f"--- {fromfile}{fromfiledate}{lineterm}" 97 | yield f"+++ {tofile}{tofiledate}{lineterm}" 98 | started = True 99 | i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4] 100 | yield f"@@ -{i1 + 1},{i2 - i1} +{j1 + 1},{j2 - j1} @@{lineterm}" 101 | for tag, i1, i2, j1, j2 in group: 102 | if tag == "equal": 103 | for line in a[i1:i2]: 104 | yield " " + line 105 | continue 106 | if tag == "replace" or tag == "delete": 107 | for line in a[i1:i2]: 108 | yield "-" + line 109 | if tag == "replace" or tag == "insert": 110 | for line in b[j1:j2]: 111 | yield "+" + line 112 | 113 | 114 | def unified_diff_files(a, b, sequencematcher=None): 115 | """Generate the diff for two files.""" 116 | # Should this actually be an error?
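# Identical filenames always compare equal, so skip reading the files and return an empty diff.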
117 | if a == b: 118 | return [] 119 | if a == "-": 120 | lines_a = sys.stdin.readlines() 121 | time_a = time.time() 122 | else: 123 | with open(a) as f: 124 | lines_a = f.readlines() 125 | time_a = os.stat(a).st_mtime # noqa: F841 126 | 127 | if b == "-": 128 | lines_b = sys.stdin.readlines() 129 | time_b = time.time() 130 | else: 131 | with open(b) as f: 132 | lines_b = f.readlines() 133 | time_b = os.stat(b).st_mtime # noqa: F841 134 | 135 | # TODO: Include fromfiledate and tofiledate 136 | return unified_diff( 137 | lines_a, lines_b, fromfile=a, tofile=b, sequencematcher=sequencematcher 138 | ) 139 | 140 | 141 | PatienceSequenceMatcher: Type[difflib.SequenceMatcher] 142 | 143 | 144 | # Try to import the Rust implementation first 145 | try: 146 | from ._patiencediff_rs import ( 147 | PatienceSequenceMatcher_rs as PatienceSequenceMatcher, 148 | ) 149 | from ._patiencediff_rs import recurse_matches_rs as recurse_matches 150 | from ._patiencediff_rs import unique_lcs_rs as unique_lcs 151 | except ImportError: 152 | # Fall back to the Python implementation if Rust is not available 153 | from ._patiencediff_py import ( 154 | PatienceSequenceMatcher_py as PatienceSequenceMatcher, 155 | ) 156 | from ._patiencediff_py import ( 157 | recurse_matches_py as recurse_matches, 158 | ) 159 | from ._patiencediff_py import unique_lcs_py as unique_lcs 160 | -------------------------------------------------------------------------------- /patiencediff/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2005, 2006, 2007 Canonical Ltd 2 | # 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | 17 | import difflib 18 | import sys 19 | 20 | from . 
import PatienceSequenceMatcher, unified_diff_files 21 | 22 | 23 | def main(argv=None): 24 | import optparse 25 | 26 | p = optparse.OptionParser( 27 | usage="%prog [options] file_a file_b" 28 | '\nFiles can be "-" to read from stdin' 29 | ) 30 | p.add_option( 31 | "--patience", 32 | dest="matcher", 33 | action="store_const", 34 | const="patience", 35 | default="patience", 36 | help="Use the patience difference algorithm", 37 | ) 38 | p.add_option( 39 | "--difflib", 40 | dest="matcher", 41 | action="store_const", 42 | const="difflib", 43 | default="patience", 44 | help="Use python's difflib algorithm", 45 | ) 46 | 47 | algorithms = { 48 | "patience": PatienceSequenceMatcher, 49 | "difflib": difflib.SequenceMatcher, 50 | } 51 | 52 | (opts, args) = p.parse_args(argv) 53 | matcher = algorithms[opts.matcher] 54 | 55 | if len(args) != 2: 56 | print("You must supply 2 filenames to diff") 57 | return -1 58 | 59 | for line in unified_diff_files(args[0], args[1], sequencematcher=matcher): 60 | sys.stdout.write(line) 61 | 62 | 63 | sys.exit(main(sys.argv[1:])) 64 | -------------------------------------------------------------------------------- /patiencediff/_patiencediff_c.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2007, 2010 Canonical Ltd 3 | Copyright (C) 2021-2023 Jelmer Vernooij 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 2 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program; if not, write to the Free Software 17 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 18 | 19 | Function equate_lines based on bdiff.c from Mercurial. 20 | Copyright (C) 2005, 2006 Matt Mackall 21 | 22 | Functions unique_lcs/recurse_matches based on _patiencediff_py.py. 23 | Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd 24 | */ 25 | 26 | 27 | #include <Python.h> 28 | #include <stdlib.h> 29 | #include <string.h> 30 | 31 | 32 | #if defined(__GNUC__) 33 | # define inline __inline__ 34 | #elif defined(_MSC_VER) 35 | # define inline __inline 36 | #else 37 | # define inline 38 | #endif 39 | 40 | 41 | #define MIN(a, b) (((a) > (b)) ? (b) : (a)) 42 | #define MAX(a, b) (((a) > (b)) ? (a) : (b)) 43 | 44 | 45 | #define SENTINEL -1 46 | 47 | 48 | /* malloc returns NULL on some platforms if you try to allocate nothing, 49 | * causing failures in callers that treat NULL as an allocation error. 50 | * On glibc it passes, but 51 | * let's make it fail to aid testing. */ 52 | #define guarded_malloc(x) ( ((x) > 0) ?
malloc(x) : NULL ) 53 | 54 | enum { 55 | OP_EQUAL = 0, 56 | OP_INSERT, 57 | OP_DELETE, 58 | OP_REPLACE 59 | }; 60 | 61 | 62 | /* values from this array need to correspond to the order of the enum above */ 63 | static char *opcode_names[] = { 64 | "equal", 65 | "insert", 66 | "delete", 67 | "replace", 68 | }; 69 | 70 | 71 | struct line { 72 | long hash; /* hash code of the string/object */ 73 | Py_ssize_t next; /* next line from the same equivalence class */ 74 | Py_ssize_t equiv; /* equivalence class */ 75 | PyObject *data; 76 | }; 77 | 78 | 79 | struct bucket { 80 | Py_ssize_t a_head; /* first item in `a` from this equivalence class */ 81 | Py_ssize_t a_count; 82 | Py_ssize_t b_head; /* first item in `b` from this equivalence class */ 83 | Py_ssize_t b_count; 84 | Py_ssize_t a_pos; 85 | Py_ssize_t b_pos; 86 | }; 87 | 88 | 89 | struct hashtable { 90 | Py_ssize_t last_a_pos; 91 | Py_ssize_t last_b_pos; 92 | Py_ssize_t size; 93 | struct bucket *table; 94 | }; 95 | 96 | struct matching_line { 97 | Py_ssize_t a; /* index of the line in `a` */ 98 | Py_ssize_t b; /* index of the line in `b` */ 99 | }; 100 | 101 | 102 | struct matching_block { 103 | Py_ssize_t a; /* index of the first line in `a` */ 104 | Py_ssize_t b; /* index of the first line in `b` */ 105 | Py_ssize_t len; /* length of the block */ 106 | }; 107 | 108 | 109 | struct matching_blocks { 110 | struct matching_block *matches; 111 | Py_ssize_t count; 112 | }; 113 | 114 | 115 | struct opcode { 116 | int tag; 117 | Py_ssize_t i1; 118 | Py_ssize_t i2; 119 | Py_ssize_t j1; 120 | Py_ssize_t j2; 121 | }; 122 | 123 | 124 | typedef struct { 125 | PyObject_HEAD 126 | Py_ssize_t asize; 127 | Py_ssize_t bsize; 128 | struct line *a; 129 | struct line *b; 130 | struct hashtable hashtable; 131 | Py_ssize_t *backpointers; 132 | } PatienceSequenceMatcher; 133 | 134 | 135 | static inline Py_ssize_t 136 | bisect_left(Py_ssize_t *list, Py_ssize_t item, Py_ssize_t lo, Py_ssize_t hi) 137 | { 138 | while (lo < hi) { 139 | Py_ssize_t mid = lo / 2 + hi / 2 + (lo % 2 + hi % 2) / 2; 140 | if (list[mid] < item) 141 | lo = mid + 1; 142 | else 143 | hi = mid; 144 | } 145 | return lo; 146 | } 147 | 148 | 149 | static inline int 150 | compare_lines(struct line *a, struct line *b) 151 | { 152 | return ((a->hash != b->hash) 153 | || PyObject_RichCompareBool(a->data, b->data, Py_EQ) == 0); 154 | } 155 | 156 | 157 | static inline int 158 | find_equivalence_class(struct bucket *hashtable, Py_ssize_t hsize, 159 | struct line *lines, struct line *ref_lines, 160 | Py_ssize_t i) 161 | { 162 | Py_ssize_t j; 163 | for (j = lines[i].hash & hsize; hashtable[j].b_head != SENTINEL; j = (j + 1) & hsize) { 164 | if (!compare_lines(lines + i, ref_lines + hashtable[j].b_head)) { 165 | break; 166 | } 167 | } 168 | return j; 169 | } 170 | 171 | 172 | static int 173 | equate_lines(struct hashtable *result, 174 | struct line *lines_a, struct line *lines_b, 175 | Py_ssize_t asize, Py_ssize_t bsize) 176 | { 177 | Py_ssize_t i, j, hsize; 178 | struct bucket *hashtable; 179 | 180 | /* check for overflow, we need the table to be at least bsize+1 */ 181 | if (bsize == PY_SSIZE_T_MAX) { 182 | PyErr_SetNone(PyExc_OverflowError); 183 | return 0; 184 | } 185 | 186 | /* build a hash table of the next highest power of 2 */ 187 | hsize = 1; 188 | while (hsize < bsize + 1) 189 | hsize *= 2; 190 | 191 | /* can't be 0 */ 192 | hashtable = (struct bucket *) guarded_malloc(sizeof(struct bucket) * hsize); 193 | if (hashtable == NULL) { 194 | PyErr_NoMemory(); 195 | return 0; 196 | } 197 | 198 | /* 
initialise the hashtable */ 199 | for (i = 0; i < hsize; i++) { 200 | hashtable[i].a_count = 0; 201 | hashtable[i].b_count = 0; 202 | hashtable[i].a_head = SENTINEL; 203 | hashtable[i].b_head = SENTINEL; 204 | } 205 | hsize--; 206 | 207 | /* add lines from lines_b to the hash table chains. iterating 208 | backwards so the matching lines are sorted to the linked list 209 | by the line number (because we are adding new lines to the 210 | head of the list) */ 211 | for (i = bsize - 1; i >= 0; i--) { 212 | /* find the first hashtable entry, which is either empty or contains 213 | the same line as lines_b[i] */ 214 | j = find_equivalence_class(hashtable, hsize, lines_b, lines_b, i); 215 | 216 | /* set the equivalence class */ 217 | lines_b[i].equiv = j; 218 | 219 | /* add to the head of the equivalence class */ 220 | lines_b[i].next = hashtable[j].b_head; 221 | hashtable[j].b_head = i; 222 | hashtable[j].b_count++; 223 | } 224 | 225 | /* match items from lines_a to their equivalence class in lines_b. 226 | again, iterating backwards for the right order of the linked lists */ 227 | for (i = asize - 1; i >= 0; i--) { 228 | /* find the first hash entry, which is either empty or contains 229 | the same line as lines_a[i] */ 230 | j = find_equivalence_class(hashtable, hsize, lines_a, lines_b, i); 231 | 232 | /* set the equivalence class, even if we are not interested in this 233 | line, because the values are not pre-filled */ 234 | lines_a[i].equiv = j; 235 | 236 | /* we are not interested in lines which are not also in lines_b */ 237 | if (hashtable[j].b_head == SENTINEL) 238 | continue; 239 | 240 | /* add to the head of the equivalence class */ 241 | lines_a[i].next = hashtable[j].a_head; 242 | hashtable[j].a_head = i; 243 | hashtable[j].a_count++; 244 | } 245 | 246 | result->last_a_pos = -1; 247 | result->last_b_pos = -1; 248 | result->size = hsize + 1; 249 | result->table = hashtable; 250 | 251 | return 1; 252 | } 253 | 254 | 255 | 256 | /* Finds longest common subsequence of unique lines in a[alo:ahi] and 257 | b[blo:bhi]. 258 | Parameter backpointers must have allocated memory for at least 259 | 4 * (bhi - blo) ints. 
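   (The single backpointers allocation is carved into four arrays of
   bhi - blo entries each: backpointers, stacks, lasts and btoa; see the
   "unpack" step at the top of the function body.)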
*/
260 | Py_ssize_t
261 | unique_lcs(struct matching_line *answer,
262 |            struct hashtable *hashtable, Py_ssize_t *backpointers,
263 |            struct line *lines_a, struct line *lines_b,
264 |            Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi)
265 | {
266 |     Py_ssize_t i, k, equiv, apos, bpos, norm_apos, norm_bpos, bsize, stacksize;
267 |     Py_ssize_t *stacks, *lasts, *btoa;
268 |     struct bucket *h;
269 | 
270 |     k = 0;
271 |     stacksize = 0;
272 |     bsize = bhi - blo;
273 |     h = hashtable->table;
274 | 
275 |     /* "unpack" the allocated memory */
276 |     stacks = backpointers + bsize;
277 |     lasts = stacks + bsize;
278 |     btoa = lasts + bsize;
279 | 
280 |     /* initialise the backpointers */
281 |     for (i = 0; i < bsize; i++)
282 |         backpointers[i] = SENTINEL;
283 | 
284 |     if (hashtable->last_a_pos == -1 || hashtable->last_a_pos > alo)
285 |         for (i = 0; i < hashtable->size; i++)
286 |             h[i].a_pos = h[i].a_head;
287 |     hashtable->last_a_pos = alo;
288 | 
289 |     if (hashtable->last_b_pos == -1 || hashtable->last_b_pos > blo)
290 |         for (i = 0; i < hashtable->size; i++)
291 |             h[i].b_pos = h[i].b_head;
292 |     hashtable->last_b_pos = blo;
293 | 
294 |     for (bpos = blo; bpos < bhi; bpos++) {
295 |         equiv = lines_b[bpos].equiv;
296 | 
297 |         /* no lines in a or b */
298 |         if (h[equiv].a_count == 0 || h[equiv].b_count == 0)
299 |             continue;
300 | 
301 |         /* find a unique line in lines_a that matches lines_b[bpos];
302 |            if we find more than one line within the range alo:ahi,
303 |            jump to the next line from lines_b immediately */
304 |         apos = SENTINEL;
305 |         /* loop through all lines in the linked list */
306 |         for (i = h[equiv].a_pos; i != SENTINEL; i = lines_a[i].next) {
307 |             /* the index is lower than alo, continue to the next line */
308 |             if (i < alo) {
309 |                 h[equiv].a_pos = i;
310 |                 continue;
311 |             }
312 |             /* the index is higher than ahi, stop searching */
313 |             if (i >= ahi)
314 |                 break;
315 |             /* if the line is within our range, check if it's a duplicate */
316 |             if (apos != SENTINEL)
317 |                 goto nextb;
318 |             /* save index to the line */
319 |             apos = i;
320 |         }
321 |         /* this line has no equivalent in lines_a[alo:ahi] */
322 |         if (apos == SENTINEL)
323 |             goto nextb;
324 | 
325 |         /* check for duplicates of this line in lines_b[blo:bhi] */
326 |         /* loop through all lines in the linked list */
327 |         for (i = h[equiv].b_pos; i != SENTINEL; i = lines_b[i].next) {
328 |             /* the index is lower than blo, continue to the next line */
329 |             if (i < blo) {
330 |                 h[equiv].b_pos = i;
331 |                 continue;
332 |             }
333 |             /* the index is higher than bhi, stop searching */
334 |             if (i >= bhi)
335 |                 break;
336 |             /* if this isn't the line we started with and it's within
337 |                our range, it's a duplicate */
338 |             if (i != bpos)
339 |                 goto nextb;
340 |         }
341 | 
342 |         /* use normalised indexes ([0,ahi-alo) instead of [alo,ahi))
343 |            for the patience sorting algorithm */
344 |         norm_bpos = bpos - blo;
345 |         norm_apos = apos - alo;
346 |         btoa[norm_bpos] = norm_apos;
347 | 
348 |         /*
349 |            Ok, how does this work...
350 | 
351 |            We have a list of matching lines from two lists, a and b. These
352 |            matches are stored in variable `btoa`. As we are iterating over this
353 |            table by bpos, the lines from b already form an increasing sequence.
354 |            We also need to "sort" the lines from a using the patience sorting
355 |            algorithm, ignoring the lines which would need to be swapped.
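           A small illustration: inserting the a-indexes 3, 1, 4, 2, 5 one
           by one leaves `stacks` evolving as [3] -> [1] -> [1, 4] ->
           [1, 2] -> [1, 2, 5]; the number of piles (3 here) is the length
           of the longest increasing subsequence.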
356 | 357 | http://en.wikipedia.org/wiki/Patience_sorting 358 | 359 | For each pair of lines, we need to place the line from a on either 360 | an existing pile that has higher value on the top or create a new 361 | pile. Variable `stacks` represents the tops of these piles and in 362 | variable `lasts` we store the lines from b, that correspond to the 363 | lines from a in `stacks`. 364 | 365 | Whenever we place a new line on top of a pile, we store a 366 | backpointer to the line (b) from top of the previous pile. This means 367 | that after the loop, variable `backpointers` will contain an index 368 | to the previous matching lines that forms an increasing sequence 369 | (over both indexes a and b) with the current matching lines. If 370 | either index a or b of the previous matching lines would be higher 371 | than indexes of the current one or if the indexes of the current 372 | one are 0, it will contain SENTINEL. 373 | 374 | To construct the LCS, we will just need to follow these backpointers 375 | from the top of the last pile and stop when we reach SENTINEL. 376 | */ 377 | 378 | /* as an optimization, check if the next line comes at the end, 379 | because it usually does */ 380 | if (stacksize && stacks[stacksize - 1] < norm_apos) 381 | k = stacksize; 382 | /* as an optimization, check if the next line comes right after 383 | the previous line, because usually it does */ 384 | else if (stacksize && (stacks[k] < norm_apos) && 385 | (k == stacksize - 1 || stacks[k + 1] > norm_apos)) 386 | k += 1; 387 | else 388 | k = bisect_left(stacks, norm_apos, 0, stacksize); 389 | 390 | if (k > 0) 391 | backpointers[norm_bpos] = lasts[k - 1]; 392 | 393 | if (k < stacksize) { 394 | stacks[k] = norm_apos; 395 | lasts[k] = norm_bpos; 396 | } 397 | else { 398 | stacks[stacksize] = norm_apos; 399 | lasts[stacksize] = norm_bpos; 400 | stacksize += 1; 401 | } 402 | 403 | 404 | nextb: 405 | ; 406 | } 407 | 408 | if (stacksize == 0) 409 | return 0; 410 | 411 | /* backtrace the structures to find the LCS */ 412 | i = 0; 413 | k = lasts[stacksize - 1]; 414 | while (k != SENTINEL) { 415 | answer[i].a = btoa[k]; 416 | answer[i].b = k; 417 | k = backpointers[k]; 418 | i++; 419 | } 420 | 421 | return i; 422 | } 423 | 424 | /* Adds a new line to the list of matching blocks, either extending the 425 | current block or adding a new one. 
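   For example (illustrative): consecutive pairs (3, 5), (4, 6), (5, 7)
   are merged into the single block {a: 3, b: 5, len: 3} rather than
   three one-line blocks.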
*/ 426 | static inline void 427 | add_matching_line(struct matching_blocks *answer, Py_ssize_t a, Py_ssize_t b) 428 | { 429 | Py_ssize_t last_index = answer->count - 1; 430 | if ((last_index >= 0) && 431 | (a == answer->matches[last_index].a + 432 | answer->matches[last_index].len) && 433 | (b == answer->matches[last_index].b + 434 | answer->matches[last_index].len)) { 435 | /* enlarge the last block */ 436 | answer->matches[last_index].len++; 437 | } 438 | else { 439 | /* create a new block */ 440 | last_index++; 441 | answer->matches[last_index].a = a; 442 | answer->matches[last_index].b = b; 443 | answer->matches[last_index].len = 1; 444 | answer->count++; 445 | } 446 | } 447 | 448 | 449 | static int 450 | recurse_matches(struct matching_blocks *answer, struct hashtable *hashtable, 451 | Py_ssize_t *backpointers, struct line *a, struct line *b, 452 | Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi, 453 | int maxrecursion) 454 | { 455 | int res; 456 | Py_ssize_t new, last_a_pos, last_b_pos, lcs_size, nahi, nbhi, i, apos, bpos; 457 | struct matching_line *lcs; 458 | 459 | if (maxrecursion < 0) 460 | return 1; 461 | 462 | if (alo == ahi || blo == bhi) 463 | return 1; 464 | 465 | new = 0; 466 | last_a_pos = alo - 1; 467 | last_b_pos = blo - 1; 468 | 469 | lcs = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * (bhi - blo)); 470 | if (lcs == NULL) 471 | return 0; 472 | 473 | lcs_size = unique_lcs(lcs, hashtable, backpointers, a, b, alo, blo, ahi, bhi); 474 | 475 | /* recurse between lines which are unique in each file and match */ 476 | for (i = lcs_size - 1; i >= 0; i--) { 477 | apos = alo + lcs[i].a; 478 | bpos = blo + lcs[i].b; 479 | if (last_a_pos + 1 != apos || last_b_pos + 1 != bpos) { 480 | res = recurse_matches(answer, hashtable, 481 | backpointers, a, b, 482 | last_a_pos + 1, last_b_pos + 1, 483 | apos, bpos, maxrecursion - 1); 484 | if (!res) 485 | goto error; 486 | } 487 | last_a_pos = apos; 488 | last_b_pos = bpos; 489 | add_matching_line(answer, apos, bpos); 490 | new = 1; 491 | } 492 | 493 | free(lcs); 494 | lcs = NULL; 495 | 496 | /* find matches between the last match and the end */ 497 | if (new > 0) { 498 | res = recurse_matches(answer, hashtable, 499 | backpointers, a, b, 500 | last_a_pos + 1, last_b_pos + 1, 501 | ahi, bhi, maxrecursion - 1); 502 | if (!res) 503 | goto error; 504 | } 505 | 506 | 507 | /* find matching lines at the very beginning */ 508 | else if (a[alo].equiv == b[blo].equiv) { 509 | while (alo < ahi && blo < bhi && a[alo].equiv == b[blo].equiv) 510 | add_matching_line(answer, alo++, blo++); 511 | res = recurse_matches(answer, hashtable, 512 | backpointers, a, b, 513 | alo, blo, ahi, bhi, maxrecursion - 1); 514 | if (!res) 515 | goto error; 516 | } 517 | 518 | /* find matching lines at the very end */ 519 | else if (a[ahi - 1].equiv == b[bhi - 1].equiv) { 520 | nahi = ahi - 1; 521 | nbhi = bhi - 1; 522 | while (nahi > alo && nbhi > blo && a[nahi - 1].equiv == b[nbhi - 1].equiv) { 523 | nahi--; 524 | nbhi--; 525 | } 526 | res = recurse_matches(answer, hashtable, 527 | backpointers, a, b, 528 | last_a_pos + 1, last_b_pos + 1, 529 | nahi, nbhi, maxrecursion - 1); 530 | if (!res) 531 | goto error; 532 | for (i = 0; i < ahi - nahi; i++) 533 | add_matching_line(answer, nahi + i, nbhi + i); 534 | } 535 | 536 | return 1; 537 | 538 | error: 539 | free(lcs); 540 | return 0; 541 | } 542 | 543 | 544 | static void 545 | delete_lines(struct line *lines, Py_ssize_t size) 546 | { 547 | struct line *line = lines; 548 | while (size-- > 0) 
{ 549 | Py_XDECREF(line->data); 550 | line++; 551 | } 552 | free(lines); 553 | } 554 | 555 | 556 | static Py_ssize_t 557 | load_lines(PyObject *orig, struct line **lines) 558 | { 559 | Py_ssize_t size, i; 560 | struct line *line; 561 | PyObject *seq, *item; 562 | 563 | seq = PySequence_Fast(orig, "sequence expected"); 564 | if (seq == NULL) { 565 | return -1; 566 | } 567 | 568 | size = PySequence_Fast_GET_SIZE(seq); 569 | if (size == 0) { 570 | Py_DECREF(seq); 571 | return 0; 572 | } 573 | 574 | /* Allocate a memory block for line data, initialized to 0 */ 575 | line = *lines = (struct line *)calloc(size, sizeof(struct line)); 576 | if (line == NULL) { 577 | PyErr_NoMemory(); 578 | Py_DECREF(seq); 579 | return -1; 580 | } 581 | 582 | for (i = 0; i < size; i++) { 583 | item = PySequence_Fast_GET_ITEM(seq, i); 584 | Py_INCREF(item); 585 | line->data = item; 586 | line->hash = PyObject_Hash(item); 587 | if (line->hash == (-1)) { 588 | /* Propagate the hash exception */ 589 | size = -1; 590 | goto cleanup; 591 | } 592 | line->next = SENTINEL; 593 | line++; 594 | } 595 | 596 | cleanup: 597 | Py_DECREF(seq); 598 | if (size == -1) { 599 | /* Error -- cleanup unused object references */ 600 | delete_lines(*lines, i); 601 | *lines = NULL; 602 | } 603 | return size; 604 | } 605 | 606 | 607 | static PyObject * 608 | py_unique_lcs(PyObject *self, PyObject *args) 609 | { 610 | PyObject *aseq, *bseq, *res, *item; 611 | Py_ssize_t asize, bsize, i, nmatches, *backpointers = NULL; 612 | struct line *a = NULL, *b = NULL; 613 | struct matching_line *matches = NULL; 614 | struct hashtable hashtable; 615 | 616 | if (!PyArg_ParseTuple(args, "OO", &aseq, &bseq)) 617 | return NULL; 618 | 619 | hashtable.table = NULL; 620 | 621 | asize = load_lines(aseq, &a); 622 | bsize = load_lines(bseq, &b); 623 | if (asize == -1 || bsize == -1) 624 | goto error; 625 | 626 | if (!equate_lines(&hashtable, a, b, asize, bsize)) 627 | goto error; 628 | 629 | if (bsize > 0) { 630 | matches = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * bsize); 631 | if (matches == NULL) 632 | goto error; 633 | 634 | backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4); 635 | if (backpointers == NULL) 636 | goto error; 637 | } 638 | 639 | nmatches = unique_lcs(matches, &hashtable, backpointers, a, b, 0, 0, asize, bsize); 640 | 641 | res = PyList_New(nmatches); 642 | for (i = 0; i < nmatches; i++) { 643 | item = Py_BuildValue("nn", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b); 644 | if (item == NULL) 645 | goto error; 646 | if (PyList_SetItem(res, i, item) != 0) 647 | goto error; 648 | } 649 | 650 | free(backpointers); 651 | free(matches); 652 | free(hashtable.table); 653 | delete_lines(b, bsize); 654 | delete_lines(a, asize); 655 | return res; 656 | 657 | error: 658 | free(backpointers); 659 | free(matches); 660 | free(hashtable.table); 661 | delete_lines(b, bsize); 662 | delete_lines(a, asize); 663 | return NULL; 664 | } 665 | 666 | 667 | static PyObject * 668 | py_recurse_matches(PyObject *self, PyObject *args) 669 | { 670 | PyObject *aseq, *bseq, *item, *answer; 671 | int maxrecursion, res; 672 | Py_ssize_t i, j, asize, bsize, alo, blo, ahi, bhi; 673 | Py_ssize_t *backpointers = NULL; 674 | struct line *a = NULL, *b = NULL; 675 | struct hashtable hashtable; 676 | struct matching_blocks matches; 677 | 678 | if (!PyArg_ParseTuple(args, "OOnnnnOi", &aseq, &bseq, &alo, &blo, 679 | &ahi, &bhi, &answer, &maxrecursion)) 680 | return NULL; 681 | 682 | hashtable.table = NULL; 683 | 
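    /* matches.matches is likewise pre-cleared so that the error path
       below can free() both pointers unconditionally */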
matches.matches = NULL; 684 | 685 | asize = load_lines(aseq, &a); 686 | bsize = load_lines(bseq, &b); 687 | if (asize == -1 || bsize == -1) 688 | goto error; 689 | 690 | if (!equate_lines(&hashtable, a, b, asize, bsize)) 691 | goto error; 692 | 693 | matches.count = 0; 694 | 695 | if (bsize > 0) { 696 | matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * bsize); 697 | if (matches.matches == NULL) 698 | goto error; 699 | 700 | backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4); 701 | if (backpointers == NULL) 702 | goto error; 703 | } else { 704 | matches.matches = NULL; 705 | backpointers = NULL; 706 | } 707 | 708 | res = recurse_matches(&matches, &hashtable, backpointers, 709 | a, b, alo, blo, ahi, bhi, maxrecursion); 710 | if (!res) 711 | goto error; 712 | 713 | for (i = 0; i < matches.count; i++) { 714 | for (j = 0; j < matches.matches[i].len; j++) { 715 | item = Py_BuildValue("nn", matches.matches[i].a + j, 716 | matches.matches[i].b + j); 717 | if (item == NULL) 718 | goto error; 719 | if (PyList_Append(answer, item) != 0) 720 | goto error; 721 | } 722 | } 723 | 724 | free(backpointers); 725 | free(matches.matches); 726 | free(hashtable.table); 727 | delete_lines(b, bsize); 728 | delete_lines(a, asize); 729 | Py_RETURN_NONE; 730 | 731 | error: 732 | free(backpointers); 733 | free(matches.matches); 734 | free(hashtable.table); 735 | delete_lines(b, bsize); 736 | delete_lines(a, asize); 737 | return NULL; 738 | } 739 | 740 | 741 | static PyObject * 742 | PatienceSequenceMatcher_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 743 | { 744 | PyObject *junk, *a, *b; 745 | PatienceSequenceMatcher *self; 746 | 747 | self = (PatienceSequenceMatcher *)type->tp_alloc(type, 0); 748 | if (self != NULL) { 749 | 750 | if (!PyArg_ParseTuple(args, "OOO", &junk, &a, &b)) { 751 | Py_DECREF(self); 752 | return NULL; 753 | } 754 | 755 | self->asize = load_lines(a, &(self->a)); 756 | self->bsize = load_lines(b, &(self->b)); 757 | 758 | if (self->asize == -1 || self->bsize == -1) { 759 | Py_DECREF(self); 760 | return NULL; 761 | } 762 | 763 | if (!equate_lines(&self->hashtable, self->a, self->b, self->asize, self->bsize)) { 764 | Py_DECREF(self); 765 | return NULL; 766 | } 767 | 768 | if (self->bsize > 0) { 769 | self->backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * self->bsize * 4); 770 | if (self->backpointers == NULL) { 771 | Py_DECREF(self); 772 | PyErr_NoMemory(); 773 | return NULL; 774 | } 775 | } else { 776 | self->backpointers = NULL; 777 | } 778 | 779 | } 780 | 781 | return (PyObject *)self; 782 | } 783 | 784 | 785 | static void 786 | PatienceSequenceMatcher_dealloc(PatienceSequenceMatcher* self) 787 | { 788 | free(self->backpointers); 789 | free(self->hashtable.table); 790 | delete_lines(self->b, self->bsize); 791 | delete_lines(self->a, self->asize); 792 | ((PyObject *)self)->ob_type->tp_free((PyObject *)self); 793 | } 794 | 795 | 796 | static char PatienceSequenceMatcher_get_matching_blocks_doc[] = 797 | "Return list of triples describing matching subsequences.\n" 798 | "\n" 799 | "Each triple is of the form (i, j, n), and means that\n" 800 | "a[i:i+n] == b[j:j+n]. 
The triples are monotonically increasing in\n" 801 | "i and in j.\n" 802 | "\n" 803 | "The last triple is a dummy, (len(a), len(b), 0), and is the only\n" 804 | "triple with n==0.\n" 805 | "\n" 806 | ">>> s = PatienceSequenceMatcher(None, \"abxcd\", \"abcd\")\n" 807 | ">>> s.get_matching_blocks()\n" 808 | "[(0, 0, 2), (3, 2, 2), (5, 4, 0)]\n"; 809 | 810 | static PyObject * 811 | PatienceSequenceMatcher_get_matching_blocks(PatienceSequenceMatcher* self) 812 | { 813 | PyObject *answer, *item; 814 | int res; 815 | Py_ssize_t i; 816 | struct matching_blocks matches; 817 | 818 | matches.count = 0; 819 | if (self->bsize > 0) { 820 | matches.matches = (struct matching_block *) 821 | guarded_malloc(sizeof(struct matching_block) * self->bsize); 822 | if (matches.matches == NULL) 823 | return PyErr_NoMemory(); 824 | } else 825 | matches.matches = NULL; 826 | 827 | res = recurse_matches(&matches, &self->hashtable, self->backpointers, 828 | self->a, self->b, 0, 0, 829 | self->asize, self->bsize, 10); 830 | if (!res) { 831 | free(matches.matches); 832 | return PyErr_NoMemory(); 833 | } 834 | 835 | answer = PyList_New(matches.count + 1); 836 | if (answer == NULL) { 837 | free(matches.matches); 838 | return NULL; 839 | } 840 | 841 | for (i = 0; i < matches.count; i++) { 842 | item = Py_BuildValue("nnn", matches.matches[i].a, 843 | matches.matches[i].b, matches.matches[i].len); 844 | if (item == NULL) 845 | goto error; 846 | if (PyList_SetItem(answer, i, item) != 0) 847 | goto error; 848 | } 849 | item = Py_BuildValue("nnn", self->asize, self->bsize, 0); 850 | if (item == NULL) 851 | goto error; 852 | if (PyList_SetItem(answer, i, item) != 0) 853 | goto error; 854 | 855 | free(matches.matches); 856 | return answer; 857 | 858 | error: 859 | free(matches.matches); 860 | Py_DECREF(answer); 861 | return NULL; 862 | } 863 | 864 | 865 | static char PatienceSequenceMatcher_get_opcodes_doc[] = 866 | "Return list of 5-tuples describing how to turn a into b.\n" 867 | "\n" 868 | "Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple\n" 869 | "has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the\n" 870 | "tuple preceding it, and likewise for j1 == the previous j2.\n" 871 | "\n" 872 | "The tags are strings, with these meanings:\n" 873 | "\n" 874 | "'replace': a[i1:i2] should be replaced by b[j1:j2]\n" 875 | "'delete': a[i1:i2] should be deleted.\n" 876 | " Note that j1==j2 in this case.\n" 877 | "'insert': b[j1:j2] should be inserted at a[i1:i1].\n" 878 | " Note that i1==i2 in this case.\n" 879 | "'equal': a[i1:i2] == b[j1:j2]\n" 880 | "\n" 881 | ">>> a = \"qabxcd\"\n" 882 | ">>> b = \"abycdf\"\n" 883 | ">>> s = PatienceSequenceMatcher(None, a, b)\n" 884 | ">>> for tag, i1, i2, j1, j2 in s.get_opcodes():\n" 885 | "... print (\"%7s a[%d:%d] (%s) b[%d:%d] (%s)\" %\n" 886 | "... 
(tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))\n" 887 | " delete a[0:1] (q) b[0:0] ()\n" 888 | " equal a[1:3] (ab) b[0:2] (ab)\n" 889 | "replace a[3:4] (x) b[2:3] (y)\n" 890 | " equal a[4:6] (cd) b[3:5] (cd)\n" 891 | " insert a[6:6] () b[5:6] (f)\n"; 892 | 893 | static PyObject * 894 | PatienceSequenceMatcher_get_opcodes(PatienceSequenceMatcher* self) 895 | { 896 | PyObject *answer, *item; 897 | Py_ssize_t i, j, k, ai, bj; 898 | int tag, res; 899 | struct matching_blocks matches; 900 | 901 | matches.count = 0; 902 | matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1)); 903 | if (matches.matches == NULL) 904 | return PyErr_NoMemory(); 905 | 906 | res = recurse_matches(&matches, &self->hashtable, self->backpointers, 907 | self->a, self->b, 0, 0, 908 | self->asize, self->bsize, 10); 909 | if (!res) { 910 | free(matches.matches); 911 | return PyErr_NoMemory(); 912 | } 913 | 914 | matches.matches[matches.count].a = self->asize; 915 | matches.matches[matches.count].b = self->bsize; 916 | matches.matches[matches.count].len = 0; 917 | matches.count++; 918 | 919 | answer = PyList_New(0); 920 | if (answer == NULL) { 921 | free(matches.matches); 922 | return NULL; 923 | } 924 | 925 | i = j = 0; 926 | for (k = 0; k < matches.count; k++) { 927 | ai = matches.matches[k].a; 928 | bj = matches.matches[k].b; 929 | 930 | tag = -1; 931 | if (i < ai && j < bj) 932 | tag = OP_REPLACE; 933 | else if (i < ai) 934 | tag = OP_DELETE; 935 | else if (j < bj) 936 | tag = OP_INSERT; 937 | 938 | if (tag != -1) { 939 | item = Py_BuildValue("snnnn", opcode_names[tag], i, ai, j, bj); 940 | if (item == NULL) 941 | goto error; 942 | if (PyList_Append(answer, item) != 0) 943 | goto error; 944 | } 945 | 946 | i = ai + matches.matches[k].len; 947 | j = bj + matches.matches[k].len; 948 | 949 | if (matches.matches[k].len > 0) { 950 | item = Py_BuildValue("snnnn", opcode_names[OP_EQUAL], ai, i, bj, j); 951 | if (item == NULL) 952 | goto error; 953 | if (PyList_Append(answer, item) != 0) 954 | goto error; 955 | } 956 | } 957 | 958 | free(matches.matches); 959 | return answer; 960 | 961 | error: 962 | free(matches.matches); 963 | Py_DECREF(answer); 964 | return NULL; 965 | } 966 | 967 | 968 | static char PatienceSequenceMatcher_get_grouped_opcodes_doc[] = 969 | "Isolate change clusters by eliminating ranges with no changes.\n" 970 | "\n" 971 | "Return a list of groups with upto n lines of context.\n" 972 | "Each group is in the same format as returned by get_opcodes().\n" 973 | "\n" 974 | ">>> from pprint import pprint\n" 975 | ">>> a = map(str, range(1,40))\n" 976 | ">>> b = a[:]\n" 977 | ">>> b[8:8] = ['i'] # Make an insertion\n" 978 | ">>> b[20] += 'x' # Make a replacement\n" 979 | ">>> b[23:28] = [] # Make a deletion\n" 980 | ">>> b[30] += 'y' # Make another replacement\n" 981 | ">>> pprint(PatienceSequenceMatcher(None,a,b).get_grouped_opcodes())\n" 982 | "[[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],\n" 983 | " [('equal', 16, 19, 17, 20),\n" 984 | " ('replace', 19, 20, 20, 21),\n" 985 | " ('equal', 20, 22, 21, 23),\n" 986 | " ('delete', 22, 27, 23, 23),\n" 987 | " ('equal', 27, 30, 23, 26)],\n" 988 | " [('equal', 31, 34, 27, 30),\n" 989 | " ('replace', 34, 35, 30, 31),\n" 990 | " ('equal', 35, 38, 31, 34)]]\n"; 991 | 992 | static PyObject * 993 | PatienceSequenceMatcher_get_grouped_opcodes(PatienceSequenceMatcher* self, 994 | PyObject *args) 995 | { 996 | PyObject *answer, *group, *item; 997 | Py_ssize_t i, j, k, ai, bj, size, ncodes, tag; 998 | 
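    /* i1..j2 hold the bounds of the opcode currently being copied into
       a group */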
Py_ssize_t i1, i2, j1, j2; 999 | int n = 3, nn, res; 1000 | struct matching_blocks matches; 1001 | struct opcode *codes; 1002 | 1003 | if (!PyArg_ParseTuple(args, "|i", &n)) 1004 | return NULL; 1005 | 1006 | matches.count = 0; 1007 | matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1)); 1008 | if (matches.matches == NULL) 1009 | return PyErr_NoMemory(); 1010 | 1011 | res = recurse_matches(&matches, &self->hashtable, self->backpointers, 1012 | self->a, self->b, 0, 0, 1013 | self->asize, self->bsize, 10); 1014 | if (!res) { 1015 | free(matches.matches); 1016 | return PyErr_NoMemory(); 1017 | } 1018 | 1019 | matches.matches[matches.count].a = self->asize; 1020 | matches.matches[matches.count].b = self->bsize; 1021 | matches.matches[matches.count].len = 0; 1022 | matches.count++; 1023 | 1024 | ncodes = 0; 1025 | codes = (struct opcode *)guarded_malloc(sizeof(struct opcode) * matches.count * 2); 1026 | if (codes == NULL) { 1027 | free(matches.matches); 1028 | return PyErr_NoMemory(); 1029 | } 1030 | 1031 | i = j = 0; 1032 | for (k = 0; k < matches.count; k++) { 1033 | ai = matches.matches[k].a; 1034 | bj = matches.matches[k].b; 1035 | 1036 | tag = -1; 1037 | if (i < ai && j < bj) 1038 | tag = OP_REPLACE; 1039 | else if (i < ai) 1040 | tag = OP_DELETE; 1041 | else if (j < bj) 1042 | tag = OP_INSERT; 1043 | 1044 | if (tag != -1) { 1045 | codes[ncodes].tag = tag; 1046 | codes[ncodes].i1 = i; 1047 | codes[ncodes].i2 = ai; 1048 | codes[ncodes].j1 = j; 1049 | codes[ncodes].j2 = bj; 1050 | ncodes++; 1051 | } 1052 | 1053 | i = ai + matches.matches[k].len; 1054 | j = bj + matches.matches[k].len; 1055 | 1056 | if (matches.matches[k].len > 0) { 1057 | codes[ncodes].tag = OP_EQUAL; 1058 | codes[ncodes].i1 = ai; 1059 | codes[ncodes].i2 = i; 1060 | codes[ncodes].j1 = bj; 1061 | codes[ncodes].j2 = j; 1062 | ncodes++; 1063 | } 1064 | } 1065 | 1066 | if (ncodes == 0) { 1067 | codes[ncodes].tag = OP_EQUAL; 1068 | codes[ncodes].i1 = 0; 1069 | codes[ncodes].i2 = 1; 1070 | codes[ncodes].j1 = 0; 1071 | codes[ncodes].j2 = 1; 1072 | ncodes++; 1073 | } 1074 | 1075 | /* fixup leading and trailing groups if they show no changes. */ 1076 | if (codes[0].tag == OP_EQUAL) { 1077 | codes[0].i1 = MAX(codes[0].i1, codes[0].i2 - n); 1078 | codes[0].j1 = MAX(codes[0].j1, codes[0].j2 - n); 1079 | } 1080 | if (codes[ncodes - 1].tag == OP_EQUAL) { 1081 | codes[ncodes - 1].i2 = MIN(codes[ncodes - 1].i2, 1082 | codes[ncodes - 1].i1 + n); 1083 | codes[ncodes - 1].j2 = MIN(codes[ncodes - 1].j2, 1084 | codes[ncodes - 1].j1 + n); 1085 | } 1086 | 1087 | group = NULL; 1088 | 1089 | answer = PyList_New(0); 1090 | if (answer == NULL) 1091 | goto error; 1092 | 1093 | group = PyList_New(0); 1094 | if (group == NULL) 1095 | goto error; 1096 | 1097 | nn = n + n; 1098 | tag = -1; 1099 | for (i = 0; i < ncodes; i++) { 1100 | tag = codes[i].tag; 1101 | i1 = codes[i].i1; 1102 | i2 = codes[i].i2; 1103 | j1 = codes[i].j1; 1104 | j2 = codes[i].j2; 1105 | /* end the current group and start a new one whenever 1106 | there is a large range with no changes. 
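           (Specifically, an 'equal' opcode spanning more than 2*n lines:
           the current group keeps n trailing context lines, and the next
           group starts n lines before the following change; see the
           nn = n + n test below.)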
*/ 1107 | if (tag == OP_EQUAL && i2 - i1 > nn) { 1108 | item = Py_BuildValue("snnnn", opcode_names[tag], 1109 | i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n)); 1110 | if (item == NULL) 1111 | goto error; 1112 | if (PyList_Append(group, item) != 0) 1113 | goto error; 1114 | if (PyList_Append(answer, group) != 0) 1115 | goto error; 1116 | group = PyList_New(0); 1117 | if (group == NULL) 1118 | goto error; 1119 | i1 = MAX(i1, i2 - n); 1120 | j1 = MAX(j1, j2 - n); 1121 | } 1122 | item = Py_BuildValue("snnnn", opcode_names[tag], i1, i2, j1 ,j2); 1123 | if (item == NULL) 1124 | goto error; 1125 | if (PyList_Append(group, item) != 0) 1126 | goto error; 1127 | } 1128 | size = PyList_Size(group); 1129 | if (size > 0 && !(size == 1 && tag == OP_EQUAL)) { 1130 | if (PyList_Append(answer, group) != 0) 1131 | goto error; 1132 | } 1133 | else 1134 | Py_DECREF(group); 1135 | 1136 | free(codes); 1137 | free(matches.matches); 1138 | return answer; 1139 | 1140 | error: 1141 | free(codes); 1142 | free(matches.matches); 1143 | Py_DECREF(group); 1144 | Py_DECREF(answer); 1145 | return NULL; 1146 | } 1147 | 1148 | 1149 | static PyMethodDef PatienceSequenceMatcher_methods[] = { 1150 | {"get_matching_blocks", 1151 | (PyCFunction)PatienceSequenceMatcher_get_matching_blocks, 1152 | METH_NOARGS, 1153 | PatienceSequenceMatcher_get_matching_blocks_doc}, 1154 | {"get_opcodes", 1155 | (PyCFunction)PatienceSequenceMatcher_get_opcodes, 1156 | METH_NOARGS, 1157 | PatienceSequenceMatcher_get_opcodes_doc}, 1158 | {"get_grouped_opcodes", 1159 | (PyCFunction)PatienceSequenceMatcher_get_grouped_opcodes, 1160 | METH_VARARGS, 1161 | PatienceSequenceMatcher_get_grouped_opcodes_doc}, 1162 | {NULL} 1163 | }; 1164 | 1165 | 1166 | static char PatienceSequenceMatcher_doc[] = 1167 | "C implementation of PatienceSequenceMatcher"; 1168 | 1169 | 1170 | static PyTypeObject PatienceSequenceMatcherType = { 1171 | PyVarObject_HEAD_INIT(NULL, 0) 1172 | .tp_name = "PatienceSequenceMatcher", 1173 | .tp_basicsize = sizeof(PatienceSequenceMatcher), 1174 | .tp_dealloc = (destructor)PatienceSequenceMatcher_dealloc, 1175 | .tp_flags = Py_TPFLAGS_DEFAULT, 1176 | .tp_doc = PatienceSequenceMatcher_doc, 1177 | .tp_methods = PatienceSequenceMatcher_methods, 1178 | .tp_new = PatienceSequenceMatcher_new, 1179 | }; 1180 | 1181 | 1182 | static PyMethodDef cpatiencediff_methods[] = { 1183 | {"unique_lcs_c", py_unique_lcs, METH_VARARGS}, 1184 | {"recurse_matches_c", py_recurse_matches, METH_VARARGS}, 1185 | {NULL, NULL} 1186 | }; 1187 | 1188 | static PyObject * 1189 | moduleinit(void) { 1190 | PyObject* m; 1191 | 1192 | if (PyType_Ready(&PatienceSequenceMatcherType) < 0) 1193 | return NULL; 1194 | 1195 | #if PY_MAJOR_VERSION >= 3 1196 | static struct PyModuleDef moduledef = { 1197 | PyModuleDef_HEAD_INIT, 1198 | "_patiencediff_c", /* m_name */ 1199 | "C implementation of PatienceSequenceMatcher", /* m_doc */ 1200 | -1, /* m_size */ 1201 | cpatiencediff_methods, /* m_methods */ 1202 | NULL, /* m_reload */ 1203 | NULL, /* m_traverse */ 1204 | NULL, /* m_clear*/ 1205 | NULL, /* m_free */ 1206 | }; 1207 | 1208 | m = PyModule_Create(&moduledef); 1209 | #else 1210 | m = Py_InitModule3("_patiencediff_c", cpatiencediff_methods, 1211 | "C implementation of PatienceSequenceMatcher"); 1212 | #endif 1213 | if (m == NULL) 1214 | return NULL; 1215 | 1216 | Py_INCREF(&PatienceSequenceMatcherType); 1217 | PyModule_AddObject(m, "PatienceSequenceMatcher_c", 1218 | (PyObject *)&PatienceSequenceMatcherType); 1219 | return m; 1220 | } 1221 | 1222 | #if PY_MAJOR_VERSION >= 3 1223 | 
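/* Python 3 module entry point; the function must be named
   PyInit_<module name> for the import system to find it. */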
PyMODINIT_FUNC 1224 | PyInit__patiencediff_c(void) 1225 | { 1226 | return moduleinit(); 1227 | } 1228 | #else 1229 | PyMODINIT_FUNC 1230 | init_patiencediff_c(void) 1231 | { 1232 | moduleinit(); 1233 | } 1234 | #endif 1235 | 1236 | 1237 | /* vim: sw=4 et 1238 | */ 1239 | -------------------------------------------------------------------------------- /patiencediff/_patiencediff_c.pyi: -------------------------------------------------------------------------------- 1 | import difflib 2 | from typing import Any, Sequence 3 | 4 | class PatienceSequenceMatcher_c(difflib.SequenceMatcher): 5 | def get_matching_blocks(self) -> list[difflib.Match]: ... 6 | 7 | def unique_lcs_c( 8 | a: Sequence[Any], b: Sequence[Any] 9 | ) -> list[tuple[int, int]]: ... 10 | def recurse_matches_c( 11 | a: Sequence[Any], 12 | b: Sequence[Any], 13 | alo: int, 14 | blo: int, 15 | ahi: int, 16 | bhi: int, 17 | answer: list[tuple[int, int]], 18 | maxrecursion: int, 19 | ) -> None: ... 20 | -------------------------------------------------------------------------------- /patiencediff/_patiencediff_py.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd 2 | # 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | 17 | import difflib 18 | from bisect import bisect 19 | from typing import Any, Dict, List, Optional, Sequence, Tuple 20 | 21 | 22 | class MaxRecursionDepth(Exception): 23 | def __init__(self) -> None: 24 | super().__init__("max recursion depth reached") 25 | 26 | 27 | def unique_lcs_py(a: Sequence[Any], b: Sequence[Any]) -> List[Tuple[int, int]]: 28 | """Find the longest common subset for unique lines. 29 | 30 | :param a: An indexable object (such as string or list of strings) 31 | :param b: Another indexable object (such as string or list of strings) 32 | :return: A list of tuples, one for each line which is matched. 33 | [(line_in_a, line_in_b), ...] 34 | 35 | This only matches lines which are unique on both sides. 36 | This helps prevent common lines from over influencing match 37 | results. 
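    For example (mirroring the test suite below), only 'b' occurs exactly
    once in both 'acbac' and 'abc', so it is the only line matched:

    >>> unique_lcs_py('acbac', 'abc')
    [(2, 1)]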
38 |     The longest common subset uses the Patience Sorting algorithm:
39 |     http://en.wikipedia.org/wiki/Patience_sorting
40 |     """
41 |     line: Any
42 |     # set index[line in a] = position of line in a unless
43 |     # a is a duplicate, in which case it's set to None
44 |     index: Dict[Any, Optional[int]] = {}
45 |     for i, line in enumerate(a):
46 |         if line in index:
47 |             index[line] = None
48 |         else:
49 |             index[line] = i
50 |     # make btoa[i] = position of line i in a, unless
51 |     # that line doesn't occur exactly once in both,
52 |     # in which case it's set to None
53 |     btoa: List[Optional[int]] = [None] * len(b)
54 |     index2: Dict[Any, int] = {}
55 |     for pos, line in enumerate(b):
56 |         next = index.get(line)
57 |         if next is not None:
58 |             if line in index2:
59 |                 # unset the previous mapping, which we now know to
60 |                 # be invalid because the line isn't unique
61 |                 btoa[index2[line]] = None
62 |                 del index[line]
63 |             else:
64 |                 index2[line] = pos
65 |                 btoa[pos] = next
66 |     # this is the Patience sorting algorithm
67 |     # see http://en.wikipedia.org/wiki/Patience_sorting
68 |     backpointers: List[Optional[int]] = [None] * len(b)
69 |     stacks: List[int] = []
70 |     lasts: List[int] = []
71 |     k: int = 0
72 |     for bpos, apos in enumerate(btoa):
73 |         if apos is None:
74 |             continue
75 |         # as an optimization, check if the next line comes at the end,
76 |         # because it usually does
77 |         if stacks and stacks[-1] < apos:
78 |             k = len(stacks)
79 |         # as an optimization, check if the next line comes right after
80 |         # the previous line, because usually it does
81 |         elif (
82 |             stacks
83 |             and stacks[k] < apos
84 |             and (k == len(stacks) - 1 or stacks[k + 1] > apos)
85 |         ):
86 |             k += 1
87 |         else:
88 |             k = bisect(stacks, apos)
89 |         if k > 0:
90 |             backpointers[bpos] = lasts[k - 1]
91 |         if k < len(stacks):
92 |             stacks[k] = apos
93 |             lasts[k] = bpos
94 |         else:
95 |             stacks.append(apos)
96 |             lasts.append(bpos)
97 |     if len(lasts) == 0:
98 |         return []
99 |     result = []
100 |     m: Optional[int] = lasts[-1]
101 |     while m is not None:
102 |         result.append((btoa[m], m))
103 |         m = backpointers[m]
104 |     result.reverse()
105 |     return result  # type: ignore
106 | 
107 | 
108 | def recurse_matches_py(
109 |     a: Sequence[Any],
110 |     b: Sequence[Any],
111 |     alo: int,
112 |     blo: int,
113 |     ahi: int,
114 |     bhi: int,
115 |     answer: List[Tuple[int, int]],
116 |     maxrecursion: int,
117 | ) -> None:
118 |     """Find all of the matching text in the lines of a and b.
119 | 
120 |     :param a: A sequence
121 |     :param b: Another sequence
122 |     :param alo: The start location of a to check, typically 0
123 |     :param blo: The start location of b to check, typically 0
124 |     :param ahi: The maximum length of a to check, typically len(a)
125 |     :param bhi: The maximum length of b to check, typically len(b)
126 |     :param answer: The return array. Will be filled with tuples
127 |         indicating [(line_in_a, line_in_b)]
128 |     :param maxrecursion: The maximum depth to recurse.
129 |         Must be a positive integer.
130 | :return: None, the return value is in the parameter answer, which 131 | should be a list 132 | 133 | """ 134 | if maxrecursion < 0: 135 | # this will never happen normally, this check is to prevent DOS attacks 136 | raise MaxRecursionDepth() 137 | oldlength = len(answer) 138 | if alo == ahi or blo == bhi: 139 | return 140 | last_a_pos = alo - 1 141 | last_b_pos = blo - 1 142 | for apos, bpos in unique_lcs_py(a[alo:ahi], b[blo:bhi]): 143 | # recurse between lines which are unique in each file and match 144 | apos += alo 145 | bpos += blo 146 | # Most of the time, you will have a sequence of similar entries 147 | if last_a_pos + 1 != apos or last_b_pos + 1 != bpos: 148 | recurse_matches_py( 149 | a, 150 | b, 151 | last_a_pos + 1, 152 | last_b_pos + 1, 153 | apos, 154 | bpos, 155 | answer, 156 | maxrecursion - 1, 157 | ) 158 | last_a_pos = apos 159 | last_b_pos = bpos 160 | answer.append((apos, bpos)) 161 | if len(answer) > oldlength: 162 | # find matches between the last match and the end 163 | recurse_matches_py( 164 | a, 165 | b, 166 | last_a_pos + 1, 167 | last_b_pos + 1, 168 | ahi, 169 | bhi, 170 | answer, 171 | maxrecursion - 1, 172 | ) 173 | elif a[alo] == b[blo]: 174 | # find matching lines at the very beginning 175 | while alo < ahi and blo < bhi and a[alo] == b[blo]: 176 | answer.append((alo, blo)) 177 | alo += 1 178 | blo += 1 179 | recurse_matches_py(a, b, alo, blo, ahi, bhi, answer, maxrecursion - 1) 180 | elif a[ahi - 1] == b[bhi - 1]: 181 | # find matching lines at the very end 182 | nahi = ahi - 1 183 | nbhi = bhi - 1 184 | while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]: 185 | nahi -= 1 186 | nbhi -= 1 187 | recurse_matches_py( 188 | a, 189 | b, 190 | last_a_pos + 1, 191 | last_b_pos + 1, 192 | nahi, 193 | nbhi, 194 | answer, 195 | maxrecursion - 1, 196 | ) 197 | for i in range(ahi - nahi): 198 | answer.append((nahi + i, nbhi + i)) 199 | 200 | 201 | def _collapse_sequences(matches): 202 | """Find sequences of lines. 
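    For example (illustrative): the matches [(0, 0), (1, 1), (4, 6)]
    collapse to [(0, 0, 2), (4, 6, 1)].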
203 | 204 | Given a sequence of [(line_in_a, line_in_b),] 205 | find regions where they both increment at the same time 206 | """ 207 | answer = [] 208 | start_a = start_b = None 209 | length = 0 210 | for i_a, i_b in matches: 211 | if ( 212 | start_a is not None 213 | and (i_a == start_a + length) 214 | and (i_b == start_b + length) 215 | ): 216 | length += 1 217 | else: 218 | if start_a is not None: 219 | answer.append((start_a, start_b, length)) 220 | start_a = i_a 221 | start_b = i_b 222 | length = 1 223 | 224 | if length != 0: 225 | answer.append((start_a, start_b, length)) 226 | 227 | return answer 228 | 229 | 230 | def _check_consistency(answer): 231 | # For consistency sake, make sure all matches are only increasing 232 | next_a = -1 233 | next_b = -1 234 | for a, b, match_len in answer: 235 | if a < next_a: 236 | raise ValueError("Non increasing matches for a") 237 | if b < next_b: 238 | raise ValueError("Non increasing matches for b") 239 | next_a = a + match_len 240 | next_b = b + match_len 241 | 242 | 243 | class PatienceSequenceMatcher_py(difflib.SequenceMatcher): 244 | """Compare a pair of sequences using longest common subset.""" 245 | 246 | _do_check_consistency = True 247 | 248 | def __init__(self, isjunk=None, a="", b="") -> None: 249 | if isjunk is not None: 250 | raise NotImplementedError( 251 | "Currently we do not support isjunk for sequence matching" 252 | ) 253 | difflib.SequenceMatcher.__init__(self, isjunk, a, b) 254 | 255 | def get_matching_blocks(self): 256 | """Return list of triples describing matching subsequences. 257 | 258 | Each triple is of the form (i, j, n), and means that 259 | a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in 260 | i and in j. 261 | 262 | The last triple is a dummy, (len(a), len(b), 0), and is the only 263 | triple with n==0. 264 | 265 | >>> s = PatienceSequenceMatcher(None, "abxcd", "abcd") 266 | >>> s.get_matching_blocks() 267 | [(0, 0, 2), (3, 2, 2), (5, 4, 0)] 268 | """ 269 | # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks 270 | # implementation which uses __helper. 2.4.3 got rid of helper for 271 | # doing it inline with a queue. 272 | # We should consider doing the same for recurse_matches 273 | 274 | if self.matching_blocks is not None: 275 | return self.matching_blocks 276 | 277 | matches = [] 278 | recurse_matches_py( 279 | self.a, self.b, 0, 0, len(self.a), len(self.b), matches, 10 280 | ) 281 | # Matches now has individual line pairs of 282 | # line A matches line B, at the given offsets 283 | self.matching_blocks = _collapse_sequences(matches) 284 | self.matching_blocks.append((len(self.a), len(self.b), 0)) 285 | if PatienceSequenceMatcher_py._do_check_consistency: 286 | if __debug__: 287 | _check_consistency(self.matching_blocks) 288 | 289 | return self.matching_blocks 290 | -------------------------------------------------------------------------------- /patiencediff/_patiencediff_rs.pyi: -------------------------------------------------------------------------------- 1 | import difflib 2 | from typing import Any, Callable, Literal, Sequence, TypeVar 3 | 4 | T = TypeVar("T") 5 | 6 | class PatienceSequenceMatcher_rs(difflib.SequenceMatcher): 7 | def __init__( 8 | self, junk: Callable[[T], bool] | None, a: Sequence[T], b: Sequence[T] 9 | ) -> None: ... 10 | def get_matching_blocks(self) -> list[difflib.Match]: ... 11 | def get_opcodes( 12 | self, 13 | ) -> list[ 14 | tuple[ 15 | Literal["replace", "delete", "insert", "equal"], int, int, int, int 16 | ] 17 | ]: ... 
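    # Mirrors difflib.SequenceMatcher.get_grouped_opcodes: returns groups
    # of opcodes with up to n lines of context around each change.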
18 | def get_grouped_opcodes( 19 | self, n: int = 3 20 | ) -> list[list[tuple[str, int, int, int, int]]]: ... 21 | 22 | def unique_lcs_rs( 23 | a: Sequence[Any], b: Sequence[Any] 24 | ) -> list[tuple[int, int]]: ... 25 | def recurse_matches_rs( 26 | a: Sequence[Any], 27 | b: Sequence[Any], 28 | alo: int, 29 | blo: int, 30 | ahi: int, 31 | bhi: int, 32 | answer: list[tuple[int, int]], 33 | maxrecursion: int, 34 | ) -> None: ... 35 | -------------------------------------------------------------------------------- /patiencediff/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/breezy-team/patiencediff/fff9527aae89dfaf249fc68b82516facb1350ce2/patiencediff/py.typed -------------------------------------------------------------------------------- /patiencediff/test_patiencediff.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2005, 2006, 2007 Canonical Ltd 2 | # Copyright (C) 2021-2023 Jelmer Vernooij 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program; if not, write to the Free Software 16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | 18 | import os 19 | import shutil 20 | import tempfile 21 | import unittest 22 | 23 | import patiencediff 24 | 25 | from . 
import _patiencediff_py 26 | 27 | 28 | class TestPatienceDiffLib(unittest.TestCase): 29 | def setUp(self): 30 | super().setUp() 31 | self._unique_lcs = _patiencediff_py.unique_lcs_py 32 | self._recurse_matches = _patiencediff_py.recurse_matches_py 33 | self._PatienceSequenceMatcher = ( 34 | _patiencediff_py.PatienceSequenceMatcher_py 35 | ) 36 | 37 | def test_diff_unicode_string(self): 38 | a = "".join([chr(i) for i in range(4000, 4500, 3)]) 39 | b = "".join([chr(i) for i in range(4300, 4800, 2)]) 40 | sm = self._PatienceSequenceMatcher(None, a, b) 41 | mb = sm.get_matching_blocks() 42 | self.assertEqual(35, len(mb)) 43 | 44 | def test_unique_lcs(self): 45 | unique_lcs = self._unique_lcs 46 | self.assertEqual(unique_lcs("", ""), []) 47 | self.assertEqual(unique_lcs("", "a"), []) 48 | self.assertEqual(unique_lcs("a", ""), []) 49 | self.assertEqual(unique_lcs("a", "a"), [(0, 0)]) 50 | self.assertEqual(unique_lcs("a", "b"), []) 51 | self.assertEqual(unique_lcs("ab", "ab"), [(0, 0), (1, 1)]) 52 | self.assertEqual( 53 | unique_lcs("abcde", "cdeab"), [(2, 0), (3, 1), (4, 2)] 54 | ) 55 | self.assertEqual( 56 | unique_lcs("cdeab", "abcde"), [(0, 2), (1, 3), (2, 4)] 57 | ) 58 | self.assertEqual( 59 | unique_lcs("abXde", "abYde"), [(0, 0), (1, 1), (3, 3), (4, 4)] 60 | ) 61 | self.assertEqual(unique_lcs("acbac", "abc"), [(2, 1)]) 62 | 63 | def test_recurse_matches(self): 64 | def test_one(a, b, matches): 65 | test_matches = [] 66 | self._recurse_matches(a, b, 0, 0, len(a), len(b), test_matches, 10) 67 | self.assertEqual(test_matches, matches) 68 | 69 | test_one( 70 | ["a", "", "b", "", "c"], 71 | ["a", "a", "b", "c", "c"], 72 | [(0, 0), (2, 2), (4, 4)], 73 | ) 74 | test_one( 75 | ["a", "c", "b", "a", "c"], 76 | ["a", "b", "c"], 77 | [(0, 0), (2, 1), (4, 2)], 78 | ) 79 | # Even though 'bc' is not unique globally, and is surrounded by 80 | # non-matching lines, we should still match, because they are locally 81 | # unique 82 | test_one( 83 | "abcdbce", 84 | "afbcgdbce", 85 | [(0, 0), (1, 2), (2, 3), (3, 5), (4, 6), (5, 7), (6, 8)], 86 | ) 87 | 88 | # recurse_matches doesn't match non-unique 89 | # lines surrounded by bogus text. 90 | # The update has been done in patiencediff.SequenceMatcher instead 91 | 92 | # This is what it could be 93 | # test_one('aBccDe', 'abccde', [(0,0), (2,2), (3,3), (5,5)]) 94 | 95 | # This is what it currently gives: 96 | test_one("aBccDe", "abccde", [(0, 0), (5, 5)]) 97 | 98 | def assertDiffBlocks(self, a, b, expected_blocks): 99 | """Check that the sequence matcher returns the correct blocks. 
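        The trailing sentinel block (len(a), len(b), 0) is popped and
        checked separately, so it must not be included in expected_blocks;
        e.g. self.assertDiffBlocks("abcd", "abce", [(0, 0, 3)]).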
100 | 101 | :param a: A sequence to match 102 | :param b: Another sequence to match 103 | :param expected_blocks: The expected output, not including the final 104 | matching block (len(a), len(b), 0) 105 | """ 106 | matcher = self._PatienceSequenceMatcher(None, a, b) 107 | blocks = matcher.get_matching_blocks() 108 | last = blocks.pop() 109 | self.assertEqual((len(a), len(b), 0), last) 110 | self.assertEqual(expected_blocks, blocks) 111 | 112 | def test_matching_blocks(self): 113 | # Some basic matching tests 114 | self.assertDiffBlocks("", "", []) 115 | self.assertDiffBlocks([], [], []) 116 | self.assertDiffBlocks("abc", "", []) 117 | self.assertDiffBlocks("", "abc", []) 118 | self.assertDiffBlocks("abcd", "abcd", [(0, 0, 4)]) 119 | self.assertDiffBlocks("abcd", "abce", [(0, 0, 3)]) 120 | self.assertDiffBlocks("eabc", "abce", [(1, 0, 3)]) 121 | self.assertDiffBlocks("eabce", "abce", [(1, 0, 4)]) 122 | self.assertDiffBlocks("abcde", "abXde", [(0, 0, 2), (3, 3, 2)]) 123 | self.assertDiffBlocks("abcde", "abXYZde", [(0, 0, 2), (3, 5, 2)]) 124 | self.assertDiffBlocks("abde", "abXYZde", [(0, 0, 2), (2, 5, 2)]) 125 | # This may check too much, but it checks to see that 126 | # a copied block stays attached to the previous section, 127 | # not the later one. 128 | # difflib would tend to grab the trailing longest match 129 | # which would make the diff not look right 130 | self.assertDiffBlocks( 131 | "abcdefghijklmnop", 132 | "abcdefxydefghijklmnop", 133 | [(0, 0, 6), (6, 11, 10)], 134 | ) 135 | 136 | # make sure it supports passing in lists 137 | self.assertDiffBlocks( 138 | ["hello there\n", "world\n", "how are you today?\n"], 139 | ["hello there\n", "how are you today?\n"], 140 | [(0, 0, 1), (2, 1, 1)], 141 | ) 142 | 143 | # non unique lines surrounded by non-matching lines 144 | # won't be found 145 | self.assertDiffBlocks("aBccDe", "abccde", [(0, 0, 1), (5, 5, 1)]) 146 | 147 | # But they only need to be locally unique 148 | self.assertDiffBlocks( 149 | "aBcDec", "abcdec", [(0, 0, 1), (2, 2, 1), (4, 4, 2)] 150 | ) 151 | 152 | # non unique blocks won't be matched 153 | self.assertDiffBlocks("aBcdEcdFg", "abcdecdfg", [(0, 0, 1), (8, 8, 1)]) 154 | 155 | # but locally unique ones will 156 | self.assertDiffBlocks( 157 | "aBcdEeXcdFg", 158 | "abcdecdfg", 159 | [(0, 0, 1), (2, 2, 2), (5, 4, 1), (7, 5, 2), (10, 8, 1)], 160 | ) 161 | 162 | self.assertDiffBlocks("abbabbXd", "cabbabxd", [(7, 7, 1)]) 163 | self.assertDiffBlocks("abbabbbb", "cabbabbc", []) 164 | self.assertDiffBlocks("bbbbbbbb", "cbbbbbbc", []) 165 | 166 | def test_matching_blocks_tuples(self): 167 | # Some basic matching tests 168 | self.assertDiffBlocks([], [], []) 169 | self.assertDiffBlocks([("a",), ("b",), ("c,")], [], []) 170 | self.assertDiffBlocks([], [("a",), ("b",), ("c,")], []) 171 | self.assertDiffBlocks( 172 | [("a",), ("b",), ("c,")], [("a",), ("b",), ("c,")], [(0, 0, 3)] 173 | ) 174 | self.assertDiffBlocks( 175 | [("a",), ("b",), ("c,")], [("a",), ("b",), ("d,")], [(0, 0, 2)] 176 | ) 177 | self.assertDiffBlocks( 178 | [("d",), ("b",), ("c,")], [("a",), ("b",), ("c,")], [(1, 1, 2)] 179 | ) 180 | self.assertDiffBlocks( 181 | [("d",), ("a",), ("b",), ("c,")], 182 | [("a",), ("b",), ("c,")], 183 | [(1, 0, 3)], 184 | ) 185 | self.assertDiffBlocks( 186 | [("a", "b"), ("c", "d"), ("e", "f")], 187 | [("a", "b"), ("c", "X"), ("e", "f")], 188 | [(0, 0, 1), (2, 2, 1)], 189 | ) 190 | self.assertDiffBlocks( 191 | [("a", "b"), ("c", "d"), ("e", "f")], 192 | [("a", "b"), ("c", "dX"), ("e", "f")], 193 | [(0, 0, 1), (2, 2, 1)], 194 | ) 195 
| 196 | def test_opcodes(self): 197 | def chk_ops(a, b, expected_codes): 198 | s = self._PatienceSequenceMatcher(None, a, b) 199 | self.assertEqual(expected_codes, s.get_opcodes()) 200 | 201 | chk_ops("", "", []) 202 | chk_ops([], [], []) 203 | chk_ops("abc", "", [("delete", 0, 3, 0, 0)]) 204 | chk_ops("", "abc", [("insert", 0, 0, 0, 3)]) 205 | chk_ops("abcd", "abcd", [("equal", 0, 4, 0, 4)]) 206 | chk_ops( 207 | "abcd", "abce", [("equal", 0, 3, 0, 3), ("replace", 3, 4, 3, 4)] 208 | ) 209 | chk_ops( 210 | "eabc", 211 | "abce", 212 | [ 213 | ("delete", 0, 1, 0, 0), 214 | ("equal", 1, 4, 0, 3), 215 | ("insert", 4, 4, 3, 4), 216 | ], 217 | ) 218 | chk_ops( 219 | "eabce", "abce", [("delete", 0, 1, 0, 0), ("equal", 1, 5, 0, 4)] 220 | ) 221 | chk_ops( 222 | "abcde", 223 | "abXde", 224 | [ 225 | ("equal", 0, 2, 0, 2), 226 | ("replace", 2, 3, 2, 3), 227 | ("equal", 3, 5, 3, 5), 228 | ], 229 | ) 230 | chk_ops( 231 | "abcde", 232 | "abXYZde", 233 | [ 234 | ("equal", 0, 2, 0, 2), 235 | ("replace", 2, 3, 2, 5), 236 | ("equal", 3, 5, 5, 7), 237 | ], 238 | ) 239 | chk_ops( 240 | "abde", 241 | "abXYZde", 242 | [ 243 | ("equal", 0, 2, 0, 2), 244 | ("insert", 2, 2, 2, 5), 245 | ("equal", 2, 4, 5, 7), 246 | ], 247 | ) 248 | chk_ops( 249 | "abcdefghijklmnop", 250 | "abcdefxydefghijklmnop", 251 | [ 252 | ("equal", 0, 6, 0, 6), 253 | ("insert", 6, 6, 6, 11), 254 | ("equal", 6, 16, 11, 21), 255 | ], 256 | ) 257 | chk_ops( 258 | ["hello there\n", "world\n", "how are you today?\n"], 259 | ["hello there\n", "how are you today?\n"], 260 | [ 261 | ("equal", 0, 1, 0, 1), 262 | ("delete", 1, 2, 1, 1), 263 | ("equal", 2, 3, 1, 2), 264 | ], 265 | ) 266 | chk_ops( 267 | "aBccDe", 268 | "abccde", 269 | [ 270 | ("equal", 0, 1, 0, 1), 271 | ("replace", 1, 5, 1, 5), 272 | ("equal", 5, 6, 5, 6), 273 | ], 274 | ) 275 | chk_ops( 276 | "aBcDec", 277 | "abcdec", 278 | [ 279 | ("equal", 0, 1, 0, 1), 280 | ("replace", 1, 2, 1, 2), 281 | ("equal", 2, 3, 2, 3), 282 | ("replace", 3, 4, 3, 4), 283 | ("equal", 4, 6, 4, 6), 284 | ], 285 | ) 286 | chk_ops( 287 | "aBcdEcdFg", 288 | "abcdecdfg", 289 | [ 290 | ("equal", 0, 1, 0, 1), 291 | ("replace", 1, 8, 1, 8), 292 | ("equal", 8, 9, 8, 9), 293 | ], 294 | ) 295 | chk_ops( 296 | "aBcdEeXcdFg", 297 | "abcdecdfg", 298 | [ 299 | ("equal", 0, 1, 0, 1), 300 | ("replace", 1, 2, 1, 2), 301 | ("equal", 2, 4, 2, 4), 302 | ("delete", 4, 5, 4, 4), 303 | ("equal", 5, 6, 4, 5), 304 | ("delete", 6, 7, 5, 5), 305 | ("equal", 7, 9, 5, 7), 306 | ("replace", 9, 10, 7, 8), 307 | ("equal", 10, 11, 8, 9), 308 | ], 309 | ) 310 | 311 | def test_grouped_opcodes(self): 312 | def chk_ops(a, b, expected_codes, n=3): 313 | s = self._PatienceSequenceMatcher(None, a, b) 314 | self.assertEqual(expected_codes, list(s.get_grouped_opcodes(n))) 315 | 316 | chk_ops("", "", []) 317 | chk_ops([], [], []) 318 | chk_ops("abc", "", [[("delete", 0, 3, 0, 0)]]) 319 | chk_ops("", "abc", [[("insert", 0, 0, 0, 3)]]) 320 | chk_ops("abcd", "abcd", []) 321 | chk_ops( 322 | "abcd", "abce", [[("equal", 0, 3, 0, 3), ("replace", 3, 4, 3, 4)]] 323 | ) 324 | chk_ops( 325 | "eabc", 326 | "abce", 327 | [ 328 | [ 329 | ("delete", 0, 1, 0, 0), 330 | ("equal", 1, 4, 0, 3), 331 | ("insert", 4, 4, 3, 4), 332 | ] 333 | ], 334 | ) 335 | chk_ops( 336 | "abcdefghijklmnop", 337 | "abcdefxydefghijklmnop", 338 | [ 339 | [ 340 | ("equal", 3, 6, 3, 6), 341 | ("insert", 6, 6, 6, 11), 342 | ("equal", 6, 9, 11, 14), 343 | ] 344 | ], 345 | ) 346 | chk_ops( 347 | "abcdefghijklmnop", 348 | "abcdefxydefghijklmnop", 349 | [ 350 | [ 351 | ("equal", 2, 6, 2, 6), 352 | 
("insert", 6, 6, 6, 11), 353 | ("equal", 6, 10, 11, 15), 354 | ] 355 | ], 356 | 4, 357 | ) 358 | chk_ops( 359 | "Xabcdef", 360 | "abcdef", 361 | [[("delete", 0, 1, 0, 0), ("equal", 1, 4, 0, 3)]], 362 | ) 363 | chk_ops( 364 | "abcdef", 365 | "abcdefX", 366 | [[("equal", 3, 6, 3, 6), ("insert", 6, 6, 6, 7)]], 367 | ) 368 | 369 | def test_multiple_ranges(self): 370 | # There was an earlier bug where we used a bad set of ranges, 371 | # this triggers that specific bug, to make sure it doesn't regress 372 | self.assertDiffBlocks( 373 | "abcdefghijklmnop", 374 | "abcXghiYZQRSTUVWXYZijklmnop", 375 | [(0, 0, 3), (6, 4, 3), (9, 20, 7)], 376 | ) 377 | 378 | self.assertDiffBlocks( 379 | "ABCd efghIjk L", 380 | "AxyzBCn mo pqrstuvwI1 2 L", 381 | [(0, 0, 1), (1, 4, 2), (9, 19, 1), (12, 23, 3)], 382 | ) 383 | 384 | # These are rot13 code snippets. 385 | self.assertDiffBlocks( 386 | '''\ 387 | trg nqqrq jura lbh nqq n svyr va gur qverpgbel. 388 | """ 389 | gnxrf_netf = ['svyr*'] 390 | gnxrf_bcgvbaf = ['ab-erphefr'] 391 | 392 | qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr): 393 | sebz omeyvo.nqq vzcbeg fzneg_nqq, nqq_ercbegre_cevag, nqq_ercbegre_ahyy 394 | vs vf_dhvrg(): 395 | ercbegre = nqq_ercbegre_ahyy 396 | ryfr: 397 | ercbegre = nqq_ercbegre_cevag 398 | fzneg_nqq(svyr_yvfg, abg ab_erphefr, ercbegre) 399 | 400 | 401 | pynff pzq_zxqve(Pbzznaq): 402 | '''.splitlines(True), 403 | '''\ 404 | trg nqqrq jura lbh nqq n svyr va gur qverpgbel. 405 | 406 | --qel-eha jvyy fubj juvpu svyrf jbhyq or nqqrq, ohg abg npghnyyl 407 | nqq gurz. 408 | """ 409 | gnxrf_netf = ['svyr*'] 410 | gnxrf_bcgvbaf = ['ab-erphefr', 'qel-eha'] 411 | 412 | qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr, qel_eha=Snyfr): 413 | vzcbeg omeyvo.nqq 414 | 415 | vs qel_eha: 416 | vs vf_dhvrg(): 417 | # Guvf vf cbvagyrff, ohg V'q engure abg envfr na reebe 418 | npgvba = omeyvo.nqq.nqq_npgvba_ahyy 419 | ryfr: 420 | npgvba = omeyvo.nqq.nqq_npgvba_cevag 421 | ryvs vf_dhvrg(): 422 | npgvba = omeyvo.nqq.nqq_npgvba_nqq 423 | ryfr: 424 | npgvba = omeyvo.nqq.nqq_npgvba_nqq_naq_cevag 425 | 426 | omeyvo.nqq.fzneg_nqq(svyr_yvfg, abg ab_erphefr, npgvba) 427 | 428 | 429 | pynff pzq_zxqve(Pbzznaq): 430 | '''.splitlines(True), 431 | [(0, 0, 1), (1, 4, 2), (9, 19, 1), (12, 23, 3)], 432 | ) 433 | 434 | def test_patience_unified_diff(self): 435 | txt_a = ["hello there\n", "world\n", "how are you today?\n"] 436 | txt_b = ["hello there\n", "how are you today?\n"] 437 | unified_diff = patiencediff.unified_diff 438 | psm = self._PatienceSequenceMatcher 439 | self.assertEqual( 440 | [ 441 | "--- \n", 442 | "+++ \n", 443 | "@@ -1,3 +1,2 @@\n", 444 | " hello there\n", 445 | "-world\n", 446 | " how are you today?\n", 447 | ], 448 | list(unified_diff(txt_a, txt_b, sequencematcher=psm)), 449 | ) 450 | txt_a = [x + "\n" for x in "abcdefghijklmnop"] 451 | txt_b = [x + "\n" for x in "abcdefxydefghijklmnop"] 452 | # This is the result with LongestCommonSubstring matching 453 | self.assertEqual( 454 | [ 455 | "--- \n", 456 | "+++ \n", 457 | "@@ -1,6 +1,11 @@\n", 458 | " a\n", 459 | " b\n", 460 | " c\n", 461 | "+d\n", 462 | "+e\n", 463 | "+f\n", 464 | "+x\n", 465 | "+y\n", 466 | " d\n", 467 | " e\n", 468 | " f\n", 469 | ], 470 | list(unified_diff(txt_a, txt_b)), 471 | ) 472 | # And the patience diff 473 | self.assertEqual( 474 | [ 475 | "--- \n", 476 | "+++ \n", 477 | "@@ -4,6 +4,11 @@\n", 478 | " d\n", 479 | " e\n", 480 | " f\n", 481 | "+x\n", 482 | "+y\n", 483 | "+d\n", 484 | "+e\n", 485 | "+f\n", 486 | " g\n", 487 | " h\n", 488 | " i\n", 489 | ], 490 | list(unified_diff(txt_a, 
491 |         )
492 |
493 |     def test_patience_unified_diff_with_dates(self):
494 |         txt_a = ["hello there\n", "world\n", "how are you today?\n"]
495 |         txt_b = ["hello there\n", "how are you today?\n"]
496 |         unified_diff = patiencediff.unified_diff
497 |         psm = self._PatienceSequenceMatcher
498 |         self.assertEqual(
499 |             [
500 |                 "--- a\t2008-08-08\n",
501 |                 "+++ b\t2008-09-09\n",
502 |                 "@@ -1,3 +1,2 @@\n",
503 |                 " hello there\n",
504 |                 "-world\n",
505 |                 " how are you today?\n",
506 |             ],
507 |             list(
508 |                 unified_diff(
509 |                     txt_a,
510 |                     txt_b,
511 |                     fromfile="a",
512 |                     tofile="b",
513 |                     fromfiledate="2008-08-08",
514 |                     tofiledate="2008-09-09",
515 |                     sequencematcher=psm,
516 |                 )
517 |             ),
518 |         )
519 |
520 |
521 | class TestPatienceDiffLibFiles(unittest.TestCase):
522 |     def setUp(self):
523 |         super().setUp()
524 |         self._PatienceSequenceMatcher = (
525 |             _patiencediff_py.PatienceSequenceMatcher_py
526 |         )
527 |         self.test_dir = tempfile.mkdtemp()
528 |         self.addCleanup(lambda: shutil.rmtree(self.test_dir))
529 |
530 |     def test_patience_unified_diff_files(self):
531 |         txt_a = [b"hello there\n", b"world\n", b"how are you today?\n"]
532 |         txt_b = [b"hello there\n", b"how are you today?\n"]
533 |         with open(os.path.join(self.test_dir, "a1"), "wb") as f:
534 |             f.writelines(txt_a)
535 |         with open(os.path.join(self.test_dir, "b1"), "wb") as f:
536 |             f.writelines(txt_b)
537 |
538 |         unified_diff_files = patiencediff.unified_diff_files
539 |         psm = self._PatienceSequenceMatcher
540 |
541 |         old_pwd = os.getcwd()
542 |         os.chdir(self.test_dir)
543 |         try:
544 |             self.assertEqual(
545 |                 [
546 |                     "--- a1\n",
547 |                     "+++ b1\n",
548 |                     "@@ -1,3 +1,2 @@\n",
549 |                     " hello there\n",
550 |                     "-world\n",
551 |                     " how are you today?\n",
552 |                 ],
553 |                 list(unified_diff_files("a1", "b1", sequencematcher=psm)),
554 |             )
555 |         finally:
556 |             os.chdir(old_pwd)
557 |
558 |         txt_a = [x + "\n" for x in "abcdefghijklmnop"]
559 |         txt_b = [x + "\n" for x in "abcdefxydefghijklmnop"]
560 |         with open(os.path.join(self.test_dir, "a2"), "w") as f:
561 |             f.writelines(txt_a)
562 |         with open(os.path.join(self.test_dir, "b2"), "w") as f:
563 |             f.writelines(txt_b)
564 |
565 |         # This is the result with LongestCommonSubstring matching
566 |         os.chdir(self.test_dir)
567 |         try:
568 |             self.assertEqual(
569 |                 [
570 |                     "--- a2\n",
571 |                     "+++ b2\n",
572 |                     "@@ -1,6 +1,11 @@\n",
573 |                     " a\n",
574 |                     " b\n",
575 |                     " c\n",
576 |                     "+d\n",
577 |                     "+e\n",
578 |                     "+f\n",
579 |                     "+x\n",
580 |                     "+y\n",
581 |                     " d\n",
582 |                     " e\n",
583 |                     " f\n",
584 |                 ],
585 |                 list(unified_diff_files("a2", "b2")),
586 |             )
587 |
588 |             # And the patience diff
589 |             self.assertEqual(
590 |                 [
591 |                     "--- a2\n",
592 |                     "+++ b2\n",
593 |                     "@@ -4,6 +4,11 @@\n",
594 |                     " d\n",
595 |                     " e\n",
596 |                     " f\n",
597 |                     "+x\n",
598 |                     "+y\n",
599 |                     "+d\n",
600 |                     "+e\n",
601 |                     "+f\n",
602 |                     " g\n",
603 |                     " h\n",
604 |                     " i\n",
605 |                 ],
606 |                 list(unified_diff_files("a2", "b2", sequencematcher=psm)),
607 |             )
608 |         finally:
609 |             os.chdir(old_pwd)
610 |
611 |
612 | class TestPatienceDiffLib_rs(TestPatienceDiffLib):
613 |     """Test class for the Rust implementation using PyO3 bindings."""
614 |
615 |     def setUp(self):
616 |         super(TestPatienceDiffLib, self).setUp()
617 |         try:
618 |             from . import _patiencediff_rs
619 |         except ImportError:
620 |             self.skipTest("Rust extension not built")
621 |         self._unique_lcs = _patiencediff_rs.unique_lcs_rs
622 |         self._recurse_matches = _patiencediff_rs.recurse_matches_rs
623 |         self._PatienceSequenceMatcher = (
624 |             _patiencediff_rs.PatienceSequenceMatcher_rs
625 |         )
626 |
627 |     def test_unhashable(self):
628 |         """We should get a proper exception here."""
629 |         # We need to be able to hash items in the sequence; lists are
630 |         # unhashable and thus cannot be diffed.
631 |         self.assertRaises(
632 |             TypeError, self._PatienceSequenceMatcher, None, [[]], []
633 |         )
634 |         self.assertRaises(
635 |             TypeError, self._PatienceSequenceMatcher, None, ["valid", []], []
636 |         )
637 |         self.assertRaises(
638 |             TypeError, self._PatienceSequenceMatcher, None, ["valid"], [[]]
639 |         )
640 |         self.assertRaises(
641 |             TypeError,
642 |             self._PatienceSequenceMatcher,
643 |             None,
644 |             ["valid"],
645 |             ["valid", []],
646 |         )
647 |
648 |
649 | class TestPatienceDiffLibFiles_rs(TestPatienceDiffLibFiles):
650 |     """Test class for file operations with the Rust implementation."""
651 |
652 |     def setUp(self):
653 |         super().setUp()
654 |         try:
655 |             from . import _patiencediff_rs
656 |         except ImportError:
657 |             self.skipTest("Rust extension not built")
658 |         self._PatienceSequenceMatcher = (
659 |             _patiencediff_rs.PatienceSequenceMatcher_rs
660 |         )
661 |
662 |
663 | class TestUsingCompiledIfAvailable(unittest.TestCase):
664 |     def test_PatienceSequenceMatcher(self):
665 |         try:
666 |             from ._patiencediff_rs import PatienceSequenceMatcher_rs
667 |
668 |             self.assertIs(
669 |                 PatienceSequenceMatcher_rs,
670 |                 patiencediff.PatienceSequenceMatcher,
671 |             )
672 |         except ImportError:
673 |             from ._patiencediff_py import PatienceSequenceMatcher_py
674 |
675 |             self.assertIs(
676 |                 PatienceSequenceMatcher_py,
677 |                 patiencediff.PatienceSequenceMatcher,
678 |             )
679 |
680 |     def test_unique_lcs(self):
681 |         try:
682 |             from ._patiencediff_rs import unique_lcs_rs
683 |
684 |             self.assertIs(unique_lcs_rs, patiencediff.unique_lcs)
685 |         except ImportError:
686 |             from ._patiencediff_py import unique_lcs_py
687 |
688 |             self.assertIs(unique_lcs_py, patiencediff.unique_lcs)
689 |
690 |     def test_recurse_matches(self):
691 |         try:
692 |             from ._patiencediff_rs import recurse_matches_rs
693 |
694 |             self.assertIs(recurse_matches_rs, patiencediff.recurse_matches)
695 |         except ImportError:
696 |             from ._patiencediff_py import recurse_matches_py
697 |
698 |             self.assertIs(recurse_matches_py, patiencediff.recurse_matches)
699 |
700 |     def test_run_implementation(self):
701 |         """Test that we can run the implementation that was loaded."""
702 |         # Simple test with some basic strings
703 |         a = "abcde"
704 |         b = "abXde"
705 |
706 |         # Create a matcher and get blocks
707 |         matcher = patiencediff.PatienceSequenceMatcher(None, a, b)
708 |         blocks = matcher.get_matching_blocks()
709 |
710 |         # Validate results - we should get two blocks plus sentinel
711 |         self.assertEqual(3, len(blocks))
712 |         self.assertEqual((0, 0, 2), blocks[0])  # "ab" match
713 |         self.assertEqual((3, 3, 2), blocks[1])  # "de" match
714 |         self.assertEqual((5, 5, 0), blocks[2])  # sentinel
715 |
716 |         # Test that unique_lcs works
717 |         matches = patiencediff.unique_lcs(a, b)
718 |         self.assertEqual([(0, 0), (1, 1), (3, 3), (4, 4)], matches)
719 |
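# Editorial sketch (an assumption mirroring what TestUsingCompiledIfAvailable
# asserts): the patiencediff package selects the Rust implementation at import
# time when the extension is available, and falls back to pure Python,
# roughly:
#
#     try:
#         from ._patiencediff_rs import (
#             PatienceSequenceMatcher_rs as PatienceSequenceMatcher,
#         )
#     except ImportError:
#         from ._patiencediff_py import (
#             PatienceSequenceMatcher_py as PatienceSequenceMatcher,
#         )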
720 |
721 | if __name__ == "__main__":
722 |     # Check which implementation is loaded
723 |     import importlib.util
724 |
725 |     if importlib.util.find_spec("patiencediff._patiencediff_rs") is not None:
726 |         print("Rust extension loaded successfully!")
727 |     else:
728 |         print("Rust extension is not available; using Python implementation")
729 |
730 |     # Run the tests
731 |     unittest.main()
732 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.2", "setuptools-rust>=1.5.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "patiencediff"
7 | description = "Python implementation of the patiencediff algorithm"
8 | readme = "README.rst"
9 | maintainers = [{name = "Breezy Developers", email = "team@breezy-vcs.org"}]
10 | license = {text = "GNU GPLv2 or later"}
11 | classifiers = [
12 |     "Development Status :: 6 - Mature",
13 |     "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)",
14 |     "Programming Language :: Python :: 3.9",
15 |     "Programming Language :: Python :: 3.10",
16 |     "Programming Language :: Python :: 3.11",
17 |     "Programming Language :: Python :: 3.12",
18 |     "Programming Language :: Python :: 3.13",
19 |     "Programming Language :: Python :: Implementation :: CPython",
20 |     "Programming Language :: Python :: Implementation :: PyPy",
21 |     "Operating System :: POSIX",
22 | ]
23 | requires-python = ">=3.9"
24 | dynamic = ["version"]
25 | dependencies = []
26 |
27 | [project.urls]
28 | Homepage = "https://www.breezy-vcs.org/"
29 | Repository = "https://github.com/breezy-team/patiencediff"
30 |
31 | [project.scripts]
32 | patiencediff = "patiencediff.__main__:main"
33 |
34 | [tool.setuptools]
35 | packages = ["patiencediff"]
36 | include-package-data = false
37 |
38 | [tool.setuptools.package-data]
39 | patiencediff = ["py.typed"]
40 |
41 | [tool.setuptools.dynamic]
42 | version = {attr = "patiencediff.__version__"}
43 |
44 | [tool.ruff.lint]
45 | select = [
46 |     "ANN",
47 |     "D",
48 |     "E",
49 |     "F",
50 |     "I",
51 |     "UP",
52 | ]
53 | ignore = [
54 |     "ANN001",
55 |     "ANN201",
56 |     "ANN202",
57 |     "D100",
58 |     "D101",
59 |     "D102",
60 |     "D103",
61 |     "D104",
62 |     "E501",
63 | ]
64 |
65 | [tool.ruff]
66 | target-version = "py39"
67 | line-length = 79
68 |
69 | [tool.ruff.lint.pydocstyle]
70 | convention = "google"
71 |
72 | [project.optional-dependencies]
73 | dev = [
74 |     "ruff==0.11.11"
75 | ]
76 |
77 | [tool.cibuildwheel]
78 | environment = {PATH="$HOME/.cargo/bin:$PATH"}
79 | before-build = "pip install -U setuptools-rust && curl https://sh.rustup.rs -sSf | sh -s -- --profile=minimal -y && rustup show"
80 |
81 | [tool.cibuildwheel.linux]
82 | skip = "*-musllinux_*"
83 | archs = ["auto", "aarch64"]
84 | before-build = "pip install -U setuptools-rust && yum -y install libatomic && curl https://sh.rustup.rs -sSf | sh -s -- --profile=minimal -y && rustup show"
85 |
86 | [tool.cibuildwheel.macos]
87 | archs = ["auto", "universal2", "x86_64", "arm64"]
88 | before-all = "rustup target add x86_64-apple-darwin aarch64-apple-darwin"
89 | skip = """\
90 | cp39-macosx_x86_64 cp39-macosx_universal2 \
91 | cp310-macosx_x86_64 cp310-macosx_universal2 \
92 | cp311-macosx_x86_64 cp311-macosx_universal2 \
93 | cp312-macosx_x86_64 cp312-macosx_universal2 \
94 | cp313-macosx_x86_64 cp313-macosx_universal2 \
95 | """
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 |
5 | from setuptools import setup
6 | from setuptools_rust import Binding, RustExtension
7 |
8 | # Rust extension
9 | rust_extensions = [
10 |     RustExtension(
11 |         "patiencediff._patiencediff_rs",
12 |         "Cargo.toml",
13 |         binding=Binding.PyO3,
14 |         optional=os.environ.get("CIBUILDWHEEL", "0") != "1",
15 |     )
16 | ]
17 |
18 | setup(
19 |     rust_extensions=rust_extensions,
20 | )
21 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | use pyo3::prelude::*;
2 | use pyo3::types::{PyList, PySequence, PyTuple};
3 |
4 | /// Find the longest common subsequence of unique elements in sequences a and b.
5 | ///
6 | /// Returns a list of (i, j) tuples where a[i] == b[j].
7 | /// This implementation uses the patience sorting algorithm.
8 | #[pyfunction]
9 | fn unique_lcs_rs<'py>(
10 |     py: Python<'py>,
11 |     a: Bound<'py, PyAny>,
12 |     b: Bound<'py, PyAny>,
13 | ) -> PyResult<Bound<'py, PyList>> {
14 |     // Convert Python sequences to vectors of PyItem for the patiencediff crate
15 |     let a_seq = a.clone();
16 |     let b_seq = b.clone();
17 |
18 |     let a_len = a_seq.len()?;
19 |     let b_len = b_seq.len()?;
20 |
21 |     // Create PyItem sequences
22 |     let mut a_items = Vec::with_capacity(a_len);
23 |     let mut b_items = Vec::with_capacity(b_len);
24 |
25 |     // Extract items from sequences
26 |     for i in 0..a_len {
27 |         let item = a_seq.get_item(i)?;
28 |         a_items.push(PyItem(item.into()));
29 |     }
30 |
31 |     for i in 0..b_len {
32 |         let item = b_seq.get_item(i)?;
33 |         b_items.push(PyItem(item.into()));
34 |     }
35 |
36 |     // Use the patiencediff crate's unique_lcs function
37 |     let matches = patiencediff::unique_lcs(&a_items, &b_items);
38 |
39 |     // Create result list
40 |     let result = PyList::empty(py);
41 |
42 |     // Add matches to the result list
43 |     for &(a_pos, b_pos) in &matches {
44 |         let tuple = PyTuple::new(py, &[a_pos, b_pos])?;
45 |         result.append(tuple)?;
46 |     }
47 |
48 |     Ok(result)
49 | }
50 |
51 | /// Python item wrapper that implements the necessary traits for the patiencediff crate
52 | struct PyItem(PyObject);
53 |
54 | // Implement Clone for PyItem using clone_ref() for PyObject
55 | impl Clone for PyItem {
56 |     fn clone(&self) -> Self {
57 |         Python::with_gil(|py| PyItem(self.0.clone_ref(py)))
58 |     }
59 | }
60 |
61 | // Define equality for PyItem that uses Python's eq
62 | impl PartialEq for PyItem {
63 |     fn eq(&self, other: &Self) -> bool {
64 |         Python::with_gil(|py| {
65 |             let a = self.0.extract::<Bound<PyAny>>(py).unwrap();
66 |             let b = other.0.extract::<Bound<PyAny>>(py).unwrap();
67 |             a.eq(&b).unwrap_or(false)
68 |         })
69 |     }
70 | }
71 |
72 | impl Eq for PyItem {}
73 |
74 | // Define hashing for PyItem that uses Python's hash
75 | impl std::hash::Hash for PyItem {
76 |     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
77 |         let hash_value = Python::with_gil(|py| {
78 |             let obj = self.0.extract::<Bound<PyAny>>(py).unwrap();
79 |             match obj.hash() {
80 |                 Ok(hash) => hash,
81 |                 Err(e) => {
82 |                     // Hash cannot report an error, so fall back to constant hashes instead of panicking
83 |                     if e.is_instance_of::<pyo3::exceptions::PyTypeError>(py) {
84 |                         return 0; // Use a constant hash for unhashable types
85 |                     }
86 |                     // For any other errors, use a different constant
87 |                     return 1;
88 |                 }
89 |             }
90 |         });
91 |         state.write_isize(hash_value);
92 |     }
93 | }
94 |
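// Editorial note on the Hash impl above: the `Hash` trait cannot report
// failure, so unhashable values are mapped to a constant instead of
// panicking (at the cost of extra collisions). The user-visible TypeError
// for unhashable items is raised eagerly in PatienceSequenceMatcher_rs::new
// below, which probes item.hash() before building the item vectors.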
95 | /// Recursively find matches between two sequences.
96 | ///
97 | /// This function wraps the patiencediff crate's recurse_matches function.
98 | #[pyfunction]
99 | fn recurse_matches_rs<'py>(
100 |     py: Python<'py>,
101 |     a: Bound<'py, PyAny>,
102 |     b: Bound<'py, PyAny>,
103 |     alo: usize,
104 |     blo: usize,
105 |     ahi: usize,
106 |     bhi: usize,
107 |     answer: Bound<'py, PyList>,
108 |     maxrecursion: i32,
109 | ) -> PyResult<()> {
110 |     // Early return for base cases
111 |     if maxrecursion < 0 || alo == ahi || blo == bhi {
112 |         return Ok(());
113 |     }
114 |
115 |     // Convert Python sequences to vectors of PyItem for the patiencediff crate
116 |     let a_seq = a.clone();
117 |     let b_seq = b.clone();
118 |
119 |     // Create vectors of PyItems for the sliced sequences
120 |     let mut a_items = Vec::with_capacity(ahi - alo);
121 |     let mut b_items = Vec::with_capacity(bhi - blo);
122 |
123 |     // Extract the items we need from the sequences
124 |     for i in alo..ahi {
125 |         let item = a_seq.get_item(i)?;
126 |         a_items.push(PyItem(item.into()));
127 |     }
128 |
129 |     for i in blo..bhi {
130 |         let item = b_seq.get_item(i)?;
131 |         b_items.push(PyItem(item.into()));
132 |     }
133 |
134 |     // Create a vector to collect the matches
135 |     let mut matches = Vec::new();
136 |
137 |     // Call the patiencediff crate's recurse_matches function
138 |     patiencediff::recurse_matches(
139 |         &a_items,
140 |         &b_items,
141 |         0,
142 |         0,
143 |         a_items.len(),
144 |         b_items.len(),
145 |         &mut matches,
146 |         maxrecursion,
147 |     );
148 |
149 |     // Convert the results to Python and add to the answer list
150 |     for &(rel_a, rel_b) in &matches {
151 |         let a_pos = rel_a + alo;
152 |         let b_pos = rel_b + blo;
153 |
154 |         let tuple = PyTuple::new(py, &[a_pos, b_pos])?;
155 |         answer.append(tuple)?;
156 |     }
157 |
158 |     Ok(())
159 | }
160 |
161 | /// The PatienceSequenceMatcher class
162 | #[pyclass(name = "PatienceSequenceMatcher_rs")]
163 | struct PatienceSequenceMatcherRs {
164 |     matcher: patiencediff::SequenceMatcher<PyItem>,
165 | }
166 |
167 | #[pymethods]
168 | impl PatienceSequenceMatcherRs {
169 |     #[new]
170 |     fn new(py: Python<'_>, _junk: Option<PyObject>, a: PyObject, b: PyObject) -> PyResult<Self> {
171 |         // Extract sequences
172 |         let a_any = a.extract::<Bound<PyAny>>(py)?;
173 |         let b_any = b.extract::<Bound<PyAny>>(py)?;
174 |
175 |         // Convert to sequences
176 |         let a_seq = a_any.downcast::<PySequence>()?;
177 |         let b_seq = b_any.downcast::<PySequence>()?;
178 |
179 |         let a_len = a_seq.len()?;
180 |         let b_len = b_seq.len()?;
181 |
182 |         // Create PyItem sequences
183 |         let mut a_items = Vec::with_capacity(a_len);
184 |         let mut b_items = Vec::with_capacity(b_len);
185 |
186 |         // Check if all items are hashable before proceeding
187 |         for i in 0..a_len {
188 |             let item = a_seq.get_item(i)?;
189 |             // Try to hash the item to check if it's hashable
190 |             if let Err(e) = item.hash() {
191 |                 if e.is_instance_of::<pyo3::exceptions::PyTypeError>(py) {
192 |                     return Err(pyo3::exceptions::PyTypeError::new_err("unhashable type"));
193 |                 }
194 |                 return Err(e);
195 |             }
196 |             a_items.push(PyItem(item.into()));
197 |         }
198 |
199 |         for i in 0..b_len {
200 |             let item = b_seq.get_item(i)?;
201 |             // Try to hash the item to check if it's hashable
202 |             if let Err(e) = item.hash() {
203 |                 if e.is_instance_of::<pyo3::exceptions::PyTypeError>(py) {
204 |                     return Err(pyo3::exceptions::PyTypeError::new_err("unhashable type"));
205 |                 }
206 |                 return Err(e);
207 |             }
208 |             b_items.push(PyItem(item.into()));
209 |         }
210 |
211 |         // Create and return the matcher
212 |         let matcher = patiencediff::SequenceMatcher::new(&a_items, &b_items);
213 |
214 |         Ok(Self { matcher })
215 |     }
216 |
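    // Editorial illustration of the Python-level contract checked by
    // test_run_implementation in the test suite:
    //
    //     >>> m = PatienceSequenceMatcher_rs(None, "abcde", "abXde")
    //     >>> m.get_matching_blocks()
    //     [(0, 0, 2), (3, 3, 2), (5, 5, 0)]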
217 |     /// Return list of triples describing matching subsequences.
218 |     ///
219 |     /// Each triple is of the form (i, j, n), and means that
220 |     /// a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
221 |     /// i and in j.
222 |     ///
223 |     /// The last triple is a dummy, (len(a), len(b), 0), and is the only
224 |     /// triple with n==0.
225 |     fn get_matching_blocks<'py>(&mut self, py: Python<'py>) -> PyResult<Bound<'py, PyList>> {
226 |         // Get matching blocks from the matcher
227 |         let blocks = self.matcher.get_matching_blocks();
228 |
229 |         // Convert blocks to Python list
230 |         let result = PyList::empty(py);
231 |
232 |         for &(a, b, n) in blocks {
233 |             let tuple = PyTuple::new(py, &[a, b, n])?;
234 |             result.append(tuple)?;
235 |         }
236 |
237 |         Ok(result)
238 |     }
239 |
240 |     /// Return list of 5-tuples describing how to turn a into b.
241 |     ///
242 |     /// Each tuple is of the form (tag, i1, i2, j1, j2).  The first tuple
243 |     /// has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
244 |     /// tuple preceding it, and likewise for j1 == the previous j2.
245 |     ///
246 |     /// The tags are strings, with these meanings:
247 |     ///
248 |     /// 'replace':  a[i1:i2] should be replaced by b[j1:j2]
249 |     /// 'delete':   a[i1:i2] should be deleted.
250 |     ///             Note that j1==j2 in this case.
251 |     /// 'insert':   b[j1:j2] should be inserted at a[i1:i1].
252 |     ///             Note that i1==i2 in this case.
253 |     /// 'equal':    a[i1:i2] == b[j1:j2]
254 |     fn get_opcodes<'py>(&mut self, py: Python<'py>) -> PyResult<Bound<'py, PyList>> {
255 |         // Get opcodes directly from the matcher
256 |         let opcodes = self.matcher.get_opcodes();
257 |
258 |         // Convert opcodes to Python list
259 |         let result = PyList::empty(py);
260 |
261 |         for opcode in opcodes {
262 |             match opcode {
263 |                 patiencediff::Opcode::Equal(i1, i2, j1, j2) => {
264 |                     let tuple = PyTuple::new(
265 |                         py,
266 |                         &[
267 |                             "equal".into_py(py),
268 |                             i1.into_py(py),
269 |                             i2.into_py(py),
270 |                             j1.into_py(py),
271 |                             j2.into_py(py),
272 |                         ],
273 |                     )?;
274 |                     result.append(tuple)?;
275 |                 }
276 |                 patiencediff::Opcode::Replace(i1, i2, j1, j2) => {
277 |                     let tuple = PyTuple::new(
278 |                         py,
279 |                         &[
280 |                             "replace".into_py(py),
281 |                             i1.into_py(py),
282 |                             i2.into_py(py),
283 |                             j1.into_py(py),
284 |                             j2.into_py(py),
285 |                         ],
286 |                     )?;
287 |                     result.append(tuple)?;
288 |                 }
289 |                 patiencediff::Opcode::Delete(i1, i2, j1, j2) => {
290 |                     let tuple = PyTuple::new(
291 |                         py,
292 |                         &[
293 |                             "delete".into_py(py),
294 |                             i1.into_py(py),
295 |                             i2.into_py(py),
296 |                             j1.into_py(py),
297 |                             j2.into_py(py),
298 |                         ],
299 |                     )?;
300 |                     result.append(tuple)?;
301 |                 }
302 |                 patiencediff::Opcode::Insert(i1, i2, j1, j2) => {
303 |                     let tuple = PyTuple::new(
304 |                         py,
305 |                         &[
306 |                             "insert".into_py(py),
307 |                             i1.into_py(py),
308 |                             i2.into_py(py),
309 |                             j1.into_py(py),
310 |                             j2.into_py(py),
311 |                         ],
312 |                     )?;
313 |                     result.append(tuple)?;
314 |                 }
315 |             }
316 |         }
317 |
318 |         Ok(result)
319 |     }
320 |
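    // Editorial illustration (values from test_opcodes in the Python test
    // suite): for "abcde" vs "abXde", get_opcodes() returns
    //     [("equal", 0, 2, 0, 2), ("replace", 2, 3, 2, 3), ("equal", 3, 5, 3, 5)]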
321 |     /// Return a list of groups with up to n lines of context.
322 |     ///
323 |     /// Each group is in the same format as returned by get_opcodes().
324 |     fn get_grouped_opcodes<'py>(
325 |         &mut self,
326 |         py: Python<'py>,
327 |         n: Option<usize>,
328 |     ) -> PyResult<Bound<'py, PyList>> {
329 |         let n = n.unwrap_or(3);
330 |
331 |         // Get grouped opcodes directly from the matcher
332 |         let grouped_opcodes = self.matcher.get_grouped_opcodes(n);
333 |
334 |         // Convert to Python list
335 |         let result = PyList::empty(py);
336 |
337 |         for group in grouped_opcodes {
338 |             let group_list = PyList::empty(py);
339 |
340 |             for opcode in group {
341 |                 let (tag, i1, i2, j1, j2) = match opcode {
342 |                     patiencediff::Opcode::Equal(i1, i2, j1, j2) => ("equal", i1, i2, j1, j2),
343 |                     patiencediff::Opcode::Replace(i1, i2, j1, j2) => ("replace", i1, i2, j1, j2),
344 |                     patiencediff::Opcode::Delete(i1, i2, j1, j2) => ("delete", i1, i2, j1, j2),
345 |                     patiencediff::Opcode::Insert(i1, i2, j1, j2) => ("insert", i1, i2, j1, j2),
346 |                 };
347 |
348 |                 let tuple = PyTuple::new(
349 |                     py,
350 |                     &[
351 |                         tag.into_py(py),
352 |                         i1.into_py(py),
353 |                         i2.into_py(py),
354 |                         j1.into_py(py),
355 |                         j2.into_py(py),
356 |                     ],
357 |                 )?;
358 |
359 |                 group_list.append(tuple)?;
360 |             }
361 |
362 |             if group_list.len() > 0 {
363 |                 result.append(group_list)?;
364 |             }
365 |         }
366 |
367 |         // Note: We're not adding a default group for empty result anymore
368 |         Ok(result)
369 |     }
370 | }
371 |
372 | #[pymodule]
373 | fn _patiencediff_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
374 |     m.add_class::<PatienceSequenceMatcherRs>()?;
375 |     m.add_function(wrap_pyfunction!(unique_lcs_rs, m)?)?;
376 |     m.add_function(wrap_pyfunction!(recurse_matches_rs, m)?)?;
377 |     Ok(())
378 | }
379 |
--------------------------------------------------------------------------------