├── .bzrignore ├── .codespellrc ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── auto-merge.yaml │ ├── auto-merge.yml │ ├── disperse.yml │ ├── pythonpackage.yml │ └── wheels.yaml ├── .gitignore ├── AUTHORS ├── CLAUDE.md ├── CODE_OF_CONDUCT.md ├── COPYING ├── Cargo.lock ├── Cargo.toml ├── MANIFEST.in ├── README.rst ├── build.cmd ├── disperse.toml ├── patiencediff ├── __init__.py ├── __main__.py ├── _patiencediff_c.c ├── _patiencediff_c.pyi ├── _patiencediff_py.py ├── _patiencediff_rs.pyi ├── py.typed └── test_patiencediff.py ├── pyproject.toml ├── setup.py └── src └── lib.rs /.bzrignore: -------------------------------------------------------------------------------- 1 | build 2 | patiencediff.egg-info 3 | dist/ 4 | *~ 5 | -------------------------------------------------------------------------------- /.codespellrc: -------------------------------------------------------------------------------- 1 | [codespell] 2 | ignore-words-list = alo 3 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @jelmer 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Keep GitHub Actions up to date with GitHub's Dependabot... 2 | # https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot 3 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem 4 | version: 2 5 | updates: 6 | - package-ecosystem: "github-actions" 7 | directory: "/" 8 | schedule: 9 | interval: weekly 10 | - package-ecosystem: "pip" 11 | directory: "/" 12 | schedule: 13 | interval: weekly 14 | -------------------------------------------------------------------------------- /.github/workflows/auto-merge.yaml: -------------------------------------------------------------------------------- 1 | name: Dependabot auto-merge 2 | on: pull_request_target 3 | 4 | permissions: 5 | pull-requests: write 6 | contents: write 7 | 8 | jobs: 9 | dependabot: 10 | runs-on: ubuntu-latest 11 | if: ${{ github.actor == 'dependabot[bot]' }} 12 | steps: 13 | - name: Dependabot metadata 14 | id: metadata 15 | uses: dependabot/fetch-metadata@v2 16 | with: 17 | github-token: "${{ secrets.GITHUB_TOKEN }}" 18 | - name: Enable auto-merge for Dependabot PRs 19 | run: gh pr merge --auto --squash "$PR_URL" 20 | env: 21 | PR_URL: ${{github.event.pull_request.html_url}} 22 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 23 | -------------------------------------------------------------------------------- /.github/workflows/auto-merge.yml: -------------------------------------------------------------------------------- 1 | name: Dependabot auto-merge 2 | on: pull_request_target 3 | 4 | permissions: 5 | pull-requests: write 6 | contents: write 7 | 8 | jobs: 9 | dependabot: 10 | runs-on: ubuntu-latest 11 | if: ${{ github.actor == 'dependabot[bot]' }} 12 | steps: 13 | - name: Dependabot metadata 14 | id: metadata 15 | uses: dependabot/fetch-metadata@v2 16 | with: 17 | github-token: "${{ secrets.GITHUB_TOKEN }}" 18 | - name: Enable auto-merge for Dependabot PRs 19 | run: gh pr merge --auto --squash "$PR_URL" 20 | env: 21 | PR_URL: ${{github.event.pull_request.html_url}} 22 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 23 | 
-------------------------------------------------------------------------------- /.github/workflows/disperse.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Disperse configuration 3 | 4 | "on": 5 | - push 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: jelmer/action-disperse-validate@v2 -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | matrix: 11 | os: [ubuntu-latest, macos-latest, windows-latest] 12 | python-version: 13 | - '3.13' 14 | - '3.12' 15 | - '3.11' 16 | - '3.10' 17 | - '3.9' 18 | fail-fast: false 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip mypy setuptools setuptools-rust 29 | pip install -U pip ".[dev]" 30 | - name: Style checks 31 | run: | 32 | python -m ruff check . 33 | python -m ruff format --check . 34 | - name: Typing checks 35 | run: python -m mypy patiencediff 36 | - name: Build 37 | run: python setup.py build_ext -i 38 | - name: Test suite run 39 | run: python -m unittest patiencediff.test_patiencediff 40 | env: 41 | PYTHONHASHSEED: random 42 | -------------------------------------------------------------------------------- /.github/workflows/wheels.yaml: -------------------------------------------------------------------------------- 1 | name: Build Python distributions 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 6 * * *" # Daily 6AM UTC build 8 | 9 | jobs: 10 | build-wheels: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest, macos-latest, windows-latest] 15 | fail-fast: true 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-python@v5 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install setuptools wheel cibuildwheel 24 | - name: Set up QEMU 25 | uses: docker/setup-qemu-action@v3 26 | if: "matrix.os == 'ubuntu-latest'" 27 | - name: Build wheels 28 | run: python -m cibuildwheel --output-dir wheelhouse 29 | - name: Upload wheels 30 | uses: actions/upload-artifact@v4 31 | with: 32 | name: artifact-${{ matrix.os }} 33 | path: ./wheelhouse/*.whl 34 | 35 | build-sdist: 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: actions/setup-python@v5 40 | - name: Install dependencies 41 | run: | 42 | python -m pip install --upgrade pip 43 | pip install build 44 | - name: Build sdist 45 | run: python -m build --sdist 46 | - name: Upload sdist 47 | uses: actions/upload-artifact@v4 48 | with: 49 | name: artifact-source 50 | path: ./dist/*.tar.gz 51 | 52 | test-sdist: 53 | needs: 54 | - build-sdist 55 | runs-on: ubuntu-latest 56 | steps: 57 | - uses: actions/setup-python@v5 58 | - name: Install dependencies 59 | run: | 60 | python -m pip install --upgrade pip 61 | # Upgrade packaging to avoid a bug in twine.
62 | # See https://github.com/pypa/twine/issues/1216 63 | pip install "twine>=6.1.0" "packaging>=24.2" 64 | - name: Download sdist 65 | uses: actions/download-artifact@v4 66 | with: 67 | name: artifact-source 68 | path: dist 69 | - name: Test sdist 70 | run: twine check dist/* 71 | - name: Test installation from sdist 72 | run: pip install dist/*.tar.gz 73 | 74 | publish: 75 | runs-on: ubuntu-latest 76 | needs: 77 | - build-wheels 78 | - build-sdist 79 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 80 | permissions: 81 | id-token: write 82 | environment: 83 | name: pypi 84 | url: https://pypi.org/p/patiencediff 85 | steps: 86 | - name: Download distributions 87 | uses: actions/download-artifact@v4 88 | with: 89 | merge-multiple: true 90 | pattern: artifact-* 91 | path: dist 92 | - name: Publish package distributions to PyPI 93 | uses: pypa/gh-action-pypi-publish@release/v1 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | build 3 | __pycache__ 4 | *~ 5 | *.so 6 | *.pyc 7 | patiencediff.egg-info 8 | target/ 9 | 10 | **/.claude/settings.local.json -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | John Arbash Meinel 2 | Lukáš Lalinský 3 | Martin Pool 4 | Jelmer Vernooij -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Repository Overview 6 | 7 | patiencediff is a Python implementation of the "patience diff" algorithm first described by Bram Cohen. The package contains a pure-Python implementation of the algorithm as well as faster C and Rust implementations. 8 | 9 | Similar to Python's `difflib`, this module provides: 10 | - A `unified_diff` function for generating unified diffs of text files 11 | - A `SequenceMatcher` that can be used on arbitrary lists 12 | 13 | The package was originally extracted from the Bazaar codebase and is now maintained by the Breezy team. 14 | 15 | ## Building and Installation 16 | 17 | To build the package: 18 | 19 | ```bash 20 | # Build the package (including C extension) 21 | pip3 install -e . 22 | 23 | # Build without C extension 24 | CIBUILDWHEEL=1 pip install -e . 25 | ``` 26 | 27 | ## Running Tests 28 | 29 | Tests use Python's built-in unittest framework: 30 | 31 | ```bash 32 | # Run all tests 33 | python3 -m unittest discover patiencediff 34 | 35 | # Run a specific test class 36 | python3 -m unittest patiencediff.test_patiencediff.TestPatienceDiffLib 37 | 38 | # Run a specific test method 39 | python3 -m unittest patiencediff.test_patiencediff.TestPatienceDiffLib.test_unique_lcs 40 | ``` 41 | 42 | ## Code Linting 43 | 44 | The project uses ruff for linting: 45 | 46 | ```bash 47 | # Install development dependencies (includes ruff) 48 | pip install -e ".[dev]" 49 | 50 | # Run linting 51 | ruff check .
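ruff format --check .  # CI runs this formatting check as well (see pythonpackage.yml)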
52 | ``` 53 | 54 | ## Using patiencediff 55 | 56 | To use the patiencediff module from the command line: 57 | 58 | ```bash 59 | python3 -m patiencediff file_a file_b 60 | 61 | # Use standard difflib algorithm instead of patience 62 | python3 -m patiencediff --difflib file_a file_b 63 | ``` 64 | 65 | From Python: 66 | 67 | ```python 68 | import patiencediff 69 | 70 | # Generate unified diff 71 | diff = patiencediff.unified_diff( 72 | ['a\n', 'b\n', 'c\n'], 73 | ['a\n', 'x\n', 'c\n'] 74 | ) 75 | print(''.join(diff)) 76 | 77 | # Use SequenceMatcher for custom diff operations 78 | matcher = patiencediff.PatienceSequenceMatcher(None, a_list, b_list) 79 | ``` 80 | 81 | ## Code Architecture 82 | 83 | The package consists of three implementations: 84 | 85 | 1. **Python implementation** (`_patiencediff_py.py`): Pure Python implementation of the algorithm, more readable but slower. 86 | 87 | 2. **C implementation** (`_patiencediff_c.c`): Faster implementation in C, requires a C compiler to build. 3. **Rust implementation** (`src/lib.rs`): Faster implementation built with PyO3, requires a Rust toolchain to build. 88 | 89 | The entry point (`__init__.py`) tries to load the Rust implementation first, and falls back to the Python implementation if the Rust extension isn't available. 90 | 91 | Key components: 92 | - `unique_lcs`: Finds the longest common subsequence of unique lines between two sequences 93 | - `recurse_matches`: Recursively finds matches between two sequences 94 | - `PatienceSequenceMatcher`: Main implementation of the diff algorithm, similar interface to `difflib.SequenceMatcher` 95 | - `unified_diff`: Creates a unified diff from two sequences 96 | - `unified_diff_files`: Reads two files and returns a unified diff 97 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socioeconomic status, 10 | nationality, personal appearance, race, religion, or sexual identity 11 | and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community.
15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the 27 | overall community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or 32 | advances of any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official email address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | [INSERT CONTACT METHOD]. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series 87 | of actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. 
Violating these terms may lead to a temporary or 94 | permanent ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within 114 | the community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.0, available at 120 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available 127 | at [https://www.contributor-covenant.org/translations][translations]. 128 | 129 | [homepage]: https://www.contributor-covenant.org 130 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 131 | [Mozilla CoC]: https://github.com/mozilla/diversity 132 | [FAQ]: https://www.contributor-covenant.org/faq 133 | [translations]: https://www.contributor-covenant.org/translations 134 | 135 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 
27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. 
You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. 
If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail.
311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing.
3 | version = 4 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.4.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 10 | 11 | [[package]] 12 | name = "cfg-if" 13 | version = "1.0.0" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 16 | 17 | [[package]] 18 | name = "heck" 19 | version = "0.5.0" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 22 | 23 | [[package]] 24 | name = "indoc" 25 | version = "2.0.6" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" 28 | 29 | [[package]] 30 | name = "libc" 31 | version = "0.2.172" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" 34 | 35 | [[package]] 36 | name = "memoffset" 37 | version = "0.9.1" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" 40 | dependencies = [ 41 | "autocfg", 42 | ] 43 | 44 | [[package]] 45 | name = "once_cell" 46 | version = "1.21.3" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 49 | 50 | [[package]] 51 | name = "patiencediff" 52 | version = "0.2.1" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "2c707262dd66fabcb1b2b79f2d65e2a1f8abb7e31005e882504c99680e009225" 55 | 56 | [[package]] 57 | name = "patiencediff-rs" 58 | version = "0.2.15" 59 | dependencies = [ 60 | "patiencediff", 61 | "pyo3", 62 | ] 63 | 64 | [[package]] 65 | name = "portable-atomic" 66 | version = "1.11.0" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" 69 | 70 | [[package]] 71 | name = "proc-macro2" 72 | version = "1.0.95" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" 75 | dependencies = [ 76 | "unicode-ident", 77 | ] 78 | 79 | [[package]] 80 | name = "pyo3" 81 | version = "0.24.2" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" 84 | dependencies = [ 85 | "cfg-if", 86 | "indoc", 87 | "libc", 88 | "memoffset", 89 | "once_cell", 90 | "portable-atomic", 91 | "pyo3-build-config", 92 | "pyo3-ffi", 93 | "pyo3-macros", 94 | "unindent", 95 | ] 96 | 97 | [[package]] 98 | name = "pyo3-build-config" 99 | version = "0.24.2" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" 102 | dependencies = [ 103 | "once_cell", 104 | "target-lexicon", 105 | ] 106 | 107 | [[package]] 108 | name = "pyo3-ffi" 109 | version = "0.24.2" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" 112 | dependencies = [ 113 | "libc", 114 | "pyo3-build-config", 115 | ] 116 | 117 | [[package]] 118 | name = "pyo3-macros" 119 | 
version = "0.24.2" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" 122 | dependencies = [ 123 | "proc-macro2", 124 | "pyo3-macros-backend", 125 | "quote", 126 | "syn", 127 | ] 128 | 129 | [[package]] 130 | name = "pyo3-macros-backend" 131 | version = "0.24.2" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" 134 | dependencies = [ 135 | "heck", 136 | "proc-macro2", 137 | "pyo3-build-config", 138 | "quote", 139 | "syn", 140 | ] 141 | 142 | [[package]] 143 | name = "quote" 144 | version = "1.0.40" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 146 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 147 | dependencies = [ 148 | "proc-macro2", 149 | ] 150 | 151 | [[package]] 152 | name = "syn" 153 | version = "2.0.101" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" 156 | dependencies = [ 157 | "proc-macro2", 158 | "quote", 159 | "unicode-ident", 160 | ] 161 | 162 | [[package]] 163 | name = "target-lexicon" 164 | version = "0.13.2" 165 | source = "registry+https://github.com/rust-lang/crates.io-index" 166 | checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" 167 | 168 | [[package]] 169 | name = "unicode-ident" 170 | version = "1.0.18" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 173 | 174 | [[package]] 175 | name = "unindent" 176 | version = "0.2.4" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "patiencediff-rs" 3 | version = "0.2.15" 4 | edition = "2021" 5 | authors = ["Breezy Developers "] 6 | description = "Python bindings for patiencediff algorithm" 7 | license = "GPL-2.0-or-later" 8 | repository = "https://github.com/breezy-team/patiencediff" 9 | 10 | [lib] 11 | name = "_patiencediff_rs" 12 | crate-type = ["cdylib"] 13 | 14 | [dependencies] 15 | patiencediff = { version = "0.2.1", default-features = false } 16 | pyo3 = { version = "0.24.0", features = ["extension-module"] } -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS 2 | include README.rst 3 | include COPYING 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | patiencediff 2 | ############ 3 | 4 | This package contains the implementation of the ``patiencediff`` algorithm, as 5 | first described by Bram Cohen. 6 | 7 | Like Python's ``difflib``, this module provides both a convenience ``unified_diff`` 8 | function for generating unified diffs of text files 9 | and a ``SequenceMatcher`` that can be used on arbitrary lists. 10 | 11 | Patiencediff provides a good balance of performance, nice output for humans, 12 | and implementation simplicity.
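For a quick comparison of the two algorithms, both matchers can be passed to ``unified_diff`` via its ``sequencematcher`` argument (a minimal runnable sketch; the sample inputs are arbitrary)::

    import difflib
    import patiencediff

    a = ['a\n', 'b\n', 'c\n', 'd\n']
    b = ['a\n', 'd\n', 'b\n']

    # Patience matching anchors on lines that occur exactly once in both inputs.
    print(''.join(patiencediff.unified_diff(
        a, b, sequencematcher=patiencediff.PatienceSequenceMatcher)))

    # The same diff computed with Python's default matcher, for comparison.
    print(''.join(patiencediff.unified_diff(
        a, b, sequencematcher=difflib.SequenceMatcher)))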
13 | 14 | The code in this package was extracted from the Bazaar 15 | code base. 16 | 17 | The package comes with three implementations: 18 | 19 | * A Python implementation (_patiencediff_py.py); this implementation only 20 | requires a Python interpreter and is the most readable version of the three 21 | 22 | * A C implementation (_patiencediff_c.c); this implementation 23 | is faster, but requires a C compiler and is less readable * A Rust implementation (src/lib.rs); this implementation is also faster than the pure Python version and is tried first when it is available 24 | 25 | Usage 26 | ===== 27 | 28 | To invoke patiencediff from the command-line:: 29 | 30 | python -m patiencediff file_a file_b 31 | 32 | Or from Python:: 33 | 34 | >>> import patiencediff 35 | >>> print(''.join(patiencediff.unified_diff( 36 | ... ['a\n', 'b\n', 'b\n', 'c\n'], 37 | ... ['a\n', 'c\n', 'b\n']))) 38 | --- 39 | +++ 40 | @@ -1,4 +1,3 @@ 41 | a 42 | +c 43 | b 44 | -b 45 | -c 46 | -------------------------------------------------------------------------------- /build.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | :: To build extensions for 64 bit Python 3, we need to configure environment 3 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 4 | :: MS Windows SDK for Windows 7 and .NET Framework 4 5 | :: 6 | :: More details at: 7 | :: https://github.com/cython/cython/wiki/CythonExtensionsOnWindows 8 | 9 | IF "%DISTUTILS_USE_SDK%"=="1" ( 10 | ECHO Configuring environment to build with MSVC on a 64bit architecture 11 | ECHO Using Windows SDK 7.1 12 | "C:\Program Files\Microsoft SDKs\Windows\v7.1\Setup\WindowsSdkVer.exe" -q -version:v7.1 13 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release 14 | SET MSSdk=1 15 | REM Need the following to allow tox to see the SDK compiler 16 | SET TOX_TESTENV_PASSENV=DISTUTILS_USE_SDK MSSdk INCLUDE LIB 17 | ) ELSE ( 18 | ECHO Using default MSVC build environment 19 | ) 20 | 21 | CALL %* -------------------------------------------------------------------------------- /disperse.toml: -------------------------------------------------------------------------------- 1 | name = "patiencediff" 2 | tag-name = "v$VERSION" 3 | verify-command = "python3 -m unittest patiencediff.test_patiencediff" 4 | tarball-location = [] 5 | release-timeout = 5 6 | 7 | [[update_version]] 8 | path = "patiencediff/__init__.py" 9 | match = "^__version__ = ((.*))$" 10 | new-line = "__version__ = $TUPLED_VERSION" -------------------------------------------------------------------------------- /patiencediff/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2005, 2006, 2007 Canonical Ltd 2 | # Copyright (C) 2021-2023 Jelmer Vernooij 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details.
13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program; if not, write to the Free Software 16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | 18 | import difflib 19 | import os 20 | import sys 21 | import time 22 | from typing import Type 23 | 24 | __all__ = [ 25 | "PatienceSequenceMatcher", 26 | "unified_diff", 27 | "unified_diff_files", 28 | "recurse_matches", 29 | "unique_lcs", 30 | ] 31 | 32 | __version__ = (0, 2, 15) 33 | 34 | 35 | # This is a version of unified_diff which only adds a factory parameter 36 | # so that you can override the default SequenceMatcher. 37 | # This has been submitted as a patch to Python. 38 | def unified_diff( 39 | a, 40 | b, 41 | fromfile="", 42 | tofile="", 43 | fromfiledate="", 44 | tofiledate="", 45 | n=3, 46 | lineterm="\n", 47 | sequencematcher=None, 48 | ): 49 | r"""Compare two sequences of lines; generate the delta as a unified diff. 50 | 51 | Unified diffs are a compact way of showing line changes and a few 52 | lines of context. The number of context lines is set by 'n' which 53 | defaults to three. 54 | 55 | By default, the diff control lines (those with ---, +++, or @@) are 56 | created with a trailing newline. This is helpful so that inputs 57 | created from file.readlines() result in diffs that are suitable for 58 | file.writelines() since both the inputs and outputs have trailing 59 | newlines. 60 | 61 | For inputs that do not have trailing newlines, set the lineterm 62 | argument to "" so that the output will be uniformly newline free. 63 | 64 | The unidiff format normally has a header for filenames and modification 65 | times. Any or all of these may be specified using strings for 66 | 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification 67 | times are normally expressed in the format returned by time.ctime(). 68 | 69 | Example: 70 | >>> for line in unified_diff('one two three four'.split(), 71 | ... 'zero one tree four'.split(), 'Original', 'Current', 72 | ... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003', 73 | ... lineterm=''): 74 | ... print(line) 75 | --- Original Sat Jan 26 23:30:50 1991 76 | +++ Current Fri Jun 06 10:20:52 2003 77 | @@ -1,4 +1,4 @@ 78 | +zero 79 | one 80 | -two 81 | -three 82 | +tree 83 | four 84 | """ 85 | if sequencematcher is None: 86 | sequencematcher = difflib.SequenceMatcher 87 | 88 | if fromfiledate: 89 | fromfiledate = "\t" + str(fromfiledate) 90 | if tofiledate: 91 | tofiledate = "\t" + str(tofiledate) 92 | 93 | started = False 94 | for group in sequencematcher(None, a, b).get_grouped_opcodes(n): 95 | if not started: 96 | yield f"--- {fromfile}{fromfiledate}{lineterm}" 97 | yield f"+++ {tofile}{tofiledate}{lineterm}" 98 | started = True 99 | i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4] 100 | yield f"@@ -{i1 + 1},{i2 - i1} +{j1 + 1},{j2 - j1} @@{lineterm}" 101 | for tag, i1, i2, j1, j2 in group: 102 | if tag == "equal": 103 | for line in a[i1:i2]: 104 | yield " " + line 105 | continue 106 | if tag == "replace" or tag == "delete": 107 | for line in a[i1:i2]: 108 | yield "-" + line 109 | if tag == "replace" or tag == "insert": 110 | for line in b[j1:j2]: 111 | yield "+" + line 112 | 113 | 114 | def unified_diff_files(a, b, sequencematcher=None): 115 | """Generate the diff for two files.""" 116 | # Should this actually be an error?
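# Identical filenames always compare equal, so skip reading the files and return an empty diff.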
117 | if a == b: 118 | return [] 119 | if a == "-": 120 | lines_a = sys.stdin.readlines() 121 | time_a = time.time() 122 | else: 123 | with open(a) as f: 124 | lines_a = f.readlines() 125 | time_a = os.stat(a).st_mtime # noqa: F841 126 | 127 | if b == "-": 128 | lines_b = sys.stdin.readlines() 129 | time_b = time.time() 130 | else: 131 | with open(b) as f: 132 | lines_b = f.readlines() 133 | time_b = os.stat(b).st_mtime # noqa: F841 134 | 135 | # TODO: Include fromfiledate and tofiledate 136 | return unified_diff( 137 | lines_a, lines_b, fromfile=a, tofile=b, sequencematcher=sequencematcher 138 | ) 139 | 140 | 141 | PatienceSequenceMatcher: Type[difflib.SequenceMatcher] 142 | 143 | 144 | # Try to import the Rust implementation first 145 | try: 146 | from ._patiencediff_rs import ( 147 | PatienceSequenceMatcher_rs as PatienceSequenceMatcher, 148 | ) 149 | from ._patiencediff_rs import recurse_matches_rs as recurse_matches 150 | from ._patiencediff_rs import unique_lcs_rs as unique_lcs 151 | except ImportError: 152 | # Fall back to the Python implementation if Rust is not available 153 | from ._patiencediff_py import ( 154 | PatienceSequenceMatcher_py as PatienceSequenceMatcher, 155 | ) 156 | from ._patiencediff_py import ( 157 | recurse_matches_py as recurse_matches, 158 | ) 159 | from ._patiencediff_py import unique_lcs_py as unique_lcs 160 | -------------------------------------------------------------------------------- /patiencediff/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2005, 2006, 2007 Canonical Ltd 2 | # 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | 17 | import difflib 18 | import sys 19 | 20 | from . 
import PatienceSequenceMatcher, unified_diff_files 21 | 22 | 23 | def main(argv=None): 24 | import optparse 25 | 26 | p = optparse.OptionParser( 27 | usage="%prog [options] file_a file_b" 28 | '\nFiles can be "-" to read from stdin' 29 | ) 30 | p.add_option( 31 | "--patience", 32 | dest="matcher", 33 | action="store_const", 34 | const="patience", 35 | default="patience", 36 | help="Use the patience difference algorithm", 37 | ) 38 | p.add_option( 39 | "--difflib", 40 | dest="matcher", 41 | action="store_const", 42 | const="difflib", 43 | default="patience", 44 | help="Use python's difflib algorithm", 45 | ) 46 | 47 | algorithms = { 48 | "patience": PatienceSequenceMatcher, 49 | "difflib": difflib.SequenceMatcher, 50 | } 51 | 52 | (opts, args) = p.parse_args(argv) 53 | matcher = algorithms[opts.matcher] 54 | 55 | if len(args) != 2: 56 | print("You must supply 2 filenames to diff") 57 | return -1 58 | 59 | for line in unified_diff_files(args[0], args[1], sequencematcher=matcher): 60 | sys.stdout.write(line) 61 | 62 | 63 | sys.exit(main(sys.argv[1:])) 64 | -------------------------------------------------------------------------------- /patiencediff/_patiencediff_c.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2007, 2010 Canonical Ltd 3 | Copyright (C) 2021-2023 Jelmer Vernooij 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 2 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program; if not, write to the Free Software 17 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 18 | 19 | Function equate_lines based on bdiff.c from Mercurial. 20 | Copyright (C) 2005, 2006 Matt Mackall 21 | 22 | Functions unique_lcs/recurse_matches based on _patiencediff_py.py. 23 | Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd 24 | */ 25 | 26 | 27 | #include <Python.h> 28 | #include <stdlib.h> 29 | #include <string.h> 30 | 31 | 32 | #if defined(__GNUC__) 33 | # define inline __inline__ 34 | #elif defined(_MSC_VER) 35 | # define inline __inline 36 | #else 37 | # define inline 38 | #endif 39 | 40 | 41 | #define MIN(a, b) (((a) > (b)) ? (b) : (a)) 42 | #define MAX(a, b) (((a) > (b)) ? (a) : (b)) 43 | 44 | 45 | #define SENTINEL -1 46 | 47 | 48 | /* malloc returns NULL on some platforms if you try to allocate nothing, 49 | * causing failures in callers that treat NULL as an allocation error. 50 | * On glibc it passes, but 51 | * let's make it fail to aid testing. */ 52 | #define guarded_malloc(x) ( ((x) > 0) ?
malloc(x) : NULL ) 53 | 54 | enum { 55 | OP_EQUAL = 0, 56 | OP_INSERT, 57 | OP_DELETE, 58 | OP_REPLACE 59 | }; 60 | 61 | 62 | /* values from this array need to correspond to the order of the enum above */ 63 | static char *opcode_names[] = { 64 | "equal", 65 | "insert", 66 | "delete", 67 | "replace", 68 | }; 69 | 70 | 71 | struct line { 72 | long hash; /* hash code of the string/object */ 73 | Py_ssize_t next; /* next line from the same equivalence class */ 74 | Py_ssize_t equiv; /* equivalence class */ 75 | PyObject *data; 76 | }; 77 | 78 | 79 | struct bucket { 80 | Py_ssize_t a_head; /* first item in `a` from this equivalence class */ 81 | Py_ssize_t a_count; 82 | Py_ssize_t b_head; /* first item in `b` from this equivalence class */ 83 | Py_ssize_t b_count; 84 | Py_ssize_t a_pos; 85 | Py_ssize_t b_pos; 86 | }; 87 | 88 | 89 | struct hashtable { 90 | Py_ssize_t last_a_pos; 91 | Py_ssize_t last_b_pos; 92 | Py_ssize_t size; 93 | struct bucket *table; 94 | }; 95 | 96 | struct matching_line { 97 | Py_ssize_t a; /* index of the line in `a` */ 98 | Py_ssize_t b; /* index of the line in `b` */ 99 | }; 100 | 101 | 102 | struct matching_block { 103 | Py_ssize_t a; /* index of the first line in `a` */ 104 | Py_ssize_t b; /* index of the first line in `b` */ 105 | Py_ssize_t len; /* length of the block */ 106 | }; 107 | 108 | 109 | struct matching_blocks { 110 | struct matching_block *matches; 111 | Py_ssize_t count; 112 | }; 113 | 114 | 115 | struct opcode { 116 | int tag; 117 | Py_ssize_t i1; 118 | Py_ssize_t i2; 119 | Py_ssize_t j1; 120 | Py_ssize_t j2; 121 | }; 122 | 123 | 124 | typedef struct { 125 | PyObject_HEAD 126 | Py_ssize_t asize; 127 | Py_ssize_t bsize; 128 | struct line *a; 129 | struct line *b; 130 | struct hashtable hashtable; 131 | Py_ssize_t *backpointers; 132 | } PatienceSequenceMatcher; 133 | 134 | 135 | static inline Py_ssize_t 136 | bisect_left(Py_ssize_t *list, Py_ssize_t item, Py_ssize_t lo, Py_ssize_t hi) 137 | { 138 | while (lo < hi) { 139 | Py_ssize_t mid = lo / 2 + hi / 2 + (lo % 2 + hi % 2) / 2; 140 | if (list[mid] < item) 141 | lo = mid + 1; 142 | else 143 | hi = mid; 144 | } 145 | return lo; 146 | } 147 | 148 | 149 | static inline int 150 | compare_lines(struct line *a, struct line *b) 151 | { 152 | return ((a->hash != b->hash) 153 | || PyObject_RichCompareBool(a->data, b->data, Py_EQ) == 0); 154 | } 155 | 156 | 157 | static inline int 158 | find_equivalence_class(struct bucket *hashtable, Py_ssize_t hsize, 159 | struct line *lines, struct line *ref_lines, 160 | Py_ssize_t i) 161 | { 162 | Py_ssize_t j; 163 | for (j = lines[i].hash & hsize; hashtable[j].b_head != SENTINEL; j = (j + 1) & hsize) { 164 | if (!compare_lines(lines + i, ref_lines + hashtable[j].b_head)) { 165 | break; 166 | } 167 | } 168 | return j; 169 | } 170 | 171 | 172 | static int 173 | equate_lines(struct hashtable *result, 174 | struct line *lines_a, struct line *lines_b, 175 | Py_ssize_t asize, Py_ssize_t bsize) 176 | { 177 | Py_ssize_t i, j, hsize; 178 | struct bucket *hashtable; 179 | 180 | /* check for overflow, we need the table to be at least bsize+1 */ 181 | if (bsize == PY_SSIZE_T_MAX) { 182 | PyErr_SetNone(PyExc_OverflowError); 183 | return 0; 184 | } 185 | 186 | /* build a hash table of the next highest power of 2 */ 187 | hsize = 1; 188 | while (hsize < bsize + 1) 189 | hsize *= 2; 190 | 191 | /* can't be 0 */ 192 | hashtable = (struct bucket *) guarded_malloc(sizeof(struct bucket) * hsize); 193 | if (hashtable == NULL) { 194 | PyErr_NoMemory(); 195 | return 0; 196 | } 197 | 198 | /* 
initialise the hashtable */ 199 | for (i = 0; i < hsize; i++) { 200 | hashtable[i].a_count = 0; 201 | hashtable[i].b_count = 0; 202 | hashtable[i].a_head = SENTINEL; 203 | hashtable[i].b_head = SENTINEL; 204 | } 205 | hsize--; 206 | 207 | /* add lines from lines_b to the hash table chains. iterating 208 | backwards so the matching lines are sorted to the linked list 209 | by the line number (because we are adding new lines to the 210 | head of the list) */ 211 | for (i = bsize - 1; i >= 0; i--) { 212 | /* find the first hashtable entry, which is either empty or contains 213 | the same line as lines_b[i] */ 214 | j = find_equivalence_class(hashtable, hsize, lines_b, lines_b, i); 215 | 216 | /* set the equivalence class */ 217 | lines_b[i].equiv = j; 218 | 219 | /* add to the head of the equivalence class */ 220 | lines_b[i].next = hashtable[j].b_head; 221 | hashtable[j].b_head = i; 222 | hashtable[j].b_count++; 223 | } 224 | 225 | /* match items from lines_a to their equivalence class in lines_b. 226 | again, iterating backwards for the right order of the linked lists */ 227 | for (i = asize - 1; i >= 0; i--) { 228 | /* find the first hash entry, which is either empty or contains 229 | the same line as lines_a[i] */ 230 | j = find_equivalence_class(hashtable, hsize, lines_a, lines_b, i); 231 | 232 | /* set the equivalence class, even if we are not interested in this 233 | line, because the values are not pre-filled */ 234 | lines_a[i].equiv = j; 235 | 236 | /* we are not interested in lines which are not also in lines_b */ 237 | if (hashtable[j].b_head == SENTINEL) 238 | continue; 239 | 240 | /* add to the head of the equivalence class */ 241 | lines_a[i].next = hashtable[j].a_head; 242 | hashtable[j].a_head = i; 243 | hashtable[j].a_count++; 244 | } 245 | 246 | result->last_a_pos = -1; 247 | result->last_b_pos = -1; 248 | result->size = hsize + 1; 249 | result->table = hashtable; 250 | 251 | return 1; 252 | } 253 | 254 | 255 | 256 | /* Finds longest common subsequence of unique lines in a[alo:ahi] and 257 | b[blo:bhi]. 258 | Parameter backpointers must have allocated memory for at least 259 | 4 * (bhi - blo) ints. 
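   (The single backpointers allocation is carved into four arrays of
   bhi - blo entries each: backpointers, stacks, lasts and btoa; see the
   "unpack" step at the top of the function body.)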
*/
260 | Py_ssize_t
261 | unique_lcs(struct matching_line *answer,
262 |            struct hashtable *hashtable, Py_ssize_t *backpointers,
263 |            struct line *lines_a, struct line *lines_b,
264 |            Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi)
265 | {
266 |     Py_ssize_t i, k, equiv, apos, bpos, norm_apos, norm_bpos, bsize, stacksize;
267 |     Py_ssize_t *stacks, *lasts, *btoa;
268 |     struct bucket *h;
269 | 
270 |     k = 0;
271 |     stacksize = 0;
272 |     bsize = bhi - blo;
273 |     h = hashtable->table;
274 | 
275 |     /* "unpack" the allocated memory */
276 |     stacks = backpointers + bsize;
277 |     lasts = stacks + bsize;
278 |     btoa = lasts + bsize;
279 | 
280 |     /* initialise the backpointers */
281 |     for (i = 0; i < bsize; i++)
282 |         backpointers[i] = SENTINEL;
283 | 
284 |     if (hashtable->last_a_pos == -1 || hashtable->last_a_pos > alo)
285 |         for (i = 0; i < hashtable->size; i++)
286 |             h[i].a_pos = h[i].a_head;
287 |     hashtable->last_a_pos = alo;
288 | 
289 |     if (hashtable->last_b_pos == -1 || hashtable->last_b_pos > blo)
290 |         for (i = 0; i < hashtable->size; i++)
291 |             h[i].b_pos = h[i].b_head;
292 |     hashtable->last_b_pos = blo;
293 | 
294 |     for (bpos = blo; bpos < bhi; bpos++) {
295 |         equiv = lines_b[bpos].equiv;
296 | 
297 |         /* no lines in a or b */
298 |         if (h[equiv].a_count == 0 || h[equiv].b_count == 0)
299 |             continue;
300 | 
301 |         /* find a unique line in lines_a that matches lines_b[bpos];
302 |            if we find more than one line within the range alo:ahi,
303 |            jump to the next line from lines_b immediately */
304 |         apos = SENTINEL;
305 |         /* loop through all lines in the linked list */
306 |         for (i = h[equiv].a_pos; i != SENTINEL; i = lines_a[i].next) {
307 |             /* the index is lower than alo, continue to the next line */
308 |             if (i < alo) {
309 |                 h[equiv].a_pos = i;
310 |                 continue;
311 |             }
312 |             /* the index is higher than ahi, stop searching */
313 |             if (i >= ahi)
314 |                 break;
315 |             /* if the line is within our range, check if it's a duplicate */
316 |             if (apos != SENTINEL)
317 |                 goto nextb;
318 |             /* save index to the line */
319 |             apos = i;
320 |         }
321 |         /* this line has no equivalent in lines_a[alo:ahi] */
322 |         if (apos == SENTINEL)
323 |             goto nextb;
324 | 
325 |         /* check for duplicates of this line in lines_b[blo:bhi] */
326 |         /* loop through all lines in the linked list */
327 |         for (i = h[equiv].b_pos; i != SENTINEL; i = lines_b[i].next) {
328 |             /* the index is lower than blo, continue to the next line */
329 |             if (i < blo) {
330 |                 h[equiv].b_pos = i;
331 |                 continue;
332 |             }
333 |             /* the index is higher than bhi, stop searching */
334 |             if (i >= bhi)
335 |                 break;
336 |             /* if this isn't the line we started with and it's within
337 |                our range, it's a duplicate */
338 |             if (i != bpos)
339 |                 goto nextb;
340 |         }
341 | 
342 |         /* use normalised indexes ([0,ahi-alo) instead of [alo,ahi))
343 |            for the patience sorting algorithm */
344 |         norm_bpos = bpos - blo;
345 |         norm_apos = apos - alo;
346 |         btoa[norm_bpos] = norm_apos;
347 | 
348 |         /*
349 |            Ok, how does this work...
350 | 
351 |            We have a list of matching lines from two lists, a and b. These
352 |            matches are stored in variable `btoa`. As we are iterating over this
353 |            table by bpos, the lines from b already form an increasing sequence.
354 |            We also need to "sort" the lines from a using the patience sorting
355 |            algorithm, ignoring the lines which would need to be swapped.
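           A small illustration: inserting the a-indexes 3, 1, 4, 2, 5 one
           by one leaves `stacks` evolving as [3] -> [1] -> [1, 4] ->
           [1, 2] -> [1, 2, 5]; the number of piles (3 here) is the length
           of the longest increasing subsequence.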
356 | 357 | http://en.wikipedia.org/wiki/Patience_sorting 358 | 359 | For each pair of lines, we need to place the line from a on either 360 | an existing pile that has higher value on the top or create a new 361 | pile. Variable `stacks` represents the tops of these piles and in 362 | variable `lasts` we store the lines from b, that correspond to the 363 | lines from a in `stacks`. 364 | 365 | Whenever we place a new line on top of a pile, we store a 366 | backpointer to the line (b) from top of the previous pile. This means 367 | that after the loop, variable `backpointers` will contain an index 368 | to the previous matching lines that forms an increasing sequence 369 | (over both indexes a and b) with the current matching lines. If 370 | either index a or b of the previous matching lines would be higher 371 | than indexes of the current one or if the indexes of the current 372 | one are 0, it will contain SENTINEL. 373 | 374 | To construct the LCS, we will just need to follow these backpointers 375 | from the top of the last pile and stop when we reach SENTINEL. 376 | */ 377 | 378 | /* as an optimization, check if the next line comes at the end, 379 | because it usually does */ 380 | if (stacksize && stacks[stacksize - 1] < norm_apos) 381 | k = stacksize; 382 | /* as an optimization, check if the next line comes right after 383 | the previous line, because usually it does */ 384 | else if (stacksize && (stacks[k] < norm_apos) && 385 | (k == stacksize - 1 || stacks[k + 1] > norm_apos)) 386 | k += 1; 387 | else 388 | k = bisect_left(stacks, norm_apos, 0, stacksize); 389 | 390 | if (k > 0) 391 | backpointers[norm_bpos] = lasts[k - 1]; 392 | 393 | if (k < stacksize) { 394 | stacks[k] = norm_apos; 395 | lasts[k] = norm_bpos; 396 | } 397 | else { 398 | stacks[stacksize] = norm_apos; 399 | lasts[stacksize] = norm_bpos; 400 | stacksize += 1; 401 | } 402 | 403 | 404 | nextb: 405 | ; 406 | } 407 | 408 | if (stacksize == 0) 409 | return 0; 410 | 411 | /* backtrace the structures to find the LCS */ 412 | i = 0; 413 | k = lasts[stacksize - 1]; 414 | while (k != SENTINEL) { 415 | answer[i].a = btoa[k]; 416 | answer[i].b = k; 417 | k = backpointers[k]; 418 | i++; 419 | } 420 | 421 | return i; 422 | } 423 | 424 | /* Adds a new line to the list of matching blocks, either extending the 425 | current block or adding a new one. 
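   For example (illustrative): consecutive pairs (3, 5), (4, 6), (5, 7)
   are merged into the single block {a: 3, b: 5, len: 3} rather than
   three one-line blocks.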
*/ 426 | static inline void 427 | add_matching_line(struct matching_blocks *answer, Py_ssize_t a, Py_ssize_t b) 428 | { 429 | Py_ssize_t last_index = answer->count - 1; 430 | if ((last_index >= 0) && 431 | (a == answer->matches[last_index].a + 432 | answer->matches[last_index].len) && 433 | (b == answer->matches[last_index].b + 434 | answer->matches[last_index].len)) { 435 | /* enlarge the last block */ 436 | answer->matches[last_index].len++; 437 | } 438 | else { 439 | /* create a new block */ 440 | last_index++; 441 | answer->matches[last_index].a = a; 442 | answer->matches[last_index].b = b; 443 | answer->matches[last_index].len = 1; 444 | answer->count++; 445 | } 446 | } 447 | 448 | 449 | static int 450 | recurse_matches(struct matching_blocks *answer, struct hashtable *hashtable, 451 | Py_ssize_t *backpointers, struct line *a, struct line *b, 452 | Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi, 453 | int maxrecursion) 454 | { 455 | int res; 456 | Py_ssize_t new, last_a_pos, last_b_pos, lcs_size, nahi, nbhi, i, apos, bpos; 457 | struct matching_line *lcs; 458 | 459 | if (maxrecursion < 0) 460 | return 1; 461 | 462 | if (alo == ahi || blo == bhi) 463 | return 1; 464 | 465 | new = 0; 466 | last_a_pos = alo - 1; 467 | last_b_pos = blo - 1; 468 | 469 | lcs = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * (bhi - blo)); 470 | if (lcs == NULL) 471 | return 0; 472 | 473 | lcs_size = unique_lcs(lcs, hashtable, backpointers, a, b, alo, blo, ahi, bhi); 474 | 475 | /* recurse between lines which are unique in each file and match */ 476 | for (i = lcs_size - 1; i >= 0; i--) { 477 | apos = alo + lcs[i].a; 478 | bpos = blo + lcs[i].b; 479 | if (last_a_pos + 1 != apos || last_b_pos + 1 != bpos) { 480 | res = recurse_matches(answer, hashtable, 481 | backpointers, a, b, 482 | last_a_pos + 1, last_b_pos + 1, 483 | apos, bpos, maxrecursion - 1); 484 | if (!res) 485 | goto error; 486 | } 487 | last_a_pos = apos; 488 | last_b_pos = bpos; 489 | add_matching_line(answer, apos, bpos); 490 | new = 1; 491 | } 492 | 493 | free(lcs); 494 | lcs = NULL; 495 | 496 | /* find matches between the last match and the end */ 497 | if (new > 0) { 498 | res = recurse_matches(answer, hashtable, 499 | backpointers, a, b, 500 | last_a_pos + 1, last_b_pos + 1, 501 | ahi, bhi, maxrecursion - 1); 502 | if (!res) 503 | goto error; 504 | } 505 | 506 | 507 | /* find matching lines at the very beginning */ 508 | else if (a[alo].equiv == b[blo].equiv) { 509 | while (alo < ahi && blo < bhi && a[alo].equiv == b[blo].equiv) 510 | add_matching_line(answer, alo++, blo++); 511 | res = recurse_matches(answer, hashtable, 512 | backpointers, a, b, 513 | alo, blo, ahi, bhi, maxrecursion - 1); 514 | if (!res) 515 | goto error; 516 | } 517 | 518 | /* find matching lines at the very end */ 519 | else if (a[ahi - 1].equiv == b[bhi - 1].equiv) { 520 | nahi = ahi - 1; 521 | nbhi = bhi - 1; 522 | while (nahi > alo && nbhi > blo && a[nahi - 1].equiv == b[nbhi - 1].equiv) { 523 | nahi--; 524 | nbhi--; 525 | } 526 | res = recurse_matches(answer, hashtable, 527 | backpointers, a, b, 528 | last_a_pos + 1, last_b_pos + 1, 529 | nahi, nbhi, maxrecursion - 1); 530 | if (!res) 531 | goto error; 532 | for (i = 0; i < ahi - nahi; i++) 533 | add_matching_line(answer, nahi + i, nbhi + i); 534 | } 535 | 536 | return 1; 537 | 538 | error: 539 | free(lcs); 540 | return 0; 541 | } 542 | 543 | 544 | static void 545 | delete_lines(struct line *lines, Py_ssize_t size) 546 | { 547 | struct line *line = lines; 548 | while (size-- > 0) 
{ 549 | Py_XDECREF(line->data); 550 | line++; 551 | } 552 | free(lines); 553 | } 554 | 555 | 556 | static Py_ssize_t 557 | load_lines(PyObject *orig, struct line **lines) 558 | { 559 | Py_ssize_t size, i; 560 | struct line *line; 561 | PyObject *seq, *item; 562 | 563 | seq = PySequence_Fast(orig, "sequence expected"); 564 | if (seq == NULL) { 565 | return -1; 566 | } 567 | 568 | size = PySequence_Fast_GET_SIZE(seq); 569 | if (size == 0) { 570 | Py_DECREF(seq); 571 | return 0; 572 | } 573 | 574 | /* Allocate a memory block for line data, initialized to 0 */ 575 | line = *lines = (struct line *)calloc(size, sizeof(struct line)); 576 | if (line == NULL) { 577 | PyErr_NoMemory(); 578 | Py_DECREF(seq); 579 | return -1; 580 | } 581 | 582 | for (i = 0; i < size; i++) { 583 | item = PySequence_Fast_GET_ITEM(seq, i); 584 | Py_INCREF(item); 585 | line->data = item; 586 | line->hash = PyObject_Hash(item); 587 | if (line->hash == (-1)) { 588 | /* Propagate the hash exception */ 589 | size = -1; 590 | goto cleanup; 591 | } 592 | line->next = SENTINEL; 593 | line++; 594 | } 595 | 596 | cleanup: 597 | Py_DECREF(seq); 598 | if (size == -1) { 599 | /* Error -- cleanup unused object references */ 600 | delete_lines(*lines, i); 601 | *lines = NULL; 602 | } 603 | return size; 604 | } 605 | 606 | 607 | static PyObject * 608 | py_unique_lcs(PyObject *self, PyObject *args) 609 | { 610 | PyObject *aseq, *bseq, *res, *item; 611 | Py_ssize_t asize, bsize, i, nmatches, *backpointers = NULL; 612 | struct line *a = NULL, *b = NULL; 613 | struct matching_line *matches = NULL; 614 | struct hashtable hashtable; 615 | 616 | if (!PyArg_ParseTuple(args, "OO", &aseq, &bseq)) 617 | return NULL; 618 | 619 | hashtable.table = NULL; 620 | 621 | asize = load_lines(aseq, &a); 622 | bsize = load_lines(bseq, &b); 623 | if (asize == -1 || bsize == -1) 624 | goto error; 625 | 626 | if (!equate_lines(&hashtable, a, b, asize, bsize)) 627 | goto error; 628 | 629 | if (bsize > 0) { 630 | matches = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * bsize); 631 | if (matches == NULL) 632 | goto error; 633 | 634 | backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4); 635 | if (backpointers == NULL) 636 | goto error; 637 | } 638 | 639 | nmatches = unique_lcs(matches, &hashtable, backpointers, a, b, 0, 0, asize, bsize); 640 | 641 | res = PyList_New(nmatches); 642 | for (i = 0; i < nmatches; i++) { 643 | item = Py_BuildValue("nn", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b); 644 | if (item == NULL) 645 | goto error; 646 | if (PyList_SetItem(res, i, item) != 0) 647 | goto error; 648 | } 649 | 650 | free(backpointers); 651 | free(matches); 652 | free(hashtable.table); 653 | delete_lines(b, bsize); 654 | delete_lines(a, asize); 655 | return res; 656 | 657 | error: 658 | free(backpointers); 659 | free(matches); 660 | free(hashtable.table); 661 | delete_lines(b, bsize); 662 | delete_lines(a, asize); 663 | return NULL; 664 | } 665 | 666 | 667 | static PyObject * 668 | py_recurse_matches(PyObject *self, PyObject *args) 669 | { 670 | PyObject *aseq, *bseq, *item, *answer; 671 | int maxrecursion, res; 672 | Py_ssize_t i, j, asize, bsize, alo, blo, ahi, bhi; 673 | Py_ssize_t *backpointers = NULL; 674 | struct line *a = NULL, *b = NULL; 675 | struct hashtable hashtable; 676 | struct matching_blocks matches; 677 | 678 | if (!PyArg_ParseTuple(args, "OOnnnnOi", &aseq, &bseq, &alo, &blo, 679 | &ahi, &bhi, &answer, &maxrecursion)) 680 | return NULL; 681 | 682 | hashtable.table = NULL; 683 | 
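    /* matches.matches is likewise pre-cleared so that the error path
       below can free() both pointers unconditionally */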
matches.matches = NULL; 684 | 685 | asize = load_lines(aseq, &a); 686 | bsize = load_lines(bseq, &b); 687 | if (asize == -1 || bsize == -1) 688 | goto error; 689 | 690 | if (!equate_lines(&hashtable, a, b, asize, bsize)) 691 | goto error; 692 | 693 | matches.count = 0; 694 | 695 | if (bsize > 0) { 696 | matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * bsize); 697 | if (matches.matches == NULL) 698 | goto error; 699 | 700 | backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4); 701 | if (backpointers == NULL) 702 | goto error; 703 | } else { 704 | matches.matches = NULL; 705 | backpointers = NULL; 706 | } 707 | 708 | res = recurse_matches(&matches, &hashtable, backpointers, 709 | a, b, alo, blo, ahi, bhi, maxrecursion); 710 | if (!res) 711 | goto error; 712 | 713 | for (i = 0; i < matches.count; i++) { 714 | for (j = 0; j < matches.matches[i].len; j++) { 715 | item = Py_BuildValue("nn", matches.matches[i].a + j, 716 | matches.matches[i].b + j); 717 | if (item == NULL) 718 | goto error; 719 | if (PyList_Append(answer, item) != 0) 720 | goto error; 721 | } 722 | } 723 | 724 | free(backpointers); 725 | free(matches.matches); 726 | free(hashtable.table); 727 | delete_lines(b, bsize); 728 | delete_lines(a, asize); 729 | Py_RETURN_NONE; 730 | 731 | error: 732 | free(backpointers); 733 | free(matches.matches); 734 | free(hashtable.table); 735 | delete_lines(b, bsize); 736 | delete_lines(a, asize); 737 | return NULL; 738 | } 739 | 740 | 741 | static PyObject * 742 | PatienceSequenceMatcher_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 743 | { 744 | PyObject *junk, *a, *b; 745 | PatienceSequenceMatcher *self; 746 | 747 | self = (PatienceSequenceMatcher *)type->tp_alloc(type, 0); 748 | if (self != NULL) { 749 | 750 | if (!PyArg_ParseTuple(args, "OOO", &junk, &a, &b)) { 751 | Py_DECREF(self); 752 | return NULL; 753 | } 754 | 755 | self->asize = load_lines(a, &(self->a)); 756 | self->bsize = load_lines(b, &(self->b)); 757 | 758 | if (self->asize == -1 || self->bsize == -1) { 759 | Py_DECREF(self); 760 | return NULL; 761 | } 762 | 763 | if (!equate_lines(&self->hashtable, self->a, self->b, self->asize, self->bsize)) { 764 | Py_DECREF(self); 765 | return NULL; 766 | } 767 | 768 | if (self->bsize > 0) { 769 | self->backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * self->bsize * 4); 770 | if (self->backpointers == NULL) { 771 | Py_DECREF(self); 772 | PyErr_NoMemory(); 773 | return NULL; 774 | } 775 | } else { 776 | self->backpointers = NULL; 777 | } 778 | 779 | } 780 | 781 | return (PyObject *)self; 782 | } 783 | 784 | 785 | static void 786 | PatienceSequenceMatcher_dealloc(PatienceSequenceMatcher* self) 787 | { 788 | free(self->backpointers); 789 | free(self->hashtable.table); 790 | delete_lines(self->b, self->bsize); 791 | delete_lines(self->a, self->asize); 792 | ((PyObject *)self)->ob_type->tp_free((PyObject *)self); 793 | } 794 | 795 | 796 | static char PatienceSequenceMatcher_get_matching_blocks_doc[] = 797 | "Return list of triples describing matching subsequences.\n" 798 | "\n" 799 | "Each triple is of the form (i, j, n), and means that\n" 800 | "a[i:i+n] == b[j:j+n]. 
The triples are monotonically increasing in\n" 801 | "i and in j.\n" 802 | "\n" 803 | "The last triple is a dummy, (len(a), len(b), 0), and is the only\n" 804 | "triple with n==0.\n" 805 | "\n" 806 | ">>> s = PatienceSequenceMatcher(None, \"abxcd\", \"abcd\")\n" 807 | ">>> s.get_matching_blocks()\n" 808 | "[(0, 0, 2), (3, 2, 2), (5, 4, 0)]\n"; 809 | 810 | static PyObject * 811 | PatienceSequenceMatcher_get_matching_blocks(PatienceSequenceMatcher* self) 812 | { 813 | PyObject *answer, *item; 814 | int res; 815 | Py_ssize_t i; 816 | struct matching_blocks matches; 817 | 818 | matches.count = 0; 819 | if (self->bsize > 0) { 820 | matches.matches = (struct matching_block *) 821 | guarded_malloc(sizeof(struct matching_block) * self->bsize); 822 | if (matches.matches == NULL) 823 | return PyErr_NoMemory(); 824 | } else 825 | matches.matches = NULL; 826 | 827 | res = recurse_matches(&matches, &self->hashtable, self->backpointers, 828 | self->a, self->b, 0, 0, 829 | self->asize, self->bsize, 10); 830 | if (!res) { 831 | free(matches.matches); 832 | return PyErr_NoMemory(); 833 | } 834 | 835 | answer = PyList_New(matches.count + 1); 836 | if (answer == NULL) { 837 | free(matches.matches); 838 | return NULL; 839 | } 840 | 841 | for (i = 0; i < matches.count; i++) { 842 | item = Py_BuildValue("nnn", matches.matches[i].a, 843 | matches.matches[i].b, matches.matches[i].len); 844 | if (item == NULL) 845 | goto error; 846 | if (PyList_SetItem(answer, i, item) != 0) 847 | goto error; 848 | } 849 | item = Py_BuildValue("nnn", self->asize, self->bsize, 0); 850 | if (item == NULL) 851 | goto error; 852 | if (PyList_SetItem(answer, i, item) != 0) 853 | goto error; 854 | 855 | free(matches.matches); 856 | return answer; 857 | 858 | error: 859 | free(matches.matches); 860 | Py_DECREF(answer); 861 | return NULL; 862 | } 863 | 864 | 865 | static char PatienceSequenceMatcher_get_opcodes_doc[] = 866 | "Return list of 5-tuples describing how to turn a into b.\n" 867 | "\n" 868 | "Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple\n" 869 | "has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the\n" 870 | "tuple preceding it, and likewise for j1 == the previous j2.\n" 871 | "\n" 872 | "The tags are strings, with these meanings:\n" 873 | "\n" 874 | "'replace': a[i1:i2] should be replaced by b[j1:j2]\n" 875 | "'delete': a[i1:i2] should be deleted.\n" 876 | " Note that j1==j2 in this case.\n" 877 | "'insert': b[j1:j2] should be inserted at a[i1:i1].\n" 878 | " Note that i1==i2 in this case.\n" 879 | "'equal': a[i1:i2] == b[j1:j2]\n" 880 | "\n" 881 | ">>> a = \"qabxcd\"\n" 882 | ">>> b = \"abycdf\"\n" 883 | ">>> s = PatienceSequenceMatcher(None, a, b)\n" 884 | ">>> for tag, i1, i2, j1, j2 in s.get_opcodes():\n" 885 | "... print (\"%7s a[%d:%d] (%s) b[%d:%d] (%s)\" %\n" 886 | "... 
(tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))\n" 887 | " delete a[0:1] (q) b[0:0] ()\n" 888 | " equal a[1:3] (ab) b[0:2] (ab)\n" 889 | "replace a[3:4] (x) b[2:3] (y)\n" 890 | " equal a[4:6] (cd) b[3:5] (cd)\n" 891 | " insert a[6:6] () b[5:6] (f)\n"; 892 | 893 | static PyObject * 894 | PatienceSequenceMatcher_get_opcodes(PatienceSequenceMatcher* self) 895 | { 896 | PyObject *answer, *item; 897 | Py_ssize_t i, j, k, ai, bj; 898 | int tag, res; 899 | struct matching_blocks matches; 900 | 901 | matches.count = 0; 902 | matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1)); 903 | if (matches.matches == NULL) 904 | return PyErr_NoMemory(); 905 | 906 | res = recurse_matches(&matches, &self->hashtable, self->backpointers, 907 | self->a, self->b, 0, 0, 908 | self->asize, self->bsize, 10); 909 | if (!res) { 910 | free(matches.matches); 911 | return PyErr_NoMemory(); 912 | } 913 | 914 | matches.matches[matches.count].a = self->asize; 915 | matches.matches[matches.count].b = self->bsize; 916 | matches.matches[matches.count].len = 0; 917 | matches.count++; 918 | 919 | answer = PyList_New(0); 920 | if (answer == NULL) { 921 | free(matches.matches); 922 | return NULL; 923 | } 924 | 925 | i = j = 0; 926 | for (k = 0; k < matches.count; k++) { 927 | ai = matches.matches[k].a; 928 | bj = matches.matches[k].b; 929 | 930 | tag = -1; 931 | if (i < ai && j < bj) 932 | tag = OP_REPLACE; 933 | else if (i < ai) 934 | tag = OP_DELETE; 935 | else if (j < bj) 936 | tag = OP_INSERT; 937 | 938 | if (tag != -1) { 939 | item = Py_BuildValue("snnnn", opcode_names[tag], i, ai, j, bj); 940 | if (item == NULL) 941 | goto error; 942 | if (PyList_Append(answer, item) != 0) 943 | goto error; 944 | } 945 | 946 | i = ai + matches.matches[k].len; 947 | j = bj + matches.matches[k].len; 948 | 949 | if (matches.matches[k].len > 0) { 950 | item = Py_BuildValue("snnnn", opcode_names[OP_EQUAL], ai, i, bj, j); 951 | if (item == NULL) 952 | goto error; 953 | if (PyList_Append(answer, item) != 0) 954 | goto error; 955 | } 956 | } 957 | 958 | free(matches.matches); 959 | return answer; 960 | 961 | error: 962 | free(matches.matches); 963 | Py_DECREF(answer); 964 | return NULL; 965 | } 966 | 967 | 968 | static char PatienceSequenceMatcher_get_grouped_opcodes_doc[] = 969 | "Isolate change clusters by eliminating ranges with no changes.\n" 970 | "\n" 971 | "Return a list of groups with upto n lines of context.\n" 972 | "Each group is in the same format as returned by get_opcodes().\n" 973 | "\n" 974 | ">>> from pprint import pprint\n" 975 | ">>> a = map(str, range(1,40))\n" 976 | ">>> b = a[:]\n" 977 | ">>> b[8:8] = ['i'] # Make an insertion\n" 978 | ">>> b[20] += 'x' # Make a replacement\n" 979 | ">>> b[23:28] = [] # Make a deletion\n" 980 | ">>> b[30] += 'y' # Make another replacement\n" 981 | ">>> pprint(PatienceSequenceMatcher(None,a,b).get_grouped_opcodes())\n" 982 | "[[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],\n" 983 | " [('equal', 16, 19, 17, 20),\n" 984 | " ('replace', 19, 20, 20, 21),\n" 985 | " ('equal', 20, 22, 21, 23),\n" 986 | " ('delete', 22, 27, 23, 23),\n" 987 | " ('equal', 27, 30, 23, 26)],\n" 988 | " [('equal', 31, 34, 27, 30),\n" 989 | " ('replace', 34, 35, 30, 31),\n" 990 | " ('equal', 35, 38, 31, 34)]]\n"; 991 | 992 | static PyObject * 993 | PatienceSequenceMatcher_get_grouped_opcodes(PatienceSequenceMatcher* self, 994 | PyObject *args) 995 | { 996 | PyObject *answer, *group, *item; 997 | Py_ssize_t i, j, k, ai, bj, size, ncodes, tag; 998 | 
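    /* i1..j2 hold the bounds of the opcode currently being copied into
       a group */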
Py_ssize_t i1, i2, j1, j2; 999 | int n = 3, nn, res; 1000 | struct matching_blocks matches; 1001 | struct opcode *codes; 1002 | 1003 | if (!PyArg_ParseTuple(args, "|i", &n)) 1004 | return NULL; 1005 | 1006 | matches.count = 0; 1007 | matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1)); 1008 | if (matches.matches == NULL) 1009 | return PyErr_NoMemory(); 1010 | 1011 | res = recurse_matches(&matches, &self->hashtable, self->backpointers, 1012 | self->a, self->b, 0, 0, 1013 | self->asize, self->bsize, 10); 1014 | if (!res) { 1015 | free(matches.matches); 1016 | return PyErr_NoMemory(); 1017 | } 1018 | 1019 | matches.matches[matches.count].a = self->asize; 1020 | matches.matches[matches.count].b = self->bsize; 1021 | matches.matches[matches.count].len = 0; 1022 | matches.count++; 1023 | 1024 | ncodes = 0; 1025 | codes = (struct opcode *)guarded_malloc(sizeof(struct opcode) * matches.count * 2); 1026 | if (codes == NULL) { 1027 | free(matches.matches); 1028 | return PyErr_NoMemory(); 1029 | } 1030 | 1031 | i = j = 0; 1032 | for (k = 0; k < matches.count; k++) { 1033 | ai = matches.matches[k].a; 1034 | bj = matches.matches[k].b; 1035 | 1036 | tag = -1; 1037 | if (i < ai && j < bj) 1038 | tag = OP_REPLACE; 1039 | else if (i < ai) 1040 | tag = OP_DELETE; 1041 | else if (j < bj) 1042 | tag = OP_INSERT; 1043 | 1044 | if (tag != -1) { 1045 | codes[ncodes].tag = tag; 1046 | codes[ncodes].i1 = i; 1047 | codes[ncodes].i2 = ai; 1048 | codes[ncodes].j1 = j; 1049 | codes[ncodes].j2 = bj; 1050 | ncodes++; 1051 | } 1052 | 1053 | i = ai + matches.matches[k].len; 1054 | j = bj + matches.matches[k].len; 1055 | 1056 | if (matches.matches[k].len > 0) { 1057 | codes[ncodes].tag = OP_EQUAL; 1058 | codes[ncodes].i1 = ai; 1059 | codes[ncodes].i2 = i; 1060 | codes[ncodes].j1 = bj; 1061 | codes[ncodes].j2 = j; 1062 | ncodes++; 1063 | } 1064 | } 1065 | 1066 | if (ncodes == 0) { 1067 | codes[ncodes].tag = OP_EQUAL; 1068 | codes[ncodes].i1 = 0; 1069 | codes[ncodes].i2 = 1; 1070 | codes[ncodes].j1 = 0; 1071 | codes[ncodes].j2 = 1; 1072 | ncodes++; 1073 | } 1074 | 1075 | /* fixup leading and trailing groups if they show no changes. */ 1076 | if (codes[0].tag == OP_EQUAL) { 1077 | codes[0].i1 = MAX(codes[0].i1, codes[0].i2 - n); 1078 | codes[0].j1 = MAX(codes[0].j1, codes[0].j2 - n); 1079 | } 1080 | if (codes[ncodes - 1].tag == OP_EQUAL) { 1081 | codes[ncodes - 1].i2 = MIN(codes[ncodes - 1].i2, 1082 | codes[ncodes - 1].i1 + n); 1083 | codes[ncodes - 1].j2 = MIN(codes[ncodes - 1].j2, 1084 | codes[ncodes - 1].j1 + n); 1085 | } 1086 | 1087 | group = NULL; 1088 | 1089 | answer = PyList_New(0); 1090 | if (answer == NULL) 1091 | goto error; 1092 | 1093 | group = PyList_New(0); 1094 | if (group == NULL) 1095 | goto error; 1096 | 1097 | nn = n + n; 1098 | tag = -1; 1099 | for (i = 0; i < ncodes; i++) { 1100 | tag = codes[i].tag; 1101 | i1 = codes[i].i1; 1102 | i2 = codes[i].i2; 1103 | j1 = codes[i].j1; 1104 | j2 = codes[i].j2; 1105 | /* end the current group and start a new one whenever 1106 | there is a large range with no changes. 
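           (Specifically, an 'equal' opcode spanning more than 2*n lines:
           the current group keeps n trailing context lines, and the next
           group starts n lines before the following change; see the
           nn = n + n test below.)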
*/ 1107 | if (tag == OP_EQUAL && i2 - i1 > nn) { 1108 | item = Py_BuildValue("snnnn", opcode_names[tag], 1109 | i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n)); 1110 | if (item == NULL) 1111 | goto error; 1112 | if (PyList_Append(group, item) != 0) 1113 | goto error; 1114 | if (PyList_Append(answer, group) != 0) 1115 | goto error; 1116 | group = PyList_New(0); 1117 | if (group == NULL) 1118 | goto error; 1119 | i1 = MAX(i1, i2 - n); 1120 | j1 = MAX(j1, j2 - n); 1121 | } 1122 | item = Py_BuildValue("snnnn", opcode_names[tag], i1, i2, j1 ,j2); 1123 | if (item == NULL) 1124 | goto error; 1125 | if (PyList_Append(group, item) != 0) 1126 | goto error; 1127 | } 1128 | size = PyList_Size(group); 1129 | if (size > 0 && !(size == 1 && tag == OP_EQUAL)) { 1130 | if (PyList_Append(answer, group) != 0) 1131 | goto error; 1132 | } 1133 | else 1134 | Py_DECREF(group); 1135 | 1136 | free(codes); 1137 | free(matches.matches); 1138 | return answer; 1139 | 1140 | error: 1141 | free(codes); 1142 | free(matches.matches); 1143 | Py_DECREF(group); 1144 | Py_DECREF(answer); 1145 | return NULL; 1146 | } 1147 | 1148 | 1149 | static PyMethodDef PatienceSequenceMatcher_methods[] = { 1150 | {"get_matching_blocks", 1151 | (PyCFunction)PatienceSequenceMatcher_get_matching_blocks, 1152 | METH_NOARGS, 1153 | PatienceSequenceMatcher_get_matching_blocks_doc}, 1154 | {"get_opcodes", 1155 | (PyCFunction)PatienceSequenceMatcher_get_opcodes, 1156 | METH_NOARGS, 1157 | PatienceSequenceMatcher_get_opcodes_doc}, 1158 | {"get_grouped_opcodes", 1159 | (PyCFunction)PatienceSequenceMatcher_get_grouped_opcodes, 1160 | METH_VARARGS, 1161 | PatienceSequenceMatcher_get_grouped_opcodes_doc}, 1162 | {NULL} 1163 | }; 1164 | 1165 | 1166 | static char PatienceSequenceMatcher_doc[] = 1167 | "C implementation of PatienceSequenceMatcher"; 1168 | 1169 | 1170 | static PyTypeObject PatienceSequenceMatcherType = { 1171 | PyVarObject_HEAD_INIT(NULL, 0) 1172 | .tp_name = "PatienceSequenceMatcher", 1173 | .tp_basicsize = sizeof(PatienceSequenceMatcher), 1174 | .tp_dealloc = (destructor)PatienceSequenceMatcher_dealloc, 1175 | .tp_flags = Py_TPFLAGS_DEFAULT, 1176 | .tp_doc = PatienceSequenceMatcher_doc, 1177 | .tp_methods = PatienceSequenceMatcher_methods, 1178 | .tp_new = PatienceSequenceMatcher_new, 1179 | }; 1180 | 1181 | 1182 | static PyMethodDef cpatiencediff_methods[] = { 1183 | {"unique_lcs_c", py_unique_lcs, METH_VARARGS}, 1184 | {"recurse_matches_c", py_recurse_matches, METH_VARARGS}, 1185 | {NULL, NULL} 1186 | }; 1187 | 1188 | static PyObject * 1189 | moduleinit(void) { 1190 | PyObject* m; 1191 | 1192 | if (PyType_Ready(&PatienceSequenceMatcherType) < 0) 1193 | return NULL; 1194 | 1195 | #if PY_MAJOR_VERSION >= 3 1196 | static struct PyModuleDef moduledef = { 1197 | PyModuleDef_HEAD_INIT, 1198 | "_patiencediff_c", /* m_name */ 1199 | "C implementation of PatienceSequenceMatcher", /* m_doc */ 1200 | -1, /* m_size */ 1201 | cpatiencediff_methods, /* m_methods */ 1202 | NULL, /* m_reload */ 1203 | NULL, /* m_traverse */ 1204 | NULL, /* m_clear*/ 1205 | NULL, /* m_free */ 1206 | }; 1207 | 1208 | m = PyModule_Create(&moduledef); 1209 | #else 1210 | m = Py_InitModule3("_patiencediff_c", cpatiencediff_methods, 1211 | "C implementation of PatienceSequenceMatcher"); 1212 | #endif 1213 | if (m == NULL) 1214 | return NULL; 1215 | 1216 | Py_INCREF(&PatienceSequenceMatcherType); 1217 | PyModule_AddObject(m, "PatienceSequenceMatcher_c", 1218 | (PyObject *)&PatienceSequenceMatcherType); 1219 | return m; 1220 | } 1221 | 1222 | #if PY_MAJOR_VERSION >= 3 1223 | 
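/* Python 3 module entry point; the function must be named
   PyInit_<module name> for the import system to find it. */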
PyMODINIT_FUNC 1224 | PyInit__patiencediff_c(void) 1225 | { 1226 | return moduleinit(); 1227 | } 1228 | #else 1229 | PyMODINIT_FUNC 1230 | init_patiencediff_c(void) 1231 | { 1232 | moduleinit(); 1233 | } 1234 | #endif 1235 | 1236 | 1237 | /* vim: sw=4 et 1238 | */ 1239 | -------------------------------------------------------------------------------- /patiencediff/_patiencediff_c.pyi: -------------------------------------------------------------------------------- 1 | import difflib 2 | from typing import Any, Sequence 3 | 4 | class PatienceSequenceMatcher_c(difflib.SequenceMatcher): 5 | def get_matching_blocks(self) -> list[difflib.Match]: ... 6 | 7 | def unique_lcs_c( 8 | a: Sequence[Any], b: Sequence[Any] 9 | ) -> list[tuple[int, int]]: ... 10 | def recurse_matches_c( 11 | a: Sequence[Any], 12 | b: Sequence[Any], 13 | alo: int, 14 | blo: int, 15 | ahi: int, 16 | bhi: int, 17 | answer: list[tuple[int, int]], 18 | maxrecursion: int, 19 | ) -> None: ... 20 | -------------------------------------------------------------------------------- /patiencediff/_patiencediff_py.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd 2 | # 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | 17 | import difflib 18 | from bisect import bisect 19 | from typing import Any, Dict, List, Optional, Sequence, Tuple 20 | 21 | 22 | class MaxRecursionDepth(Exception): 23 | def __init__(self) -> None: 24 | super().__init__("max recursion depth reached") 25 | 26 | 27 | def unique_lcs_py(a: Sequence[Any], b: Sequence[Any]) -> List[Tuple[int, int]]: 28 | """Find the longest common subset for unique lines. 29 | 30 | :param a: An indexable object (such as string or list of strings) 31 | :param b: Another indexable object (such as string or list of strings) 32 | :return: A list of tuples, one for each line which is matched. 33 | [(line_in_a, line_in_b), ...] 34 | 35 | This only matches lines which are unique on both sides. 36 | This helps prevent common lines from over influencing match 37 | results. 
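    For example (mirroring the test suite below), only 'b' occurs exactly
    once in both 'acbac' and 'abc', so it is the only line matched:

    >>> unique_lcs_py('acbac', 'abc')
    [(2, 1)]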
38 |     The longest common subset uses the Patience Sorting algorithm:
39 |     http://en.wikipedia.org/wiki/Patience_sorting
40 |     """
41 |     line: Any
42 |     # set index[line in a] = position of line in a unless
43 |     # a is a duplicate, in which case it's set to None
44 |     index: Dict[Any, Optional[int]] = {}
45 |     for i, line in enumerate(a):
46 |         if line in index:
47 |             index[line] = None
48 |         else:
49 |             index[line] = i
50 |     # make btoa[i] = position of line i in a, unless
51 |     # that line doesn't occur exactly once in both,
52 |     # in which case it's set to None
53 |     btoa: List[Optional[int]] = [None] * len(b)
54 |     index2: Dict[Any, int] = {}
55 |     for pos, line in enumerate(b):
56 |         next = index.get(line)
57 |         if next is not None:
58 |             if line in index2:
59 |                 # unset the previous mapping, which we now know to
60 |                 # be invalid because the line isn't unique
61 |                 btoa[index2[line]] = None
62 |                 del index[line]
63 |             else:
64 |                 index2[line] = pos
65 |                 btoa[pos] = next
66 |     # this is the Patience sorting algorithm
67 |     # see http://en.wikipedia.org/wiki/Patience_sorting
68 |     backpointers: List[Optional[int]] = [None] * len(b)
69 |     stacks: List[int] = []
70 |     lasts: List[int] = []
71 |     k: int = 0
72 |     for bpos, apos in enumerate(btoa):
73 |         if apos is None:
74 |             continue
75 |         # as an optimization, check if the next line comes at the end,
76 |         # because it usually does
77 |         if stacks and stacks[-1] < apos:
78 |             k = len(stacks)
79 |         # as an optimization, check if the next line comes right after
80 |         # the previous line, because usually it does
81 |         elif (
82 |             stacks
83 |             and stacks[k] < apos
84 |             and (k == len(stacks) - 1 or stacks[k + 1] > apos)
85 |         ):
86 |             k += 1
87 |         else:
88 |             k = bisect(stacks, apos)
89 |         if k > 0:
90 |             backpointers[bpos] = lasts[k - 1]
91 |         if k < len(stacks):
92 |             stacks[k] = apos
93 |             lasts[k] = bpos
94 |         else:
95 |             stacks.append(apos)
96 |             lasts.append(bpos)
97 |     if len(lasts) == 0:
98 |         return []
99 |     result = []
100 |     m: Optional[int] = lasts[-1]
101 |     while m is not None:
102 |         result.append((btoa[m], m))
103 |         m = backpointers[m]
104 |     result.reverse()
105 |     return result  # type: ignore
106 | 
107 | 
108 | def recurse_matches_py(
109 |     a: Sequence[Any],
110 |     b: Sequence[Any],
111 |     alo: int,
112 |     blo: int,
113 |     ahi: int,
114 |     bhi: int,
115 |     answer: List[Tuple[int, int]],
116 |     maxrecursion: int,
117 | ) -> None:
118 |     """Find all of the matching text in the lines of a and b.
119 | 
120 |     :param a: A sequence
121 |     :param b: Another sequence
122 |     :param alo: The start location of a to check, typically 0
123 |     :param blo: The start location of b to check, typically 0
124 |     :param ahi: The maximum length of a to check, typically len(a)
125 |     :param bhi: The maximum length of b to check, typically len(b)
126 |     :param answer: The return array. Will be filled with tuples
127 |         indicating [(line_in_a, line_in_b)]
128 |     :param maxrecursion: The maximum depth to recurse.
129 |         Must be a positive integer.
130 | :return: None, the return value is in the parameter answer, which 131 | should be a list 132 | 133 | """ 134 | if maxrecursion < 0: 135 | # this will never happen normally, this check is to prevent DOS attacks 136 | raise MaxRecursionDepth() 137 | oldlength = len(answer) 138 | if alo == ahi or blo == bhi: 139 | return 140 | last_a_pos = alo - 1 141 | last_b_pos = blo - 1 142 | for apos, bpos in unique_lcs_py(a[alo:ahi], b[blo:bhi]): 143 | # recurse between lines which are unique in each file and match 144 | apos += alo 145 | bpos += blo 146 | # Most of the time, you will have a sequence of similar entries 147 | if last_a_pos + 1 != apos or last_b_pos + 1 != bpos: 148 | recurse_matches_py( 149 | a, 150 | b, 151 | last_a_pos + 1, 152 | last_b_pos + 1, 153 | apos, 154 | bpos, 155 | answer, 156 | maxrecursion - 1, 157 | ) 158 | last_a_pos = apos 159 | last_b_pos = bpos 160 | answer.append((apos, bpos)) 161 | if len(answer) > oldlength: 162 | # find matches between the last match and the end 163 | recurse_matches_py( 164 | a, 165 | b, 166 | last_a_pos + 1, 167 | last_b_pos + 1, 168 | ahi, 169 | bhi, 170 | answer, 171 | maxrecursion - 1, 172 | ) 173 | elif a[alo] == b[blo]: 174 | # find matching lines at the very beginning 175 | while alo < ahi and blo < bhi and a[alo] == b[blo]: 176 | answer.append((alo, blo)) 177 | alo += 1 178 | blo += 1 179 | recurse_matches_py(a, b, alo, blo, ahi, bhi, answer, maxrecursion - 1) 180 | elif a[ahi - 1] == b[bhi - 1]: 181 | # find matching lines at the very end 182 | nahi = ahi - 1 183 | nbhi = bhi - 1 184 | while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]: 185 | nahi -= 1 186 | nbhi -= 1 187 | recurse_matches_py( 188 | a, 189 | b, 190 | last_a_pos + 1, 191 | last_b_pos + 1, 192 | nahi, 193 | nbhi, 194 | answer, 195 | maxrecursion - 1, 196 | ) 197 | for i in range(ahi - nahi): 198 | answer.append((nahi + i, nbhi + i)) 199 | 200 | 201 | def _collapse_sequences(matches): 202 | """Find sequences of lines. 
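    For example (illustrative): the matches [(0, 0), (1, 1), (4, 6)]
    collapse to [(0, 0, 2), (4, 6, 1)].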
203 | 204 | Given a sequence of [(line_in_a, line_in_b),] 205 | find regions where they both increment at the same time 206 | """ 207 | answer = [] 208 | start_a = start_b = None 209 | length = 0 210 | for i_a, i_b in matches: 211 | if ( 212 | start_a is not None 213 | and (i_a == start_a + length) 214 | and (i_b == start_b + length) 215 | ): 216 | length += 1 217 | else: 218 | if start_a is not None: 219 | answer.append((start_a, start_b, length)) 220 | start_a = i_a 221 | start_b = i_b 222 | length = 1 223 | 224 | if length != 0: 225 | answer.append((start_a, start_b, length)) 226 | 227 | return answer 228 | 229 | 230 | def _check_consistency(answer): 231 | # For consistency sake, make sure all matches are only increasing 232 | next_a = -1 233 | next_b = -1 234 | for a, b, match_len in answer: 235 | if a < next_a: 236 | raise ValueError("Non increasing matches for a") 237 | if b < next_b: 238 | raise ValueError("Non increasing matches for b") 239 | next_a = a + match_len 240 | next_b = b + match_len 241 | 242 | 243 | class PatienceSequenceMatcher_py(difflib.SequenceMatcher): 244 | """Compare a pair of sequences using longest common subset.""" 245 | 246 | _do_check_consistency = True 247 | 248 | def __init__(self, isjunk=None, a="", b="") -> None: 249 | if isjunk is not None: 250 | raise NotImplementedError( 251 | "Currently we do not support isjunk for sequence matching" 252 | ) 253 | difflib.SequenceMatcher.__init__(self, isjunk, a, b) 254 | 255 | def get_matching_blocks(self): 256 | """Return list of triples describing matching subsequences. 257 | 258 | Each triple is of the form (i, j, n), and means that 259 | a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in 260 | i and in j. 261 | 262 | The last triple is a dummy, (len(a), len(b), 0), and is the only 263 | triple with n==0. 264 | 265 | >>> s = PatienceSequenceMatcher(None, "abxcd", "abcd") 266 | >>> s.get_matching_blocks() 267 | [(0, 0, 2), (3, 2, 2), (5, 4, 0)] 268 | """ 269 | # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks 270 | # implementation which uses __helper. 2.4.3 got rid of helper for 271 | # doing it inline with a queue. 272 | # We should consider doing the same for recurse_matches 273 | 274 | if self.matching_blocks is not None: 275 | return self.matching_blocks 276 | 277 | matches = [] 278 | recurse_matches_py( 279 | self.a, self.b, 0, 0, len(self.a), len(self.b), matches, 10 280 | ) 281 | # Matches now has individual line pairs of 282 | # line A matches line B, at the given offsets 283 | self.matching_blocks = _collapse_sequences(matches) 284 | self.matching_blocks.append((len(self.a), len(self.b), 0)) 285 | if PatienceSequenceMatcher_py._do_check_consistency: 286 | if __debug__: 287 | _check_consistency(self.matching_blocks) 288 | 289 | return self.matching_blocks 290 | -------------------------------------------------------------------------------- /patiencediff/_patiencediff_rs.pyi: -------------------------------------------------------------------------------- 1 | import difflib 2 | from typing import Any, Callable, Literal, Sequence, TypeVar 3 | 4 | T = TypeVar("T") 5 | 6 | class PatienceSequenceMatcher_rs(difflib.SequenceMatcher): 7 | def __init__( 8 | self, junk: Callable[[T], bool] | None, a: Sequence[T], b: Sequence[T] 9 | ) -> None: ... 10 | def get_matching_blocks(self) -> list[difflib.Match]: ... 11 | def get_opcodes( 12 | self, 13 | ) -> list[ 14 | tuple[ 15 | Literal["replace", "delete", "insert", "equal"], int, int, int, int 16 | ] 17 | ]: ... 
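    # Mirrors difflib.SequenceMatcher.get_grouped_opcodes: returns groups
    # of opcodes with up to n lines of context around each change.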
18 | def get_grouped_opcodes( 19 | self, n: int = 3 20 | ) -> list[list[tuple[str, int, int, int, int]]]: ... 21 | 22 | def unique_lcs_rs( 23 | a: Sequence[Any], b: Sequence[Any] 24 | ) -> list[tuple[int, int]]: ... 25 | def recurse_matches_rs( 26 | a: Sequence[Any], 27 | b: Sequence[Any], 28 | alo: int, 29 | blo: int, 30 | ahi: int, 31 | bhi: int, 32 | answer: list[tuple[int, int]], 33 | maxrecursion: int, 34 | ) -> None: ... 35 | -------------------------------------------------------------------------------- /patiencediff/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/breezy-team/patiencediff/fff9527aae89dfaf249fc68b82516facb1350ce2/patiencediff/py.typed -------------------------------------------------------------------------------- /patiencediff/test_patiencediff.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2005, 2006, 2007 Canonical Ltd 2 | # Copyright (C) 2021-2023 Jelmer Vernooij 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program; if not, write to the Free Software 16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | 18 | import os 19 | import shutil 20 | import tempfile 21 | import unittest 22 | 23 | import patiencediff 24 | 25 | from . 
import _patiencediff_py 26 | 27 | 28 | class TestPatienceDiffLib(unittest.TestCase): 29 | def setUp(self): 30 | super().setUp() 31 | self._unique_lcs = _patiencediff_py.unique_lcs_py 32 | self._recurse_matches = _patiencediff_py.recurse_matches_py 33 | self._PatienceSequenceMatcher = ( 34 | _patiencediff_py.PatienceSequenceMatcher_py 35 | ) 36 | 37 | def test_diff_unicode_string(self): 38 | a = "".join([chr(i) for i in range(4000, 4500, 3)]) 39 | b = "".join([chr(i) for i in range(4300, 4800, 2)]) 40 | sm = self._PatienceSequenceMatcher(None, a, b) 41 | mb = sm.get_matching_blocks() 42 | self.assertEqual(35, len(mb)) 43 | 44 | def test_unique_lcs(self): 45 | unique_lcs = self._unique_lcs 46 | self.assertEqual(unique_lcs("", ""), []) 47 | self.assertEqual(unique_lcs("", "a"), []) 48 | self.assertEqual(unique_lcs("a", ""), []) 49 | self.assertEqual(unique_lcs("a", "a"), [(0, 0)]) 50 | self.assertEqual(unique_lcs("a", "b"), []) 51 | self.assertEqual(unique_lcs("ab", "ab"), [(0, 0), (1, 1)]) 52 | self.assertEqual( 53 | unique_lcs("abcde", "cdeab"), [(2, 0), (3, 1), (4, 2)] 54 | ) 55 | self.assertEqual( 56 | unique_lcs("cdeab", "abcde"), [(0, 2), (1, 3), (2, 4)] 57 | ) 58 | self.assertEqual( 59 | unique_lcs("abXde", "abYde"), [(0, 0), (1, 1), (3, 3), (4, 4)] 60 | ) 61 | self.assertEqual(unique_lcs("acbac", "abc"), [(2, 1)]) 62 | 63 | def test_recurse_matches(self): 64 | def test_one(a, b, matches): 65 | test_matches = [] 66 | self._recurse_matches(a, b, 0, 0, len(a), len(b), test_matches, 10) 67 | self.assertEqual(test_matches, matches) 68 | 69 | test_one( 70 | ["a", "", "b", "", "c"], 71 | ["a", "a", "b", "c", "c"], 72 | [(0, 0), (2, 2), (4, 4)], 73 | ) 74 | test_one( 75 | ["a", "c", "b", "a", "c"], 76 | ["a", "b", "c"], 77 | [(0, 0), (2, 1), (4, 2)], 78 | ) 79 | # Even though 'bc' is not unique globally, and is surrounded by 80 | # non-matching lines, we should still match, because they are locally 81 | # unique 82 | test_one( 83 | "abcdbce", 84 | "afbcgdbce", 85 | [(0, 0), (1, 2), (2, 3), (3, 5), (4, 6), (5, 7), (6, 8)], 86 | ) 87 | 88 | # recurse_matches doesn't match non-unique 89 | # lines surrounded by bogus text. 90 | # The update has been done in patiencediff.SequenceMatcher instead 91 | 92 | # This is what it could be 93 | # test_one('aBccDe', 'abccde', [(0,0), (2,2), (3,3), (5,5)]) 94 | 95 | # This is what it currently gives: 96 | test_one("aBccDe", "abccde", [(0, 0), (5, 5)]) 97 | 98 | def assertDiffBlocks(self, a, b, expected_blocks): 99 | """Check that the sequence matcher returns the correct blocks. 
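        The trailing sentinel block (len(a), len(b), 0) is popped and
        checked separately, so it must not be included in expected_blocks;
        e.g. self.assertDiffBlocks("abcd", "abce", [(0, 0, 3)]).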
100 | 101 | :param a: A sequence to match 102 | :param b: Another sequence to match 103 | :param expected_blocks: The expected output, not including the final 104 | matching block (len(a), len(b), 0) 105 | """ 106 | matcher = self._PatienceSequenceMatcher(None, a, b) 107 | blocks = matcher.get_matching_blocks() 108 | last = blocks.pop() 109 | self.assertEqual((len(a), len(b), 0), last) 110 | self.assertEqual(expected_blocks, blocks) 111 | 112 | def test_matching_blocks(self): 113 | # Some basic matching tests 114 | self.assertDiffBlocks("", "", []) 115 | self.assertDiffBlocks([], [], []) 116 | self.assertDiffBlocks("abc", "", []) 117 | self.assertDiffBlocks("", "abc", []) 118 | self.assertDiffBlocks("abcd", "abcd", [(0, 0, 4)]) 119 | self.assertDiffBlocks("abcd", "abce", [(0, 0, 3)]) 120 | self.assertDiffBlocks("eabc", "abce", [(1, 0, 3)]) 121 | self.assertDiffBlocks("eabce", "abce", [(1, 0, 4)]) 122 | self.assertDiffBlocks("abcde", "abXde", [(0, 0, 2), (3, 3, 2)]) 123 | self.assertDiffBlocks("abcde", "abXYZde", [(0, 0, 2), (3, 5, 2)]) 124 | self.assertDiffBlocks("abde", "abXYZde", [(0, 0, 2), (2, 5, 2)]) 125 | # This may check too much, but it checks to see that 126 | # a copied block stays attached to the previous section, 127 | # not the later one. 128 | # difflib would tend to grab the trailing longest match 129 | # which would make the diff not look right 130 | self.assertDiffBlocks( 131 | "abcdefghijklmnop", 132 | "abcdefxydefghijklmnop", 133 | [(0, 0, 6), (6, 11, 10)], 134 | ) 135 | 136 | # make sure it supports passing in lists 137 | self.assertDiffBlocks( 138 | ["hello there\n", "world\n", "how are you today?\n"], 139 | ["hello there\n", "how are you today?\n"], 140 | [(0, 0, 1), (2, 1, 1)], 141 | ) 142 | 143 | # non unique lines surrounded by non-matching lines 144 | # won't be found 145 | self.assertDiffBlocks("aBccDe", "abccde", [(0, 0, 1), (5, 5, 1)]) 146 | 147 | # But they only need to be locally unique 148 | self.assertDiffBlocks( 149 | "aBcDec", "abcdec", [(0, 0, 1), (2, 2, 1), (4, 4, 2)] 150 | ) 151 | 152 | # non unique blocks won't be matched 153 | self.assertDiffBlocks("aBcdEcdFg", "abcdecdfg", [(0, 0, 1), (8, 8, 1)]) 154 | 155 | # but locally unique ones will 156 | self.assertDiffBlocks( 157 | "aBcdEeXcdFg", 158 | "abcdecdfg", 159 | [(0, 0, 1), (2, 2, 2), (5, 4, 1), (7, 5, 2), (10, 8, 1)], 160 | ) 161 | 162 | self.assertDiffBlocks("abbabbXd", "cabbabxd", [(7, 7, 1)]) 163 | self.assertDiffBlocks("abbabbbb", "cabbabbc", []) 164 | self.assertDiffBlocks("bbbbbbbb", "cbbbbbbc", []) 165 | 166 | def test_matching_blocks_tuples(self): 167 | # Some basic matching tests 168 | self.assertDiffBlocks([], [], []) 169 | self.assertDiffBlocks([("a",), ("b",), ("c,")], [], []) 170 | self.assertDiffBlocks([], [("a",), ("b",), ("c,")], []) 171 | self.assertDiffBlocks( 172 | [("a",), ("b",), ("c,")], [("a",), ("b",), ("c,")], [(0, 0, 3)] 173 | ) 174 | self.assertDiffBlocks( 175 | [("a",), ("b",), ("c,")], [("a",), ("b",), ("d,")], [(0, 0, 2)] 176 | ) 177 | self.assertDiffBlocks( 178 | [("d",), ("b",), ("c,")], [("a",), ("b",), ("c,")], [(1, 1, 2)] 179 | ) 180 | self.assertDiffBlocks( 181 | [("d",), ("a",), ("b",), ("c,")], 182 | [("a",), ("b",), ("c,")], 183 | [(1, 0, 3)], 184 | ) 185 | self.assertDiffBlocks( 186 | [("a", "b"), ("c", "d"), ("e", "f")], 187 | [("a", "b"), ("c", "X"), ("e", "f")], 188 | [(0, 0, 1), (2, 2, 1)], 189 | ) 190 | self.assertDiffBlocks( 191 | [("a", "b"), ("c", "d"), ("e", "f")], 192 | [("a", "b"), ("c", "dX"), ("e", "f")], 193 | [(0, 0, 1), (2, 2, 1)], 194 | ) 195 
| 196 | def test_opcodes(self): 197 | def chk_ops(a, b, expected_codes): 198 | s = self._PatienceSequenceMatcher(None, a, b) 199 | self.assertEqual(expected_codes, s.get_opcodes()) 200 | 201 | chk_ops("", "", []) 202 | chk_ops([], [], []) 203 | chk_ops("abc", "", [("delete", 0, 3, 0, 0)]) 204 | chk_ops("", "abc", [("insert", 0, 0, 0, 3)]) 205 | chk_ops("abcd", "abcd", [("equal", 0, 4, 0, 4)]) 206 | chk_ops( 207 | "abcd", "abce", [("equal", 0, 3, 0, 3), ("replace", 3, 4, 3, 4)] 208 | ) 209 | chk_ops( 210 | "eabc", 211 | "abce", 212 | [ 213 | ("delete", 0, 1, 0, 0), 214 | ("equal", 1, 4, 0, 3), 215 | ("insert", 4, 4, 3, 4), 216 | ], 217 | ) 218 | chk_ops( 219 | "eabce", "abce", [("delete", 0, 1, 0, 0), ("equal", 1, 5, 0, 4)] 220 | ) 221 | chk_ops( 222 | "abcde", 223 | "abXde", 224 | [ 225 | ("equal", 0, 2, 0, 2), 226 | ("replace", 2, 3, 2, 3), 227 | ("equal", 3, 5, 3, 5), 228 | ], 229 | ) 230 | chk_ops( 231 | "abcde", 232 | "abXYZde", 233 | [ 234 | ("equal", 0, 2, 0, 2), 235 | ("replace", 2, 3, 2, 5), 236 | ("equal", 3, 5, 5, 7), 237 | ], 238 | ) 239 | chk_ops( 240 | "abde", 241 | "abXYZde", 242 | [ 243 | ("equal", 0, 2, 0, 2), 244 | ("insert", 2, 2, 2, 5), 245 | ("equal", 2, 4, 5, 7), 246 | ], 247 | ) 248 | chk_ops( 249 | "abcdefghijklmnop", 250 | "abcdefxydefghijklmnop", 251 | [ 252 | ("equal", 0, 6, 0, 6), 253 | ("insert", 6, 6, 6, 11), 254 | ("equal", 6, 16, 11, 21), 255 | ], 256 | ) 257 | chk_ops( 258 | ["hello there\n", "world\n", "how are you today?\n"], 259 | ["hello there\n", "how are you today?\n"], 260 | [ 261 | ("equal", 0, 1, 0, 1), 262 | ("delete", 1, 2, 1, 1), 263 | ("equal", 2, 3, 1, 2), 264 | ], 265 | ) 266 | chk_ops( 267 | "aBccDe", 268 | "abccde", 269 | [ 270 | ("equal", 0, 1, 0, 1), 271 | ("replace", 1, 5, 1, 5), 272 | ("equal", 5, 6, 5, 6), 273 | ], 274 | ) 275 | chk_ops( 276 | "aBcDec", 277 | "abcdec", 278 | [ 279 | ("equal", 0, 1, 0, 1), 280 | ("replace", 1, 2, 1, 2), 281 | ("equal", 2, 3, 2, 3), 282 | ("replace", 3, 4, 3, 4), 283 | ("equal", 4, 6, 4, 6), 284 | ], 285 | ) 286 | chk_ops( 287 | "aBcdEcdFg", 288 | "abcdecdfg", 289 | [ 290 | ("equal", 0, 1, 0, 1), 291 | ("replace", 1, 8, 1, 8), 292 | ("equal", 8, 9, 8, 9), 293 | ], 294 | ) 295 | chk_ops( 296 | "aBcdEeXcdFg", 297 | "abcdecdfg", 298 | [ 299 | ("equal", 0, 1, 0, 1), 300 | ("replace", 1, 2, 1, 2), 301 | ("equal", 2, 4, 2, 4), 302 | ("delete", 4, 5, 4, 4), 303 | ("equal", 5, 6, 4, 5), 304 | ("delete", 6, 7, 5, 5), 305 | ("equal", 7, 9, 5, 7), 306 | ("replace", 9, 10, 7, 8), 307 | ("equal", 10, 11, 8, 9), 308 | ], 309 | ) 310 | 311 | def test_grouped_opcodes(self): 312 | def chk_ops(a, b, expected_codes, n=3): 313 | s = self._PatienceSequenceMatcher(None, a, b) 314 | self.assertEqual(expected_codes, list(s.get_grouped_opcodes(n))) 315 | 316 | chk_ops("", "", []) 317 | chk_ops([], [], []) 318 | chk_ops("abc", "", [[("delete", 0, 3, 0, 0)]]) 319 | chk_ops("", "abc", [[("insert", 0, 0, 0, 3)]]) 320 | chk_ops("abcd", "abcd", []) 321 | chk_ops( 322 | "abcd", "abce", [[("equal", 0, 3, 0, 3), ("replace", 3, 4, 3, 4)]] 323 | ) 324 | chk_ops( 325 | "eabc", 326 | "abce", 327 | [ 328 | [ 329 | ("delete", 0, 1, 0, 0), 330 | ("equal", 1, 4, 0, 3), 331 | ("insert", 4, 4, 3, 4), 332 | ] 333 | ], 334 | ) 335 | chk_ops( 336 | "abcdefghijklmnop", 337 | "abcdefxydefghijklmnop", 338 | [ 339 | [ 340 | ("equal", 3, 6, 3, 6), 341 | ("insert", 6, 6, 6, 11), 342 | ("equal", 6, 9, 11, 14), 343 | ] 344 | ], 345 | ) 346 | chk_ops( 347 | "abcdefghijklmnop", 348 | "abcdefxydefghijklmnop", 349 | [ 350 | [ 351 | ("equal", 2, 6, 2, 6), 352 | 
("insert", 6, 6, 6, 11), 353 | ("equal", 6, 10, 11, 15), 354 | ] 355 | ], 356 | 4, 357 | ) 358 | chk_ops( 359 | "Xabcdef", 360 | "abcdef", 361 | [[("delete", 0, 1, 0, 0), ("equal", 1, 4, 0, 3)]], 362 | ) 363 | chk_ops( 364 | "abcdef", 365 | "abcdefX", 366 | [[("equal", 3, 6, 3, 6), ("insert", 6, 6, 6, 7)]], 367 | ) 368 | 369 | def test_multiple_ranges(self): 370 | # There was an earlier bug where we used a bad set of ranges, 371 | # this triggers that specific bug, to make sure it doesn't regress 372 | self.assertDiffBlocks( 373 | "abcdefghijklmnop", 374 | "abcXghiYZQRSTUVWXYZijklmnop", 375 | [(0, 0, 3), (6, 4, 3), (9, 20, 7)], 376 | ) 377 | 378 | self.assertDiffBlocks( 379 | "ABCd efghIjk L", 380 | "AxyzBCn mo pqrstuvwI1 2 L", 381 | [(0, 0, 1), (1, 4, 2), (9, 19, 1), (12, 23, 3)], 382 | ) 383 | 384 | # These are rot13 code snippets. 385 | self.assertDiffBlocks( 386 | '''\ 387 | trg nqqrq jura lbh nqq n svyr va gur qverpgbel. 388 | """ 389 | gnxrf_netf = ['svyr*'] 390 | gnxrf_bcgvbaf = ['ab-erphefr'] 391 | 392 | qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr): 393 | sebz omeyvo.nqq vzcbeg fzneg_nqq, nqq_ercbegre_cevag, nqq_ercbegre_ahyy 394 | vs vf_dhvrg(): 395 | ercbegre = nqq_ercbegre_ahyy 396 | ryfr: 397 | ercbegre = nqq_ercbegre_cevag 398 | fzneg_nqq(svyr_yvfg, abg ab_erphefr, ercbegre) 399 | 400 | 401 | pynff pzq_zxqve(Pbzznaq): 402 | '''.splitlines(True), 403 | '''\ 404 | trg nqqrq jura lbh nqq n svyr va gur qverpgbel. 405 | 406 | --qel-eha jvyy fubj juvpu svyrf jbhyq or nqqrq, ohg abg npghnyyl 407 | nqq gurz. 408 | """ 409 | gnxrf_netf = ['svyr*'] 410 | gnxrf_bcgvbaf = ['ab-erphefr', 'qel-eha'] 411 | 412 | qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr, qel_eha=Snyfr): 413 | vzcbeg omeyvo.nqq 414 | 415 | vs qel_eha: 416 | vs vf_dhvrg(): 417 | # Guvf vf cbvagyrff, ohg V'q engure abg envfr na reebe 418 | npgvba = omeyvo.nqq.nqq_npgvba_ahyy 419 | ryfr: 420 | npgvba = omeyvo.nqq.nqq_npgvba_cevag 421 | ryvs vf_dhvrg(): 422 | npgvba = omeyvo.nqq.nqq_npgvba_nqq 423 | ryfr: 424 | npgvba = omeyvo.nqq.nqq_npgvba_nqq_naq_cevag 425 | 426 | omeyvo.nqq.fzneg_nqq(svyr_yvfg, abg ab_erphefr, npgvba) 427 | 428 | 429 | pynff pzq_zxqve(Pbzznaq): 430 | '''.splitlines(True), 431 | [(0, 0, 1), (1, 4, 2), (9, 19, 1), (12, 23, 3)], 432 | ) 433 | 434 | def test_patience_unified_diff(self): 435 | txt_a = ["hello there\n", "world\n", "how are you today?\n"] 436 | txt_b = ["hello there\n", "how are you today?\n"] 437 | unified_diff = patiencediff.unified_diff 438 | psm = self._PatienceSequenceMatcher 439 | self.assertEqual( 440 | [ 441 | "--- \n", 442 | "+++ \n", 443 | "@@ -1,3 +1,2 @@\n", 444 | " hello there\n", 445 | "-world\n", 446 | " how are you today?\n", 447 | ], 448 | list(unified_diff(txt_a, txt_b, sequencematcher=psm)), 449 | ) 450 | txt_a = [x + "\n" for x in "abcdefghijklmnop"] 451 | txt_b = [x + "\n" for x in "abcdefxydefghijklmnop"] 452 | # This is the result with LongestCommonSubstring matching 453 | self.assertEqual( 454 | [ 455 | "--- \n", 456 | "+++ \n", 457 | "@@ -1,6 +1,11 @@\n", 458 | " a\n", 459 | " b\n", 460 | " c\n", 461 | "+d\n", 462 | "+e\n", 463 | "+f\n", 464 | "+x\n", 465 | "+y\n", 466 | " d\n", 467 | " e\n", 468 | " f\n", 469 | ], 470 | list(unified_diff(txt_a, txt_b)), 471 | ) 472 | # And the patience diff 473 | self.assertEqual( 474 | [ 475 | "--- \n", 476 | "+++ \n", 477 | "@@ -4,6 +4,11 @@\n", 478 | " d\n", 479 | " e\n", 480 | " f\n", 481 | "+x\n", 482 | "+y\n", 483 | "+d\n", 484 | "+e\n", 485 | "+f\n", 486 | " g\n", 487 | " h\n", 488 | " i\n", 489 | ], 490 | list(unified_diff(txt_a, 
491 |         )
492 |
493 |     def test_patience_unified_diff_with_dates(self):
494 |         txt_a = ["hello there\n", "world\n", "how are you today?\n"]
495 |         txt_b = ["hello there\n", "how are you today?\n"]
496 |         unified_diff = patiencediff.unified_diff
497 |         psm = self._PatienceSequenceMatcher
498 |         self.assertEqual(
499 |             [
500 |                 "--- a\t2008-08-08\n",
501 |                 "+++ b\t2008-09-09\n",
502 |                 "@@ -1,3 +1,2 @@\n",
503 |                 " hello there\n",
504 |                 "-world\n",
505 |                 " how are you today?\n",
506 |             ],
507 |             list(
508 |                 unified_diff(
509 |                     txt_a,
510 |                     txt_b,
511 |                     fromfile="a",
512 |                     tofile="b",
513 |                     fromfiledate="2008-08-08",
514 |                     tofiledate="2008-09-09",
515 |                     sequencematcher=psm,
516 |                 )
517 |             ),
518 |         )
519 |
520 |
521 | class TestPatienceDiffLibFiles(unittest.TestCase):
522 |     def setUp(self):
523 |         super().setUp()
524 |         self._PatienceSequenceMatcher = (
525 |             _patiencediff_py.PatienceSequenceMatcher_py
526 |         )
527 |         self.test_dir = tempfile.mkdtemp()
528 |         self.addCleanup(lambda: shutil.rmtree(self.test_dir))
529 |
530 |     def test_patience_unified_diff_files(self):
531 |         txt_a = [b"hello there\n", b"world\n", b"how are you today?\n"]
532 |         txt_b = [b"hello there\n", b"how are you today?\n"]
533 |         with open(os.path.join(self.test_dir, "a1"), "wb") as f:
534 |             f.writelines(txt_a)
535 |         with open(os.path.join(self.test_dir, "b1"), "wb") as f:
536 |             f.writelines(txt_b)
537 |
538 |         unified_diff_files = patiencediff.unified_diff_files
539 |         psm = self._PatienceSequenceMatcher
540 |
541 |         old_pwd = os.getcwd()
542 |         os.chdir(self.test_dir)
543 |         try:
544 |             self.assertEqual(
545 |                 [
546 |                     "--- a1\n",
547 |                     "+++ b1\n",
548 |                     "@@ -1,3 +1,2 @@\n",
549 |                     " hello there\n",
550 |                     "-world\n",
551 |                     " how are you today?\n",
552 |                 ],
553 |                 list(unified_diff_files("a1", "b1", sequencematcher=psm)),
554 |             )
555 |         finally:
556 |             os.chdir(old_pwd)
557 |
558 |         txt_a = [x + "\n" for x in "abcdefghijklmnop"]
559 |         txt_b = [x + "\n" for x in "abcdefxydefghijklmnop"]
560 |         with open(os.path.join(self.test_dir, "a2"), "w") as f:
561 |             f.writelines(txt_a)
562 |         with open(os.path.join(self.test_dir, "b2"), "w") as f:
563 |             f.writelines(txt_b)
564 |
565 |         # This is the result with LongestCommonSubstring matching
566 |         os.chdir(self.test_dir)
567 |         try:
568 |             self.assertEqual(
569 |                 [
570 |                     "--- a2\n",
571 |                     "+++ b2\n",
572 |                     "@@ -1,6 +1,11 @@\n",
573 |                     " a\n",
574 |                     " b\n",
575 |                     " c\n",
576 |                     "+d\n",
577 |                     "+e\n",
578 |                     "+f\n",
579 |                     "+x\n",
580 |                     "+y\n",
581 |                     " d\n",
582 |                     " e\n",
583 |                     " f\n",
584 |                 ],
585 |                 list(unified_diff_files("a2", "b2")),
586 |             )
587 |
588 |             # And the patience diff
589 |             self.assertEqual(
590 |                 [
591 |                     "--- a2\n",
592 |                     "+++ b2\n",
593 |                     "@@ -4,6 +4,11 @@\n",
594 |                     " d\n",
595 |                     " e\n",
596 |                     " f\n",
597 |                     "+x\n",
598 |                     "+y\n",
599 |                     "+d\n",
600 |                     "+e\n",
601 |                     "+f\n",
602 |                     " g\n",
603 |                     " h\n",
604 |                     " i\n",
605 |                 ],
606 |                 list(unified_diff_files("a2", "b2", sequencematcher=psm)),
607 |             )
608 |         finally:
609 |             os.chdir(old_pwd)
610 |
611 |
612 | class TestPatienceDiffLib_rs(TestPatienceDiffLib):
613 |     """Test class for the Rust implementation using PyO3 bindings."""
614 |
615 |     def setUp(self):
616 |         super(TestPatienceDiffLib, self).setUp()
617 |         try:
618 |             from . import _patiencediff_rs
619 |         except ImportError:
620 |             self.skipTest("Rust extension not built")
621 |         self._unique_lcs = _patiencediff_rs.unique_lcs_rs
622 |         self._recurse_matches = _patiencediff_rs.recurse_matches_rs
623 |         self._PatienceSequenceMatcher = (
624 |             _patiencediff_rs.PatienceSequenceMatcher_rs
625 |         )
626 |
627 |     def test_unhashable(self):
628 |         """We should get a proper exception here."""
629 |         # We need to be able to hash items in the sequence; lists are
630 |         # unhashable and thus cannot be diffed.
631 |         self.assertRaises(
632 |             TypeError, self._PatienceSequenceMatcher, None, [[]], []
633 |         )
634 |         self.assertRaises(
635 |             TypeError, self._PatienceSequenceMatcher, None, ["valid", []], []
636 |         )
637 |         self.assertRaises(
638 |             TypeError, self._PatienceSequenceMatcher, None, ["valid"], [[]]
639 |         )
640 |         self.assertRaises(
641 |             TypeError,
642 |             self._PatienceSequenceMatcher,
643 |             None,
644 |             ["valid"],
645 |             ["valid", []],
646 |         )
647 |
648 |
649 | class TestPatienceDiffLibFiles_rs(TestPatienceDiffLibFiles):
650 |     """Test class for file operations with the Rust implementation."""
651 |
652 |     def setUp(self):
653 |         super().setUp()
654 |         try:
655 |             from . import _patiencediff_rs
656 |         except ImportError:
657 |             self.skipTest("Rust extension not built")
658 |         self._PatienceSequenceMatcher = (
659 |             _patiencediff_rs.PatienceSequenceMatcher_rs
660 |         )
661 |
662 |
663 | class TestUsingCompiledIfAvailable(unittest.TestCase):
664 |     def test_PatienceSequenceMatcher(self):
665 |         try:
666 |             from ._patiencediff_rs import PatienceSequenceMatcher_rs
667 |
668 |             self.assertIs(
669 |                 PatienceSequenceMatcher_rs,
670 |                 patiencediff.PatienceSequenceMatcher,
671 |             )
672 |         except ImportError:
673 |             from ._patiencediff_py import PatienceSequenceMatcher_py
674 |
675 |             self.assertIs(
676 |                 PatienceSequenceMatcher_py,
677 |                 patiencediff.PatienceSequenceMatcher,
678 |             )
679 |
680 |     def test_unique_lcs(self):
681 |         try:
682 |             from ._patiencediff_rs import unique_lcs_rs
683 |
684 |             self.assertIs(unique_lcs_rs, patiencediff.unique_lcs)
685 |         except ImportError:
686 |             from ._patiencediff_py import unique_lcs_py
687 |
688 |             self.assertIs(unique_lcs_py, patiencediff.unique_lcs)
689 |
690 |     def test_recurse_matches(self):
691 |         try:
692 |             from ._patiencediff_rs import recurse_matches_rs
693 |
694 |             self.assertIs(recurse_matches_rs, patiencediff.recurse_matches)
695 |         except ImportError:
696 |             from ._patiencediff_py import recurse_matches_py
697 |
698 |             self.assertIs(recurse_matches_py, patiencediff.recurse_matches)
699 |
700 |     def test_run_implementation(self):
701 |         """Test that we can run the implementation that was loaded."""
702 |         # Simple test with some basic strings
703 |         a = "abcde"
704 |         b = "abXde"
705 |
706 |         # Create a matcher and get blocks
707 |         matcher = patiencediff.PatienceSequenceMatcher(None, a, b)
708 |         blocks = matcher.get_matching_blocks()
709 |
710 |         # Validate results - we should get two blocks plus sentinel
711 |         self.assertEqual(3, len(blocks))
712 |         self.assertEqual((0, 0, 2), blocks[0])  # "ab" match
713 |         self.assertEqual((3, 3, 2), blocks[1])  # "de" match
714 |         self.assertEqual((5, 5, 0), blocks[2])  # sentinel
715 |
716 |         # Test that unique_lcs works
717 |         matches = patiencediff.unique_lcs(a, b)
718 |         self.assertEqual([(0, 0), (1, 1), (3, 3), (4, 4)], matches)
719 |
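# Editorial sketch (an assumption mirroring what TestUsingCompiledIfAvailable
# asserts): the patiencediff package selects the Rust implementation at import
# time when the extension is available, and falls back to pure Python,
# roughly:
#
#     try:
#         from ._patiencediff_rs import (
#             PatienceSequenceMatcher_rs as PatienceSequenceMatcher,
#         )
#     except ImportError:
#         from ._patiencediff_py import (
#             PatienceSequenceMatcher_py as PatienceSequenceMatcher,
#         )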
720 |
721 | if __name__ == "__main__":
722 |     # Check which implementation is loaded
723 |     import importlib.util
724 |
725 |     if importlib.util.find_spec("patiencediff._patiencediff_rs") is not None:
726 |         print("Rust extension loaded successfully!")
727 |     else:
728 |         print("Rust extension is not available; using Python implementation")
729 |
730 |     # Run the tests
731 |     unittest.main()
732 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.2", "setuptools-rust>=1.5.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "patiencediff"
7 | description = "Python implementation of the patiencediff algorithm"
8 | readme = "README.rst"
9 | maintainers = [{name = "Breezy Developers", email = "team@breezy-vcs.org"}]
10 | license = {text = "GNU GPLv2 or later"}
11 | classifiers = [
12 |     "Development Status :: 6 - Mature",
13 |     "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)",
14 |     "Programming Language :: Python :: 3.9",
15 |     "Programming Language :: Python :: 3.10",
16 |     "Programming Language :: Python :: 3.11",
17 |     "Programming Language :: Python :: 3.12",
18 |     "Programming Language :: Python :: 3.13",
19 |     "Programming Language :: Python :: Implementation :: CPython",
20 |     "Programming Language :: Python :: Implementation :: PyPy",
21 |     "Operating System :: POSIX",
22 | ]
23 | requires-python = ">=3.9"
24 | dynamic = ["version"]
25 | dependencies = []
26 |
27 | [project.urls]
28 | Homepage = "https://www.breezy-vcs.org/"
29 | Repository = "https://github.com/breezy-team/patiencediff"
30 |
31 | [project.scripts]
32 | patiencediff = "patiencediff.__main__:main"
33 |
34 | [tool.setuptools]
35 | packages = ["patiencediff"]
36 | include-package-data = false
37 |
38 | [tool.setuptools.package-data]
39 | patiencediff = ["py.typed"]
40 |
41 | [tool.setuptools.dynamic]
42 | version = {attr = "patiencediff.__version__"}
43 |
44 | [tool.ruff.lint]
45 | select = [
46 |     "ANN",
47 |     "D",
48 |     "E",
49 |     "F",
50 |     "I",
51 |     "UP",
52 | ]
53 | ignore = [
54 |     "ANN001",
55 |     "ANN201",
56 |     "ANN202",
57 |     "D100",
58 |     "D101",
59 |     "D102",
60 |     "D103",
61 |     "D104",
62 |     "E501",
63 | ]
64 |
65 | [tool.ruff]
66 | target-version = "py39"
67 | line-length = 79
68 |
69 | [tool.ruff.lint.pydocstyle]
70 | convention = "google"
71 |
72 | [project.optional-dependencies]
73 | dev = [
74 |     "ruff==0.11.11"
75 | ]
76 |
77 | [tool.cibuildwheel]
78 | environment = {PATH="$HOME/.cargo/bin:$PATH"}
79 | before-build = "pip install -U setuptools-rust && curl https://sh.rustup.rs -sSf | sh -s -- --profile=minimal -y && rustup show"
80 |
81 | [tool.cibuildwheel.linux]
82 | skip = "*-musllinux_*"
83 | archs = ["auto", "aarch64"]
84 | before-build = "pip install -U setuptools-rust && yum -y install libatomic && curl https://sh.rustup.rs -sSf | sh -s -- --profile=minimal -y && rustup show"
85 |
86 | [tool.cibuildwheel.macos]
87 | archs = ["auto", "universal2", "x86_64", "arm64"]
88 | before-all = "rustup target add x86_64-apple-darwin aarch64-apple-darwin"
89 | skip = """\
90 | cp39-macosx_x86_64 cp39-macosx_universal2 \
91 | cp310-macosx_x86_64 cp310-macosx_universal2 \
92 | cp311-macosx_x86_64 cp311-macosx_universal2 \
93 | cp312-macosx_x86_64 cp312-macosx_universal2 \
94 | cp313-macosx_x86_64 cp313-macosx_universal2 \
95 | """
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 |
5 | from setuptools import setup
6 | from setuptools_rust import Binding, RustExtension
7 |
8 | # Rust extension
9 | rust_extensions = [
10 |     RustExtension(
11 |         "patiencediff._patiencediff_rs",
12 |         "Cargo.toml",
13 |         binding=Binding.PyO3,
14 |         optional=os.environ.get("CIBUILDWHEEL", "0") != "1",
15 |     )
16 | ]
17 |
18 | setup(
19 |     rust_extensions=rust_extensions,
20 | )
21 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | use pyo3::prelude::*;
2 | use pyo3::types::{PyList, PySequence, PyTuple};
3 |
4 | /// Find the longest common subsequence of unique elements in sequences a and b.
5 | ///
6 | /// Returns a list of (i, j) tuples where a[i] == b[j].
7 | /// This implementation uses the patience sorting algorithm.
8 | #[pyfunction]
9 | fn unique_lcs_rs<'py>(
10 |     py: Python<'py>,
11 |     a: Bound<'py, PyAny>,
12 |     b: Bound<'py, PyAny>,
13 | ) -> PyResult<Bound<'py, PyList>> {
14 |     // Convert Python sequences to vectors of PyItem for the patiencediff crate
15 |     let a_seq = a.clone();
16 |     let b_seq = b.clone();
17 |
18 |     let a_len = a_seq.len()?;
19 |     let b_len = b_seq.len()?;
20 |
21 |     // Create PyItem sequences
22 |     let mut a_items = Vec::with_capacity(a_len);
23 |     let mut b_items = Vec::with_capacity(b_len);
24 |
25 |     // Extract items from sequences
26 |     for i in 0..a_len {
27 |         let item = a_seq.get_item(i)?;
28 |         a_items.push(PyItem(item.into()));
29 |     }
30 |
31 |     for i in 0..b_len {
32 |         let item = b_seq.get_item(i)?;
33 |         b_items.push(PyItem(item.into()));
34 |     }
35 |
36 |     // Use the patiencediff crate's unique_lcs function
37 |     let matches = patiencediff::unique_lcs(&a_items, &b_items);
38 |
39 |     // Create result list
40 |     let result = PyList::empty(py);
41 |
42 |     // Add matches to the result list
43 |     for &(a_pos, b_pos) in &matches {
44 |         let tuple = PyTuple::new(py, &[a_pos, b_pos])?;
45 |         result.append(tuple)?;
46 |     }
47 |
48 |     Ok(result)
49 | }
50 |
51 | /// Python item wrapper that implements the necessary traits for the patiencediff crate
52 | struct PyItem(PyObject);
53 |
54 | // Implement Clone for PyItem using clone_ref() for PyObject
55 | impl Clone for PyItem {
56 |     fn clone(&self) -> Self {
57 |         Python::with_gil(|py| PyItem(self.0.clone_ref(py)))
58 |     }
59 | }
60 |
61 | // Define equality for PyItem that uses Python's eq
62 | impl PartialEq for PyItem {
63 |     fn eq(&self, other: &Self) -> bool {
64 |         Python::with_gil(|py| {
65 |             let a = self.0.extract::<Bound<PyAny>>(py).unwrap();
66 |             let b = other.0.extract::<Bound<PyAny>>(py).unwrap();
67 |             a.eq(&b).unwrap_or(false)
68 |         })
69 |     }
70 | }
71 |
72 | impl Eq for PyItem {}
73 |
74 | // Define hashing for PyItem that uses Python's hash
75 | impl std::hash::Hash for PyItem {
76 |     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
77 |         let hash_value = Python::with_gil(|py| {
78 |             let obj = self.0.extract::<Bound<PyAny>>(py).unwrap();
79 |             match obj.hash() {
80 |                 Ok(hash) => hash,
81 |                 Err(e) => {
82 |                     // Hash cannot report an error, so fall back to constant hashes instead of panicking
83 |                     if e.is_instance_of::<pyo3::exceptions::PyTypeError>(py) {
84 |                         return 0; // Use a constant hash for unhashable types
85 |                     }
86 |                     // For any other errors, use a different constant
87 |                     return 1;
88 |                 }
89 |             }
90 |         });
91 |         state.write_isize(hash_value);
92 |     }
93 | }
94 |
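// Editorial note on the Hash impl above: the `Hash` trait cannot report
// failure, so unhashable values are mapped to a constant instead of
// panicking (at the cost of extra collisions). The user-visible TypeError
// for unhashable items is raised eagerly in PatienceSequenceMatcher_rs::new
// below, which probes item.hash() before building the item vectors.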
95 | /// Recursively find matches between two sequences.
96 | ///
97 | /// This function wraps the patiencediff crate's recurse_matches function.
98 | #[pyfunction]
99 | fn recurse_matches_rs<'py>(
100 |     py: Python<'py>,
101 |     a: Bound<'py, PyAny>,
102 |     b: Bound<'py, PyAny>,
103 |     alo: usize,
104 |     blo: usize,
105 |     ahi: usize,
106 |     bhi: usize,
107 |     answer: Bound<'py, PyList>,
108 |     maxrecursion: i32,
109 | ) -> PyResult<()> {
110 |     // Early return for base cases
111 |     if maxrecursion < 0 || alo == ahi || blo == bhi {
112 |         return Ok(());
113 |     }
114 |
115 |     // Convert Python sequences to vectors of PyItem for the patiencediff crate
116 |     let a_seq = a.clone();
117 |     let b_seq = b.clone();
118 |
119 |     // Create vectors of PyItems for the sliced sequences
120 |     let mut a_items = Vec::with_capacity(ahi - alo);
121 |     let mut b_items = Vec::with_capacity(bhi - blo);
122 |
123 |     // Extract the items we need from the sequences
124 |     for i in alo..ahi {
125 |         let item = a_seq.get_item(i)?;
126 |         a_items.push(PyItem(item.into()));
127 |     }
128 |
129 |     for i in blo..bhi {
130 |         let item = b_seq.get_item(i)?;
131 |         b_items.push(PyItem(item.into()));
132 |     }
133 |
134 |     // Create a vector to collect the matches
135 |     let mut matches = Vec::new();
136 |
137 |     // Call the patiencediff crate's recurse_matches function
138 |     patiencediff::recurse_matches(
139 |         &a_items,
140 |         &b_items,
141 |         0,
142 |         0,
143 |         a_items.len(),
144 |         b_items.len(),
145 |         &mut matches,
146 |         maxrecursion,
147 |     );
148 |
149 |     // Convert the results to Python and add to the answer list
150 |     for &(rel_a, rel_b) in &matches {
151 |         let a_pos = rel_a + alo;
152 |         let b_pos = rel_b + blo;
153 |
154 |         let tuple = PyTuple::new(py, &[a_pos, b_pos])?;
155 |         answer.append(tuple)?;
156 |     }
157 |
158 |     Ok(())
159 | }
160 |
161 | /// The PatienceSequenceMatcher class
162 | #[pyclass(name = "PatienceSequenceMatcher_rs")]
163 | struct PatienceSequenceMatcherRs {
164 |     matcher: patiencediff::SequenceMatcher<PyItem>,
165 | }
166 |
167 | #[pymethods]
168 | impl PatienceSequenceMatcherRs {
169 |     #[new]
170 |     fn new(py: Python<'_>, _junk: Option<PyObject>, a: PyObject, b: PyObject) -> PyResult<Self> {
171 |         // Extract sequences
172 |         let a_any = a.extract::<Bound<PyAny>>(py)?;
173 |         let b_any = b.extract::<Bound<PyAny>>(py)?;
174 |
175 |         // Convert to sequences
176 |         let a_seq = a_any.downcast::<PySequence>()?;
177 |         let b_seq = b_any.downcast::<PySequence>()?;
178 |
179 |         let a_len = a_seq.len()?;
180 |         let b_len = b_seq.len()?;
181 |
182 |         // Create PyItem sequences
183 |         let mut a_items = Vec::with_capacity(a_len);
184 |         let mut b_items = Vec::with_capacity(b_len);
185 |
186 |         // Check if all items are hashable before proceeding
187 |         for i in 0..a_len {
188 |             let item = a_seq.get_item(i)?;
189 |             // Try to hash the item to check if it's hashable
190 |             if let Err(e) = item.hash() {
191 |                 if e.is_instance_of::<pyo3::exceptions::PyTypeError>(py) {
192 |                     return Err(pyo3::exceptions::PyTypeError::new_err("unhashable type"));
193 |                 }
194 |                 return Err(e);
195 |             }
196 |             a_items.push(PyItem(item.into()));
197 |         }
198 |
199 |         for i in 0..b_len {
200 |             let item = b_seq.get_item(i)?;
201 |             // Try to hash the item to check if it's hashable
202 |             if let Err(e) = item.hash() {
203 |                 if e.is_instance_of::<pyo3::exceptions::PyTypeError>(py) {
204 |                     return Err(pyo3::exceptions::PyTypeError::new_err("unhashable type"));
205 |                 }
206 |                 return Err(e);
207 |             }
208 |             b_items.push(PyItem(item.into()));
209 |         }
210 |
211 |         // Create and return the matcher
212 |         let matcher = patiencediff::SequenceMatcher::new(&a_items, &b_items);
213 |
214 |         Ok(Self { matcher })
215 |     }
216 |
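    // Editorial illustration of the Python-level contract checked by
    // test_run_implementation in the test suite:
    //
    //     >>> m = PatienceSequenceMatcher_rs(None, "abcde", "abXde")
    //     >>> m.get_matching_blocks()
    //     [(0, 0, 2), (3, 3, 2), (5, 5, 0)]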
217 |     /// Return list of triples describing matching subsequences.
218 |     ///
219 |     /// Each triple is of the form (i, j, n), and means that
220 |     /// a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
221 |     /// i and in j.
222 |     ///
223 |     /// The last triple is a dummy, (len(a), len(b), 0), and is the only
224 |     /// triple with n==0.
225 |     fn get_matching_blocks<'py>(&mut self, py: Python<'py>) -> PyResult<Bound<'py, PyList>> {
226 |         // Get matching blocks from the matcher
227 |         let blocks = self.matcher.get_matching_blocks();
228 |
229 |         // Convert blocks to Python list
230 |         let result = PyList::empty(py);
231 |
232 |         for &(a, b, n) in blocks {
233 |             let tuple = PyTuple::new(py, &[a, b, n])?;
234 |             result.append(tuple)?;
235 |         }
236 |
237 |         Ok(result)
238 |     }
239 |
240 |     /// Return list of 5-tuples describing how to turn a into b.
241 |     ///
242 |     /// Each tuple is of the form (tag, i1, i2, j1, j2).  The first tuple
243 |     /// has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
244 |     /// tuple preceding it, and likewise for j1 == the previous j2.
245 |     ///
246 |     /// The tags are strings, with these meanings:
247 |     ///
248 |     /// 'replace':  a[i1:i2] should be replaced by b[j1:j2]
249 |     /// 'delete':   a[i1:i2] should be deleted.
250 |     ///             Note that j1==j2 in this case.
251 |     /// 'insert':   b[j1:j2] should be inserted at a[i1:i1].
252 |     ///             Note that i1==i2 in this case.
253 |     /// 'equal':    a[i1:i2] == b[j1:j2]
254 |     fn get_opcodes<'py>(&mut self, py: Python<'py>) -> PyResult<Bound<'py, PyList>> {
255 |         // Get opcodes directly from the matcher
256 |         let opcodes = self.matcher.get_opcodes();
257 |
258 |         // Convert opcodes to Python list
259 |         let result = PyList::empty(py);
260 |
261 |         for opcode in opcodes {
262 |             match opcode {
263 |                 patiencediff::Opcode::Equal(i1, i2, j1, j2) => {
264 |                     let tuple = PyTuple::new(
265 |                         py,
266 |                         &[
267 |                             "equal".into_py(py),
268 |                             i1.into_py(py),
269 |                             i2.into_py(py),
270 |                             j1.into_py(py),
271 |                             j2.into_py(py),
272 |                         ],
273 |                     )?;
274 |                     result.append(tuple)?;
275 |                 }
276 |                 patiencediff::Opcode::Replace(i1, i2, j1, j2) => {
277 |                     let tuple = PyTuple::new(
278 |                         py,
279 |                         &[
280 |                             "replace".into_py(py),
281 |                             i1.into_py(py),
282 |                             i2.into_py(py),
283 |                             j1.into_py(py),
284 |                             j2.into_py(py),
285 |                         ],
286 |                     )?;
287 |                     result.append(tuple)?;
288 |                 }
289 |                 patiencediff::Opcode::Delete(i1, i2, j1, j2) => {
290 |                     let tuple = PyTuple::new(
291 |                         py,
292 |                         &[
293 |                             "delete".into_py(py),
294 |                             i1.into_py(py),
295 |                             i2.into_py(py),
296 |                             j1.into_py(py),
297 |                             j2.into_py(py),
298 |                         ],
299 |                     )?;
300 |                     result.append(tuple)?;
301 |                 }
302 |                 patiencediff::Opcode::Insert(i1, i2, j1, j2) => {
303 |                     let tuple = PyTuple::new(
304 |                         py,
305 |                         &[
306 |                             "insert".into_py(py),
307 |                             i1.into_py(py),
308 |                             i2.into_py(py),
309 |                             j1.into_py(py),
310 |                             j2.into_py(py),
311 |                         ],
312 |                     )?;
313 |                     result.append(tuple)?;
314 |                 }
315 |             }
316 |         }
317 |
318 |         Ok(result)
319 |     }
320 |
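    // Editorial illustration (values from test_opcodes in the Python test
    // suite): for "abcde" vs "abXde", get_opcodes() returns
    //     [("equal", 0, 2, 0, 2), ("replace", 2, 3, 2, 3), ("equal", 3, 5, 3, 5)]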
321 |     /// Return a list of groups with up to n lines of context.
322 |     ///
323 |     /// Each group is in the same format as returned by get_opcodes().
324 |     fn get_grouped_opcodes<'py>(
325 |         &mut self,
326 |         py: Python<'py>,
327 |         n: Option<usize>,
328 |     ) -> PyResult<Bound<'py, PyList>> {
329 |         let n = n.unwrap_or(3);
330 |
331 |         // Get grouped opcodes directly from the matcher
332 |         let grouped_opcodes = self.matcher.get_grouped_opcodes(n);
333 |
334 |         // Convert to Python list
335 |         let result = PyList::empty(py);
336 |
337 |         for group in grouped_opcodes {
338 |             let group_list = PyList::empty(py);
339 |
340 |             for opcode in group {
341 |                 let (tag, i1, i2, j1, j2) = match opcode {
342 |                     patiencediff::Opcode::Equal(i1, i2, j1, j2) => ("equal", i1, i2, j1, j2),
343 |                     patiencediff::Opcode::Replace(i1, i2, j1, j2) => ("replace", i1, i2, j1, j2),
344 |                     patiencediff::Opcode::Delete(i1, i2, j1, j2) => ("delete", i1, i2, j1, j2),
345 |                     patiencediff::Opcode::Insert(i1, i2, j1, j2) => ("insert", i1, i2, j1, j2),
346 |                 };
347 |
348 |                 let tuple = PyTuple::new(
349 |                     py,
350 |                     &[
351 |                         tag.into_py(py),
352 |                         i1.into_py(py),
353 |                         i2.into_py(py),
354 |                         j1.into_py(py),
355 |                         j2.into_py(py),
356 |                     ],
357 |                 )?;
358 |
359 |                 group_list.append(tuple)?;
360 |             }
361 |
362 |             if group_list.len() > 0 {
363 |                 result.append(group_list)?;
364 |             }
365 |         }
366 |
367 |         // Note: We're not adding a default group for empty result anymore
368 |         Ok(result)
369 |     }
370 | }
371 |
372 | #[pymodule]
373 | fn _patiencediff_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
374 |     m.add_class::<PatienceSequenceMatcherRs>()?;
375 |     m.add_function(wrap_pyfunction!(unique_lcs_rs, m)?)?;
376 |     m.add_function(wrap_pyfunction!(recurse_matches_rs, m)?)?;
377 |     Ok(())
378 | }
379 |
--------------------------------------------------------------------------------