├── .gitignore ├── .travis.yml ├── AUTHORS ├── HISTORY ├── LICENSE ├── MANIFEST.in ├── README.rst ├── bin └── unidiff ├── run_tests.sh ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── samples │ ├── binary.diff │ ├── bzr.diff │ ├── git.diff │ ├── git_cr.diff │ ├── git_delete.diff │ ├── git_filenames_with_spaces.diff │ ├── git_filenames_with_spaces_prefix.diff │ ├── git_no_prefix.diff │ ├── git_quoted_filename.diff │ ├── git_rename.diff │ ├── hg.diff │ ├── sample0.diff │ ├── sample1.diff │ ├── sample2.diff │ ├── sample3.diff │ ├── sample4.diff │ ├── sample5.diff │ ├── sample6.diff │ ├── sample7.diff │ ├── sample8.diff │ └── svn.diff ├── test_hunks.py ├── test_line.py ├── test_parser.py └── test_patchedfile.py └── unidiff ├── __init__.py ├── __version__.py ├── constants.py ├── errors.py └── patch.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | *.py[cod] 3 | __pycache__ 4 | build 5 | dist 6 | unidiff.egg-info 7 | 8 | # Vim 9 | *.swp 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.7" 4 | - "3.8" 5 | - "3.9" 6 | - "3.10" 7 | - "3.11" 8 | script: ./run_tests.sh 9 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Main developer 2 | -------------- 3 | 4 | * Matias Bordese (`@matiasb`_) 5 | 6 | 7 | Contributors 8 | ------------ 9 | 10 | * Natalia Bidart (`@nessita`_) 11 | * Jacobo de Vera (`@jdevera`_) 12 | * Lei Zhang (`@antiAgainst`_) 13 | * Sumeet Agarwal (`@sumeet`_) 14 | * Philipp Kewisch (`@kewisch`_) 15 | * Allan Lewis (`@allanlewis`_) 16 | * Andrew Lapidas (`@alapidas`_) 17 | * Daniel Thompson (`@daniel-thompson`_) 18 | * Sebastian Kreft (`@sk-`_) 19 | * Thomas Grainger (`@graingert`_) 20 | * 
(`snake-scaly`_) 21 | * Dan Callaghan (`@danc86`_) 22 | * Max Bittker (`@MaxBittker`_) 23 | * Volo Zyko (`@volo-zyko`_) 24 | * Robert Estelle (`@erydo`_) 25 | * Dylan Grafmyre 26 | * Povilas Kanapickas (`@p12tic`_) 27 | * Snowhite (`@CirQ`_) 28 | * earonesty (`@earonesty`_) 29 | * Ben Carlsson (`@glacials`_) 30 | * (`@huichen-cs`) 31 | * Mikhail f. Shiryaev (`@Felixoid`) 32 | * Ronuk Raval (`@rraval`) 33 | * anthony sottile (`@asottile-sentry`) 34 | * (`@cpackham-atlnz`) 35 | * David Leen (`@dleen`) 36 | * Martin Liška (`@marxin`) 37 | * Tushar Sadhwani (`@tushar-deepsource`) 38 | -------------------------------------------------------------------------------- /HISTORY: -------------------------------------------------------------------------------- 1 | History 2 | ------- 3 | 4 | 0.7.5 - 2023-03-09 5 | ------------------ 6 | 7 | * Fixed issue with spaces in filename when using custom git prefix. 8 | * Support binary patch format. 9 | 10 | 0.7.4 - 2022-06-26 11 | ------------------ 12 | 13 | * Fixed git diff parsing issues (filename with spaces, only one added/deleted file). 14 | 15 | 0.7.3 - 2022-02-06 16 | ------------------ 17 | 18 | * Fixed RE_BINARY_DIFF regex to make it a raw string. 19 | 20 | 0.7.2 - 2022-01-28 21 | ------------------ 22 | 23 | * Fixed issue when parsing git diff header generated with `--no-prefix`. 24 | 25 | 0.7.1 - 2022-01-27 26 | ------------------ 27 | 28 | * Improved git added/deleted file detection. 29 | * Added `newline` optional param when parsing `from_filename`. 30 | 31 | 0.7.0 - 2021-08-16 32 | ------------------ 33 | 34 | * Fixed issues handling multiple git renames. 35 | * Renamed files return target filename as PatchedFile.path. 36 | * Fixed error when first change is a binary file. 37 | * Added source code type hints. 
38 | 39 | 0.6.0 - 2020-05-07 40 | ------------------ 41 | 42 | * Updated PatchSet constructor to accept an optional (defaults to False) 43 | metadata_only parameter to only keep diff metadata information without 44 | the diff text data (better performance). 45 | * Identify and track changed binary files. 46 | * Added support for git rename syntax. 47 | 48 | 0.5.5 - 2018-01-03 49 | ------------------ 50 | 51 | * Updated PatchSet constructor to accept string data. 52 | * Added support to parse extended patch info. 53 | 54 | 0.5.4 - 2017-05-26 55 | ------------------ 56 | 57 | * Added PatchSet.from_string helper. 58 | * Do not install tests as top-level package. 59 | 60 | 0.5.3 - 2017-04-10 61 | ------------------ 62 | 63 | * Re-released 0.5.2 as 0.5.3 because of issues with PyPI. 64 | 65 | 0.5.2 - 2016-02-02 66 | ------------------ 67 | 68 | * Added diff line number to Line metadata. 69 | * Optimizations for large hunks. 70 | * Fix for git empty new lines. 71 | * Added (optional) errors parameter to PatchSet.from_filename, 72 | to specify how to handle encoding errors. 73 | 74 | 75 | 0.5.1 - 2015-01-18 76 | ------------------ 77 | 78 | * Added (optional) encoding parameter to PatchSet. 79 | * Added support to get any iterable as PatchSet diff argument. 80 | 81 | 82 | 0.5 - 2014-12-14 83 | ---------------- 84 | 85 | * Release on PyPI.
86 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2012 Matias Bordese 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 18 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 20 | OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include bin/* 2 | include tests/samples/* 3 | include HISTORY 4 | include LICENSE 5 | include README.rst 6 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Unidiff 2 | ======= 3 | 4 | Simple Python library to parse and interact with unified diff data. 5 | 6 | .. 
image:: https://www.travis-ci.com/matiasb/python-unidiff.svg?branch=master 7 | :target: https://travis-ci.com/matiasb/python-unidiff 8 | 9 | Installing unidiff 10 | ------------------ 11 | 12 | :: 13 | 14 | $ pip install unidiff 15 | 16 | 17 | Quick start 18 | ----------- 19 | 20 | .. code-block:: python 21 | 22 | >>> import urllib.request 23 | >>> from unidiff import PatchSet 24 | >>> diff = urllib.request.urlopen('https://github.com/matiasb/python-unidiff/pull/3.diff') 25 | >>> encoding = diff.headers.get_charsets()[0] 26 | >>> patch = PatchSet(diff, encoding=encoding) 27 | >>> patch 28 | , , ]> 29 | >>> patch[0] 30 | 31 | >>> patch[0].is_added_file 32 | True 33 | >>> patch[0].added 34 | 6 35 | >>> patch[1] 36 | 37 | >>> patch[1].added, patch[1].removed 38 | (20, 11) 39 | >>> len(patch[1]) 40 | 6 41 | >>> patch[1][2] 42 | 43 | >>> patch[2] 44 | 45 | >>> print(patch[2]) 46 | diff --git a/unidiff/utils.py b/unidiff/utils.py 47 | index eae63e6..29c896a 100644 48 | --- a/unidiff/utils.py 49 | +++ b/unidiff/utils.py 50 | @@ -37,4 +37,3 @@ 51 | # - deleted line 52 | # \ No newline case (ignore) 53 | RE_HUNK_BODY_LINE = re.compile(r'^([- \+\\])') 54 | - 55 | 56 | 57 | Load unified diff data by instantiating :code:`PatchSet` with a file-like object as 58 | argument, or using :code:`PatchSet.from_filename` class method to read diff from file. 59 | 60 | A :code:`PatchSet` is a list of files updated by the given patch. For each :code:`PatchedFile` 61 | you can get stats (if it is a new, removed or modified file; the source/target 62 | lines; etc), besides having access to each hunk (also like a list) and its 63 | respective info. 64 | 65 | At any point you can get the string representation of the current object, and 66 | that will return the unified diff data of it. 67 | 68 | As a quick example of what can be done, check bin/unidiff file. 69 | 70 | Also, once installed, unidiff provides a command-line program that displays 71 | information from diff data (a file, or stdin). 
For example: 72 | 73 | :: 74 | 75 | $ git diff | unidiff 76 | Summary 77 | ------- 78 | README.md: +6 additions, -0 deletions 79 | 80 | 1 modified file(s), 0 added file(s), 0 removed file(s) 81 | Total: 6 addition(s), 0 deletion(s) 82 | 83 | 84 | Load a local diff file 85 | ---------------------- 86 | 87 | To instantiate :code:`PatchSet` from a local file, you can use: 88 | 89 | .. code-block:: python 90 | 91 | >>> from unidiff import PatchSet 92 | >>> patch = PatchSet.from_filename('tests/samples/bzr.diff', encoding='utf-8') 93 | >>> patch 94 | <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, <PatchedFile: removed_file>]> 95 | 96 | Notice the (optional) :code:`encoding` parameter. If not specified, unicode input will be expected. Or alternatively: 97 | 98 | .. code-block:: python 99 | 100 | >>> import codecs 101 | >>> from unidiff import PatchSet 102 | >>> with codecs.open('tests/samples/bzr.diff', 'r', encoding='utf-8') as diff: 103 | ... patch = PatchSet(diff) 104 | ... 105 | >>> patch 106 | <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, <PatchedFile: removed_file>]> 107 | 108 | Finally, you can also instantiate :code:`PatchSet` passing any iterable (and encoding, if needed): 109 | 110 | .. code-block:: python 111 | 112 | >>> from unidiff import PatchSet 113 | >>> with open('tests/samples/bzr.diff', 'r') as diff: 114 | ... data = diff.readlines() 115 | ... 116 | >>> patch = PatchSet(data) 117 | >>> patch 118 | <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, <PatchedFile: removed_file>]> 119 | 120 | If you don't need to be able to rebuild the original unified diff input, you can pass 121 | :code:`metadata_only=True` (defaults to :code:`False`), which should help make the 122 | parsing more efficient: 123 | 124 | ..
code-block:: python 125 | 126 | >>> from unidiff import PatchSet 127 | >>> patch = PatchSet.from_filename('tests/samples/bzr.diff', encoding='utf-8', metadata_only=True) 128 | 129 | 130 | References 131 | ---------- 132 | 133 | * https://en.wikipedia.org/wiki/Diff_utility 134 | * https://www.artima.com/weblogs/viewpost.jsp?thread=164293 135 | -------------------------------------------------------------------------------- /bin/unidiff: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function, unicode_literals 4 | 5 | import argparse 6 | import codecs 7 | import sys 8 | 9 | from unidiff import DEFAULT_ENCODING, PatchSet 10 | 11 | 12 | PY2 = sys.version_info[0] == 2 13 | DESCRIPTION = """Unified diff metadata. 14 | 15 | Examples: 16 | $ git diff | unidiff 17 | $ hg diff | unidiff --show-diff 18 | $ unidiff -f patch.diff 19 | 20 | """ 21 | 22 | def get_parser(): 23 | parser = argparse.ArgumentParser( 24 | formatter_class=argparse.RawDescriptionHelpFormatter, 25 | description=DESCRIPTION) 26 | parser.add_argument('--show-diff', action="store_true", default=False, 27 | dest='show_diff', help='output diff to stdout') 28 | parser.add_argument('-f', '--file', dest='diff_file', 29 | type=argparse.FileType('r'), 30 | help='if not specified, read diff data from stdin') 31 | return parser 32 | 33 | 34 | if __name__ == '__main__': 35 | parser = get_parser() 36 | args = parser.parse_args() 37 | 38 | encoding = DEFAULT_ENCODING 39 | if args.diff_file: 40 | diff_file = args.diff_file 41 | else: 42 | encoding = sys.stdin.encoding or encoding 43 | diff_file = sys.stdin 44 | 45 | if PY2: 46 | diff_file = codecs.getreader(encoding)(diff_file) 47 | 48 | patch = PatchSet(diff_file, metadata_only=(not args.show_diff)) 49 | 50 | if args.show_diff: 51 | print(patch) 52 | print() 53 | 54 | print('Summary') 55 | print('-------') 56 | additions = 0 57 | deletions = 0 58 | renamed_files = 0 59 | for 
f in patch: 60 | if f.is_binary_file: 61 | print('%s:' % f.path, '(binary file)') 62 | else: 63 | additions += f.added 64 | deletions += f.removed 65 | print('%s:' % f.path, '+%d additions,' % f.added, 66 | '-%d deletions' % f.removed) 67 | renamed_files = renamed_files + 1 if f.is_rename else renamed_files 68 | 69 | print() 70 | print('%d modified file(s), %d added file(s), %d removed file(s)' % ( 71 | len(patch.modified_files), len(patch.added_files), 72 | len(patch.removed_files))) 73 | if renamed_files: 74 | print('%d file(s) renamed' % renamed_files) 75 | print('Total: %d addition(s), %d deletion(s)' % (additions, deletions)) 76 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | PYTHONPATH=unidiff python -m unittest discover -s tests/ 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [metadata] 5 | license_file = LICENSE 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Matías Bordese 3 | 4 | import codecs 5 | import os 6 | 7 | from setuptools import find_packages, setup 8 | 9 | 10 | # metadata 11 | NAME = 'unidiff' 12 | DESCRIPTION = 'Unified diff parsing/metadata extraction library.' 
13 | KEYWORDS = ['unified', 'diff', 'parse', 'metadata'] 14 | URL = 'https://github.com/matiasb/python-unidiff' 15 | EMAIL = 'mbordese@gmail.com' 16 | AUTHOR = 'Matias Bordese' 17 | LICENSE = 'MIT' 18 | 19 | HERE = os.path.abspath(os.path.dirname(__file__)) 20 | 21 | # use README as the long-description 22 | with codecs.open(os.path.join(HERE, 'README.rst'), "rb", "utf-8") as f: 23 | long_description = f.read() 24 | 25 | 26 | # load __version__.py module as a dictionary 27 | about = {} 28 | with open(os.path.join(HERE, 'unidiff/__version__.py')) as f: 29 | exec(f.read(), about) 30 | 31 | 32 | setup( 33 | name=NAME, 34 | version=about['__version__'], 35 | description=DESCRIPTION, 36 | long_description=long_description, 37 | keywords=KEYWORDS, 38 | author=AUTHOR, 39 | author_email=EMAIL, 40 | url=URL, 41 | packages=find_packages(exclude=('tests',)), 42 | scripts=['bin/unidiff'], 43 | include_package_data=True, 44 | license=LICENSE, 45 | classifiers=[ 46 | 'Intended Audience :: Developers', 47 | 'Development Status :: 4 - Beta', 48 | 'Programming Language :: Python :: 3.7', 49 | 'Programming Language :: Python :: 3.8', 50 | 'Programming Language :: Python :: 3.9', 51 | 'Programming Language :: Python :: 3.10', 52 | 'Programming Language :: Python :: 3.11', 53 | ], 54 | test_suite='tests', 55 | ) 56 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | # Copyright (c) 2014-2017 Matias Bordese 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # 
furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 18 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 20 | # OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | """Tests for unidiff.""" 23 | -------------------------------------------------------------------------------- /tests/samples/binary.diff: -------------------------------------------------------------------------------- 1 | From 62ea0eaf7e00170a8fca79f665442f9f44725956 Mon Sep 17 00:00:00 2001 2 | From: Martin Liska 3 | Date: Fri, 9 Dec 2022 12:05:32 +0100 4 | Subject: [PATCH] add pixel 5 | 6 | --- 7 | 1x1.png | Bin 0 -> 95 bytes 8 | 1 file changed, 0 insertions(+), 0 deletions(-) 9 | create mode 100644 1x1.png 10 | 11 | diff --git a/1x1.png b/1x1.png 12 | new file mode 100644 13 | index 0000000000000000000000000000000000000000..1914264c08781d1f30ee0b8482bccf44586f2dc1 14 | GIT binary patch 15 | literal 95 16 | zcmeAS@N?(olHy`uVBq!ia0vp^j3CU&3?x-=hn)ga%mF?ju0VQumF+E%TuG2$FoVOh 17 | l8)-lem#2$k2*>s01R$Gz9%CSj!PC{xWt~$(697H@6ZHT9 18 | 19 | literal 0 20 | HcmV?d00001 21 | 22 | -- 23 | 2.38.1 24 | -------------------------------------------------------------------------------- /tests/samples/bzr.diff: -------------------------------------------------------------------------------- 1 | === added file 'added_file' 2 | --- added_file 1970-01-01 00:00:00 +0000 3 | +++ added_file 2013-10-13 23:44:04 +0000 4 | @@ -0,0 +1,4 @@ 5 | +This was missing! 
6 | +Adding it now. 7 | + 8 | +Only for testing purposes. 9 | \ No newline at end of file 10 | 11 | === modified file 'modified_file' 12 | --- modified_file 2013-10-13 23:53:13 +0000 13 | +++ modified_file 2013-10-13 23:53:26 +0000 14 | @@ -1,5 +1,7 @@ 15 | This is the original content. 16 | 17 | -This should be updated. 18 | +This is now updated. 19 | + 20 | +This is a new line. 21 | 22 | This will stay. 23 | \ No newline at end of file 24 | 25 | === removed file 'removed_file' 26 | --- removed_file 2013-10-13 23:53:13 +0000 27 | +++ removed_file 1970-01-01 00:00:00 +0000 28 | @@ -1,3 +0,0 @@ 29 | -This content shouldn't be here. 30 | - 31 | -This file will be removed. 32 | \ No newline at end of file 33 | 34 | 35 | -------------------------------------------------------------------------------- /tests/samples/git.diff: -------------------------------------------------------------------------------- 1 | diff --git a/added_file b/added_file 2 | new file mode 100644 3 | index 0000000..9b710f3 4 | --- /dev/null 5 | +++ b/added_file 6 | @@ -0,0 +1,4 @@ 7 | +This was missing! 8 | +Adding it now. 9 | + 10 | +Only for testing purposes. 11 | \ No newline at end of file 12 | diff --git a/modified_file b/modified_file 13 | index c7921f5..8946660 100644 14 | --- a/modified_file 15 | +++ b/modified_file 16 | @@ -1,5 +1,7 @@ 17 | This is the original content. 18 | 19 | -This should be updated. 20 | +This is now updated. 21 | + 22 | +This is a new line. 23 | 24 | This will stay. 25 | \ No newline at end of file 26 | diff --git a/removed_file b/removed_file 27 | deleted file mode 100644 28 | index 1f38447..0000000 29 | --- a/removed_file 30 | +++ /dev/null 31 | @@ -1,3 +0,0 @@ 32 | -This content shouldn't be here. 33 | - 34 | -This file will be removed. 
35 | \ No newline at end of file 36 | 37 | -------------------------------------------------------------------------------- /tests/samples/git_cr.diff: -------------------------------------------------------------------------------- 1 | diff --git a/src/test/org/apache/commons/math/util/ExpandableDoubleArrayTest.java b/src/test/org/apache/commons/math/util/ExpandableDoubleArrayTest.java 2 | new file mode 100644 3 | index 000000000..2b38fa232 4 | --- /dev/null 5 | +++ b/src/test/org/apache/commons/math/util/ExpandableDoubleArrayTest.java 6 | @@ -0,0 +1,3 @@ 7 | + "This line is broken into two lines by CR. " + "but it should be treated as one line in the text diff file" 8 | + "This has no CR" 9 | + "This line also has CR. " + "but it should also be treated as one line in the text diff file". 10 | -------------------------------------------------------------------------------- /tests/samples/git_delete.diff: -------------------------------------------------------------------------------- 1 | diff --git a/somefile.c b/somefile.c 2 | deleted file mode 100644 3 | index abcdefbbb8..0000000000 4 | --- a/somefile.c 5 | +++ /dev/null 6 | @@ -1,10 +0,0 @@ 7 | -/** 8 | - * @file somefile.c 9 | - */ 10 | -#include 11 | - 12 | -int main(int argc, cahr *argv[]) 13 | -{ 14 | - printf("Hello World\n"); 15 | - return 0; 16 | -} 17 | -------------------------------------------------------------------------------- /tests/samples/git_filenames_with_spaces.diff: -------------------------------------------------------------------------------- 1 | diff --git a/has spaces/t.sql b/has spaces/t.sql 2 | new file mode 100644 3 | index 0000000..8a9b485 4 | --- /dev/null 5 | +++ b/has spaces/t.sql 6 | @@ -0,0 +1 @@ 7 | +select * FROM t; 8 | -------------------------------------------------------------------------------- /tests/samples/git_filenames_with_spaces_prefix.diff: -------------------------------------------------------------------------------- 1 | diff --git src://foo bar/baz dst://foo 
bar/baz 2 | new file mode 100644 3 | index 00000000000..0a72e5064c8 4 | --- /dev/null 5 | +++ dst://foo bar/baz 6 | @@ -0,0 +1,1 @@ 7 | +blah 8 | -------------------------------------------------------------------------------- /tests/samples/git_no_prefix.diff: -------------------------------------------------------------------------------- 1 | diff --git file1 file1 2 | deleted file mode 100644 3 | index 42f90fd..0000000 4 | --- file1 5 | +++ /dev/null 6 | @@ -1,3 +0,0 @@ 7 | -line11 8 | -line12 9 | -line13 10 | diff --git file2 file2 11 | index c337bf1..1cb02b9 100644 12 | --- file2 13 | +++ file2 14 | @@ -4,0 +5,3 @@ line24 15 | +line24n 16 | +line24n2 17 | +line24n3 18 | @@ -15,0 +19,3 @@ line215 19 | +line215n 20 | +line215n2 21 | +line215n3 22 | diff --git file3 file3 23 | new file mode 100644 24 | index 0000000..632e269 25 | --- /dev/null 26 | +++ file3 27 | @@ -0,0 +1,3 @@ 28 | +line31 29 | +line32 30 | +line33 31 | -------------------------------------------------------------------------------- /tests/samples/git_quoted_filename.diff: -------------------------------------------------------------------------------- 1 | diff --git "a/A \303\242 B.py" "b/A \303\242 B.py" 2 | new file mode 100644 3 | index 0000000..ce01362 4 | --- /dev/null 5 | +++ "b/A \303\242 B.py" 6 | @@ -0,0 +1 @@ 7 | +hello 8 | -------------------------------------------------------------------------------- /tests/samples/git_rename.diff: -------------------------------------------------------------------------------- 1 | diff --git a/added b/moved 2 | similarity index 85% 3 | rename from added 4 | rename to moved 5 | index a071991..4dbab21 100644 6 | --- a/added 7 | +++ b/moved 8 | @@ -9,4 +9,4 @@ Some content 9 | Some content 10 | Some content 11 | Some content 12 | -Some content 13 | +Some modified content 14 | 15 | diff --git a/oldfile b/newfile 16 | similarity index 85% 17 | rename from oldfile 18 | rename to newfile 19 | index a071991..4dbab21 100644 20 | --- a/oldfile 21 | +++ 
b/newfile 22 | @@ -9,4 +9,4 @@ Some content 23 | Some content 24 | Some content 25 | Some content 26 | -Some content 27 | +Some modified content 28 | 29 | diff --git a/sub/onefile b/sub/otherfile 30 | similarity index 100% 31 | rename from onefile 32 | rename to otherfile 33 | -------------------------------------------------------------------------------- /tests/samples/hg.diff: -------------------------------------------------------------------------------- 1 | diff -r 44299fd3d1a8 added_file 2 | --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3 | +++ b/added_file Sun Oct 13 20:51:40 2013 -0300 4 | @@ -0,0 +1,4 @@ 5 | +This was missing! 6 | +Adding it now. 7 | + 8 | +Only for testing purposes. 9 | \ No newline at end of file 10 | diff -r 44299fd3d1a8 modified_file 11 | --- a/modified_file Sun Oct 13 20:51:07 2013 -0300 12 | +++ b/modified_file Sun Oct 13 20:51:40 2013 -0300 13 | @@ -1,5 +1,7 @@ 14 | This is the original content. 15 | 16 | -This should be updated. 17 | +This is now updated. 18 | + 19 | +This is a new line. 20 | 21 | This will stay. 22 | \ No newline at end of file 23 | diff -r 44299fd3d1a8 removed_file 24 | --- a/removed_file Sun Oct 13 20:51:07 2013 -0300 25 | +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 26 | @@ -1,3 +0,0 @@ 27 | -This content shouldn't be here. 28 | - 29 | -This file will be removed. 30 | \ No newline at end of file 31 | -------------------------------------------------------------------------------- /tests/samples/sample0.diff: -------------------------------------------------------------------------------- 1 | --- /path/to/original ''timestamp'' 2 | +++ /path/to/new ''timestamp'' 3 | @@ -1,3 +1,9 @@ Section Header 4 | +This is an important 5 | +notice! It should 6 | +therefore be located at 7 | +the beginning of this 8 | +document! 9 | + 10 | This part of the 11 | document has stayed the 12 | same from version to 13 | @@ -5,16 +11,10 @@ 14 | be shown if it doesn't 15 | change. 
Otherwise, that 16 | would not be helping to 17 | -compress the size of the 18 | -changes. 19 | - 20 | -This paragraph contains 21 | -text that is outdated. 22 | -It will be deleted in the 23 | -near future. 24 | +compress anything. 25 | 26 | It is important to spell 27 | -check this dokument. On 28 | +check this document. On 29 | the other hand, a 30 | misspelled word isn't 31 | the end of the world. 32 | @@ -22,3 +22,7 @@ 33 | this paragraph needs to 34 | be changed. Things can 35 | be added after it. 36 | + 37 | +This paragraph contains 38 | +important new additions 39 | +to this document. 40 | --- /dev/null 41 | +++ /path/to/another_new 42 | @@ -0,0 +1,9 @@ 43 | +This is an important 44 | +notice! It should 45 | +therefore be located at 46 | +the beginning of this 47 | +document! 48 | + 49 | +This part of the 50 | +document has stayed the 51 | +same from version to 52 | --- /path/to/existing 53 | +++ /dev/null 54 | @@ -1,9 +0,0 @@ 55 | -This is an important 56 | -notice! It should 57 | -therefore be located at 58 | -the beginning of this 59 | -document! 60 | - 61 | -This part of the 62 | -document has stayed the 63 | -same from version to 64 | -------------------------------------------------------------------------------- /tests/samples/sample1.diff: -------------------------------------------------------------------------------- 1 | --- /path/to/original ''timestamp'' 2 | +++ /path/to/new ''timestamp'' 3 | @@ -1,3 +1,9 @@ 4 | +This is an important 5 | +notice! It should 6 | +therefore be located at 7 | +the beginning of this 8 | +document! 9 | + 10 | This part of the 11 | document has stayed the 12 | same from version to 13 | @@ -5,16 +11,13 @@ 14 | be shown if it doesn't 15 | change. Otherwise, that 16 | would not be helping to 17 | -compress the size of the 18 | -changes. 19 | - 20 | -This paragraph contains 21 | -text that is outdated. 22 | -It will be deleted in the 23 | -near future. 24 | +compress anything. 
25 | 26 | It is important to spell 27 | -check this dokument. On 28 | +check this document. On 29 | the other hand, a 30 | misspelled word isn't 31 | the end of the world. 32 | @@ -22,3 +22,7 @@ 33 | this paragraph needs to 34 | be changed. Things can 35 | be added after it. 36 | + 37 | +This paragraph contains 38 | +important new additions 39 | +to this document. 40 | -------------------------------------------------------------------------------- /tests/samples/sample2.diff: -------------------------------------------------------------------------------- 1 | # HG changeset patch 2 | # Parent 13ba6cbdb304cd251fbc22466cadb21019ee817f 3 | # User Bill McCloskey 4 | 5 | diff --git a/content/base/src/nsContentUtils.cpp b/content/base/src/nsContentUtils.cpp 6 | --- a/content/base/src/nsContentUtils.cpp 7 | +++ b/content/base/src/nsContentUtils.cpp 8 | @@ -6369,17 +6369,17 @@ public: 9 | nsCycleCollectionParticipant* helper) 10 | { 11 | } 12 | 13 | NS_IMETHOD_(void) NoteNextEdgeName(const char* name) 14 | { 15 | } 16 | 17 | - NS_IMETHOD_(void) NoteWeakMapping(void* map, void* key, void* val) 18 | + NS_IMETHOD_(void) NoteWeakMapping(void* map, void* key, void* kdelegate, void* val) 19 | { 20 | } 21 | 22 | bool mFound; 23 | 24 | private: 25 | void* mWrapper; 26 | }; 27 | diff --git a/js/src/jsfriendapi.cpp b/js/src/jsfriendapi.cpp 28 | --- a/js/src/jsfriendapi.cpp 29 | +++ b/js/src/jsfriendapi.cpp 30 | @@ -527,16 +527,24 @@ js::VisitGrayWrapperTargets(JSCompartmen 31 | { 32 | for (WrapperMap::Enum e(comp->crossCompartmentWrappers); !e.empty(); e.popFront()) { 33 | gc::Cell *thing = e.front().key.wrapped; 34 | if (thing->isMarked(gc::GRAY)) 35 | callback(closure, thing); 36 | } 37 | } 38 | 39 | +JS_FRIEND_API(JSObject *) 40 | +js::GetWeakmapKeyDelegate(JSObject *key) 41 | +{ 42 | + if (JSWeakmapKeyDelegateOp op = key->getClass()->ext.weakmapKeyDelegateOp) 43 | + return op(key); 44 | + return NULL; 45 | +} 46 | + 47 | JS_FRIEND_API(void) 48 | 
JS_SetAccumulateTelemetryCallback(JSRuntime *rt, JSAccumulateTelemetryDataCallback callback) 49 | { 50 | rt->telemetryCallback = callback; 51 | } 52 | 53 | JS_FRIEND_API(JSObject *) -------------------------------------------------------------------------------- /tests/samples/sample3.diff: -------------------------------------------------------------------------------- 1 | === added file 'added_file' 2 | --- added_file 1970-01-01 00:00:00 +0000 3 | +++ added_file 2013-10-13 23:44:04 +0000 4 | @@ -0,0 +1,4 @@ 5 | +This was missing! 6 | +holá mundo! 7 | + 8 | +Only for testing purposes. 9 | \ No newline at end of file 10 | 11 | === modified file 'modified_file' 12 | --- modified_file 2013-10-13 23:53:13 +0000 13 | +++ modified_file 2013-10-13 23:53:26 +0000 14 | @@ -1,5 +1,7 @@ 15 | This is the original content. 16 | 17 | -This should be updated. 18 | +This is now updated. 19 | + 20 | +This is a new line. 21 | 22 | -This will stay. 23 | \ No newline at end of file 24 | +This will stay. 25 | 26 | === removed file 'removed_file' 27 | --- removed_file 2013-10-13 23:53:13 +0000 28 | +++ removed_file 1970-01-01 00:00:00 +0000 29 | @@ -1,3 +0,0 @@ 30 | -This content shouldn't be here. 31 | - 32 | -This file will be removed. 33 | \ No newline at end of file 34 | 35 | 36 | -------------------------------------------------------------------------------- /tests/samples/sample4.diff: -------------------------------------------------------------------------------- 1 | === added file 'added_file' 2 | --- added_file 1970-01-01 00:00:00 +0000 3 | +++ added_file 2013-10-13 23:44:04 +0000 4 | @@ -0,0 +1,4 @@ 5 | +This was missing! 6 | +holá mundo! 7 | + 8 | +Only for testing purposes. 9 | \ No newline at end of file 10 | 11 | === modified file 'modified_file' 12 | --- modified_file 2013-10-13 23:53:13 +0000 13 | +++ modified_file 2013-10-13 23:53:26 +0000 14 | @@ -1,5 +1,7 @@ 15 | This is the original content. 16 | 17 | -This should be updated. 18 | +This is now updated. 
19 | + 20 | +This is a new line. 21 | 22 | This will stay. 23 | \ No newline at end of file 24 | 25 | === removed file 'removed_file' 26 | --- removed_file 2013-10-13 23:53:13 +0000 27 | +++ removed_file 1970-01-01 00:00:00 +0000 28 | @@ -1,3 +0,0 @@ 29 | -This content shouldn't be here. 30 | - 31 | -This file will be removed. 32 | \ No newline at end of file 33 | 34 | 35 | -------------------------------------------------------------------------------- /tests/samples/sample5.diff: -------------------------------------------------------------------------------- 1 | === modified file 'modified_file1' 2 | --- modified_file1 2013-10-13 23:53:13 +0000 3 | +++ modified_file1 2013-10-13 23:53:26 +0000 4 | @@ -1,5 +1,7 @@ 5 | This is the original content. 6 | 7 | -This should be updated. 8 | +This is now updated. 9 | + 10 | +This is a new line. 11 | 12 | This will stay. 13 | \ No newline at end of file 14 | 15 | === modified file 'modified_file2' 16 | --- modified_file2 2013-10-13 23:53:13 +0000 17 | +++ modified_file2 2013-10-13 23:53:26 +0000 18 | @@ -1,5 +1,7 @@ 19 | This is the original content. 20 | 21 | -This should be updated. 22 | +This is now updated. 23 | + 24 | +This is a new line. 25 | 26 | This will stay. 27 | \ No newline at end of file 28 | 29 | 30 | -------------------------------------------------------------------------------- /tests/samples/sample6.diff: -------------------------------------------------------------------------------- 1 | --- /path/to/original ''timestamp'' 2 | +++ /path/to/new ''timestamp'' 3 | @@ -1,3 +1,9 @@ 4 | +This is an important 5 | +notice! It should 6 | +therefore be located at 7 | +the beginning of this 8 | +document! 9 | + 10 | This part of the 11 | document has stayed the 12 | same from version to 13 | @@ -5,16 +11,13 @@ 14 | be shown if it doesn't 15 | change. Otherwise, that 16 | would not be helping to 17 | -compress the size of the 18 | -changes. 19 | - 20 | -This paragraph contains 21 | -text that is outdated. 
22 | -It will be deleted in the 23 | -near future. 24 | +compress anything. 25 | 26 | It is important to spell 27 | -check this dokument. On 28 | +check this document. On 29 | the other hand, a 30 | misspelled word isn't 31 | the end of the world. 32 | this paragraph needs to 33 | be changed. Things can 34 | be added after it. 35 | + 36 | +This paragraph contains 37 | +important new additions 38 | +to this document. 39 | -------------------------------------------------------------------------------- /tests/samples/sample7.diff: -------------------------------------------------------------------------------- 1 | --- /path/to/original ''timestamp'' 2 | +++ /path/to/new ''timestamp'' 3 | @@ -1,3 +1,9 @@ 4 | +This is an important 5 | +notice! It should 6 | +therefore be located at 7 | +the beginning of this 8 | +document! 9 | + 10 | This part of the 11 | document has stayed the 12 | same from version to 13 | @@ -5,16 +11,13 @@ 14 | be shown if it doesn't 15 | change. Otherwise, that 16 | would not be helping to 17 | -compress the size of the 18 | -changes. 19 | - 20 | -This paragraph contains 21 | -text that is outdated. 22 | +compress anything. 23 | 24 | It is important to spell 25 | -check this dokument. On 26 | +check this document. On 27 | the other hand, a 28 | misspelled word isn't 29 | the end of the world. 
30 | -------------------------------------------------------------------------------- /tests/samples/sample8.diff: -------------------------------------------------------------------------------- 1 | diff --git a/boo.bin b/boo.bin 2 | new file mode 100644 3 | index 0000000..ae000000 4 | diff --git a/foo.bin b/foo.bin 5 | new file mode 100644 6 | index 0000000..af000000 7 | Binary files /dev/null and b/foo.bin differ 8 | diff --git a/bar.bin b/bar.bin 9 | index ad000000..ac000000 100644 10 | Binary files a/bar.bin and b/bar.bin differ 11 | diff --git a/baz.bin b/baz.bin 12 | deleted file mode 100644 13 | index af000000..0000000 14 | Binary files a/baz.bin and /dev/null differ 15 | diff --git a/fuz.bin b/fuz.bin 16 | new file mode 100644 17 | index 0000000..ae000000 18 | -------------------------------------------------------------------------------- /tests/samples/svn.diff: -------------------------------------------------------------------------------- 1 | Index: modified_file 2 | =================================================================== 3 | --- modified_file (revision 191) 4 | +++ modified_file (working copy) 5 | @@ -1,5 +1,7 @@ 6 | This is the original content. 7 | 8 | -This should be updated. 9 | +This is now updated. 10 | 11 | +This is a new line. 12 | + 13 | This will stay. 14 | \ No newline at end of file 15 | Index: removed_file 16 | =================================================================== 17 | --- removed_file (revision 188) 18 | +++ removed_file (working copy) 19 | @@ -1,3 +0,0 @@ 20 | -This content shouldn't be here. 21 | - 22 | -This file will be removed. 23 | \ No newline at end of file 24 | Index: added_file 25 | =================================================================== 26 | --- added_file (revision 0) 27 | +++ added_file (revision 0) 28 | @@ -0,0 +1,4 @@ 29 | +This was missing! 30 | +Adding it now. 31 | + 32 | +Only for testing purposes. 
33 | \ No newline at end of file 34 | -------------------------------------------------------------------------------- /tests/test_hunks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The MIT License (MIT) 4 | # Copyright (c) 2014-2017 Matias Bordese 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | 25 | """Tests for Hunk.""" 26 | 27 | from __future__ import unicode_literals 28 | 29 | import unittest 30 | 31 | from unidiff.patch import ( 32 | LINE_TYPE_ADDED, 33 | LINE_TYPE_CONTEXT, 34 | LINE_TYPE_REMOVED, 35 | Hunk, 36 | Line, 37 | ) 38 | 39 | 40 | class TestHunk(unittest.TestCase): 41 | """Tests for Hunk.""" 42 | 43 | def setUp(self): 44 | super(TestHunk, self).setUp() 45 | self.context_line = Line('Sample line', line_type=LINE_TYPE_CONTEXT) 46 | self.added_line = Line('Sample line', line_type=LINE_TYPE_ADDED) 47 | self.removed_line = Line('Sample line', line_type=LINE_TYPE_REMOVED) 48 | 49 | def test_missing_length(self): 50 | hunk = Hunk(src_len=None, tgt_len=None) 51 | hunk.append(self.context_line) 52 | self.assertTrue(hunk.is_valid()) 53 | 54 | def test_default_is_valid(self): 55 | hunk = Hunk() 56 | self.assertTrue(hunk.is_valid()) 57 | 58 | def test_missing_data_is_not_valid(self): 59 | hunk = Hunk(src_len=1, tgt_len=1) 60 | self.assertFalse(hunk.is_valid()) 61 | 62 | def test_append_context(self): 63 | hunk = Hunk(src_len=1, tgt_len=1) 64 | hunk.append(self.context_line) 65 | self.assertTrue(hunk.is_valid()) 66 | self.assertEqual(len(hunk.source), 1) 67 | self.assertEqual(hunk.target, hunk.source) 68 | self.assertIn(str(self.context_line), hunk.source) 69 | source_lines = list(hunk.source_lines()) 70 | target_lines = list(hunk.target_lines()) 71 | self.assertEqual(target_lines, source_lines) 72 | self.assertEqual(target_lines, [self.context_line]) 73 | 74 | def test_append_added_line(self): 75 | hunk = Hunk(src_len=0, tgt_len=1) 76 | hunk.append(self.added_line) 77 | self.assertTrue(hunk.is_valid()) 78 | self.assertEqual(len(hunk.target), 1) 79 | self.assertEqual(hunk.source, []) 80 | self.assertIn(str(self.added_line), hunk.target) 81 | target_lines = list(hunk.target_lines()) 82 | self.assertEqual(target_lines, [self.added_line]) 83 | 84 | def test_append_deleted_line(self): 85 | hunk = Hunk(src_len=1, tgt_len=0) 86 | 
hunk.append(self.removed_line) 87 | self.assertTrue(hunk.is_valid()) 88 | self.assertEqual(len(hunk.source), 1) 89 | self.assertEqual(hunk.target, []) 90 | self.assertIn(str(self.removed_line), hunk.source) 91 | source_lines = list(hunk.source_lines()) 92 | self.assertEqual(source_lines, [self.removed_line]) 93 | -------------------------------------------------------------------------------- /tests/test_line.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The MIT License (MIT) 4 | # Copyright (c) 2017 Matias Bordese 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | 25 | """Tests for Line.""" 26 | 27 | from __future__ import unicode_literals 28 | 29 | import unittest 30 | 31 | from unidiff.patch import ( 32 | LINE_TYPE_ADDED, 33 | LINE_TYPE_CONTEXT, 34 | LINE_TYPE_REMOVED, 35 | Line, 36 | ) 37 | 38 | 39 | class TestLine(unittest.TestCase): 40 | """Tests for Line.""" 41 | 42 | def setUp(self): 43 | super(TestLine, self).setUp() 44 | self.context_line = Line('Sample line', line_type=LINE_TYPE_CONTEXT) 45 | self.added_line = Line('Sample line', line_type=LINE_TYPE_ADDED) 46 | self.removed_line = Line('Sample line', line_type=LINE_TYPE_REMOVED) 47 | 48 | def test_str(self): 49 | self.assertEqual(str(self.added_line), '+Sample line') 50 | 51 | def test_repr(self): 52 | self.assertEqual(repr(self.added_line), '') 53 | 54 | def test_equal(self): 55 | other = Line('Sample line', line_type=LINE_TYPE_ADDED) 56 | self.assertEqual(self.added_line, other) 57 | 58 | def test_not_equal(self): 59 | self.assertNotEqual(self.added_line, self.removed_line) 60 | 61 | def test_is_added(self): 62 | self.assertTrue(self.added_line.is_added) 63 | self.assertFalse(self.context_line.is_added) 64 | self.assertFalse(self.removed_line.is_added) 65 | 66 | def test_is_removed(self): 67 | self.assertTrue(self.removed_line.is_removed) 68 | self.assertFalse(self.added_line.is_removed) 69 | self.assertFalse(self.context_line.is_removed) 70 | 71 | def test_is_context(self): 72 | self.assertTrue(self.context_line.is_context) 73 | self.assertFalse(self.added_line.is_context) 74 | self.assertFalse(self.removed_line.is_context) 75 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The MIT License (MIT) 4 | # Copyright (c) 2014-2023 Matias Bordese 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation 
files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | 25 | """Tests for the unified diff parser process.""" 26 | 27 | from __future__ import unicode_literals 28 | 29 | import codecs 30 | import os.path 31 | import unittest 32 | 33 | from unidiff import PatchSet 34 | from unidiff.patch import PY2 35 | from unidiff.errors import UnidiffParseError 36 | 37 | if not PY2: 38 | unicode = str 39 | 40 | class TestUnidiffParser(unittest.TestCase): 41 | """Tests for Unified Diff Parser.""" 42 | 43 | def setUp(self): 44 | super(TestUnidiffParser, self).setUp() 45 | self.samples_dir = os.path.dirname(os.path.realpath(__file__)) 46 | self.sample_file = os.path.join( 47 | self.samples_dir, 'samples/sample0.diff') 48 | self.sample_bad_file = os.path.join( 49 | self.samples_dir, 'samples/sample1.diff') 50 | 51 | def test_missing_encoding(self): 52 | utf8_file = os.path.join(self.samples_dir, 'samples/sample3.diff') 53 | # read bytes 54 | with open(utf8_file, 'rb') as diff_file: 55 | if PY2: 56 | self.assertRaises(UnicodeDecodeError, PatchSet, 
diff_file) 57 | else: 58 | # unicode expected 59 | self.assertRaises(TypeError, PatchSet, diff_file) 60 | 61 | def test_encoding_param(self): 62 | utf8_file = os.path.join(self.samples_dir, 'samples/sample3.diff') 63 | with open(utf8_file, 'rb') as diff_file: 64 | res = PatchSet(diff_file, encoding='utf-8') 65 | 66 | # 3 files updated by diff 67 | self.assertEqual(len(res), 3) 68 | added_unicode_line = res.added_files[0][0][1] 69 | self.assertEqual(added_unicode_line.value, 'holá mundo!\n') 70 | 71 | def test_no_newline_at_end_of_file(self): 72 | utf8_file = os.path.join(self.samples_dir, 'samples/sample3.diff') 73 | with open(utf8_file, 'rb') as diff_file: 74 | res = PatchSet(diff_file, encoding='utf-8') 75 | 76 | # 3 files updated by diff 77 | self.assertEqual(len(res), 3) 78 | added_unicode_line = res.added_files[0][0][4] 79 | self.assertEqual(added_unicode_line.line_type, '\\') 80 | self.assertEqual(added_unicode_line.value, ' No newline at end of file\n') 81 | added_unicode_line = res.modified_files[0][0][8] 82 | self.assertEqual(added_unicode_line.line_type, '\\') 83 | self.assertEqual(added_unicode_line.value, ' No newline at end of file\n') 84 | 85 | def test_preserve_dos_line_endings(self): 86 | utf8_file = os.path.join(self.samples_dir, 'samples/sample4.diff') 87 | with open(utf8_file, 'rb') as diff_file: 88 | res = PatchSet(diff_file, encoding='utf-8') 89 | 90 | # 3 files updated by diff 91 | self.assertEqual(len(res), 3) 92 | added_unicode_line = res.added_files[0][0][1] 93 | self.assertEqual(added_unicode_line.value, 'holá mundo!\r\n') 94 | 95 | def test_preserve_dos_line_endings_empty_line_type(self): 96 | utf8_file = os.path.join(self.samples_dir, 'samples/sample5.diff') 97 | with open(utf8_file, 'rb') as diff_file: 98 | res = PatchSet(diff_file, encoding='utf-8') 99 | 100 | # 2 files updated by diff 101 | self.assertEqual(len(res), 2) 102 | modified_unicode_line = res.modified_files[0][0][6] 103 | self.assertEqual(modified_unicode_line.value, 
'\r\n') 104 | self.assertEqual(modified_unicode_line.line_type, ' ') 105 | 106 | modified_unicode_line = res.modified_files[1][0][6] 107 | self.assertEqual(modified_unicode_line.value, '\n') 108 | self.assertEqual(modified_unicode_line.line_type, ' ') 109 | 110 | def test_print_hunks_without_gaps(self): 111 | with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: 112 | res = PatchSet(diff_file) 113 | lines = unicode(res).splitlines() 114 | self.assertEqual(lines[12], '@@ -5,16 +11,10 @@') 115 | self.assertEqual(lines[31], '@@ -22,3 +22,7 @@') 116 | 117 | def _test_parse_sample(self, metadata_only): 118 | """Parse sample file.""" 119 | with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: 120 | res = PatchSet(diff_file, metadata_only=metadata_only) 121 | 122 | # three files in the patch 123 | self.assertEqual(len(res), 3) 124 | # three hunks 125 | self.assertEqual(len(res[0]), 3) 126 | 127 | # first file is modified 128 | self.assertTrue(res[0].is_modified_file) 129 | self.assertFalse(res[0].is_removed_file) 130 | self.assertFalse(res[0].is_added_file) 131 | self.assertFalse(res[0].is_binary_file) 132 | 133 | # Hunk 1: six additions, no deletions, a section header 134 | self.assertEqual(res[0][0].added, 6) 135 | self.assertEqual(res[0][0].removed, 0) 136 | self.assertEqual(res[0][0].section_header, 'Section Header') 137 | 138 | # Hunk 2: 2 additions, 8 deletions, no section header 139 | self.assertEqual(res[0][1].added, 2) 140 | self.assertEqual(res[0][1].removed, 8) 141 | self.assertEqual(res[0][1].section_header, '') 142 | 143 | # Hunk 3: four additions, no deletions, no section header 144 | self.assertEqual(res[0][2].added, 4) 145 | self.assertEqual(res[0][2].removed, 0) 146 | self.assertEqual(res[0][2].section_header, '') 147 | 148 | # Check file totals 149 | self.assertEqual(res[0].added, 12) 150 | self.assertEqual(res[0].removed, 8) 151 | 152 | # second file is added 153 | self.assertFalse(res[1].is_modified_file) 154 | 
self.assertFalse(res[1].is_removed_file) 155 | self.assertTrue(res[1].is_added_file) 156 | self.assertFalse(res[1].is_binary_file) 157 | 158 | # third file is removed 159 | self.assertFalse(res[2].is_modified_file) 160 | self.assertTrue(res[2].is_removed_file) 161 | self.assertFalse(res[2].is_added_file) 162 | self.assertFalse(res[2].is_binary_file) 163 | 164 | self.assertEqual(res.added, 21) 165 | self.assertEqual(res.removed, 17) 166 | 167 | def test_parse_sample_full(self): 168 | self._test_parse_sample(metadata_only=False) 169 | 170 | def test_parse_sample_metadata_only(self): 171 | self._test_parse_sample(metadata_only=True) 172 | 173 | def test_patchset_compare(self): 174 | with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: 175 | ps1 = PatchSet(diff_file) 176 | 177 | with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: 178 | ps2 = PatchSet(diff_file) 179 | 180 | other_file = os.path.join(self.samples_dir, 'samples/sample3.diff') 181 | with open(other_file, 'rb') as diff_file: 182 | ps3 = PatchSet(diff_file, encoding='utf-8') 183 | 184 | self.assertEqual(ps1, ps2) 185 | self.assertNotEqual(ps1, ps3) 186 | 187 | def test_patchset_from_string(self): 188 | with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: 189 | diff_data = diff_file.read() 190 | ps1 = PatchSet.from_string(diff_data) 191 | 192 | with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: 193 | ps2 = PatchSet(diff_file) 194 | 195 | self.assertEqual(ps1, ps2) 196 | 197 | def test_patchset_from_bytes_string(self): 198 | with codecs.open(self.sample_file, 'rb') as diff_file: 199 | diff_data = diff_file.read() 200 | ps1 = PatchSet.from_string(diff_data, encoding='utf-8') 201 | 202 | with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: 203 | ps2 = PatchSet(diff_file) 204 | 205 | self.assertEqual(ps1, ps2) 206 | 207 | def test_patchset_string_input(self): 208 | with codecs.open(self.sample_file, 'r', 
encoding='utf-8') as diff_file: 209 | diff_data = diff_file.read() 210 | ps1 = PatchSet(diff_data) 211 | 212 | with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: 213 | ps2 = PatchSet(diff_file) 214 | 215 | self.assertEqual(ps1, ps2) 216 | 217 | def test_parse_malformed_diff(self): 218 | """Parse malformed file.""" 219 | with open(self.sample_bad_file) as diff_file: 220 | self.assertRaises(UnidiffParseError, PatchSet, diff_file) 221 | 222 | def test_parse_malformed_diff_longer_than_expected(self): 223 | """Parse malformed file with non-terminated hunk.""" 224 | utf8_file = os.path.join(self.samples_dir, 'samples/sample6.diff') 225 | with open(utf8_file, 'r') as diff_file: 226 | self.assertRaises(UnidiffParseError, PatchSet, diff_file) 227 | 228 | def test_parse_malformed_diff_shorter_than_expected(self): 229 | """Parse malformed file with non-terminated hunk.""" 230 | utf8_file = os.path.join(self.samples_dir, 'samples/sample7.diff') 231 | with open(utf8_file, 'r') as diff_file: 232 | self.assertRaises(UnidiffParseError, PatchSet, diff_file) 233 | 234 | def test_from_filename_with_cr_in_diff_text_files(self): 235 | """Parse git diff text files that contain CR""" 236 | utf8_file = os.path.join(self.samples_dir, 'samples/git_cr.diff') 237 | self.assertRaises(UnidiffParseError, PatchSet.from_filename, utf8_file) 238 | 239 | ps1 = PatchSet.from_filename(utf8_file, newline='\n') 240 | import io 241 | with io.open(utf8_file, 'r', newline='\n') as diff_file: 242 | ps2 = PatchSet(diff_file) 243 | 244 | self.assertEqual(ps1, ps2) 245 | 246 | def test_parse_diff_with_new_and_modified_binary_files(self): 247 | """Parse git diff file with newly added and modified binaries files.""" 248 | utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff') 249 | with open(utf8_file, 'r') as diff_file: 250 | res = PatchSet(diff_file) 251 | 252 | # five files in the patch 253 | self.assertEqual(len(res), 5) 254 | 255 | # first empty file is added 256 | 
self.assertFalse(res[0].is_modified_file) 257 | self.assertFalse(res[0].is_removed_file) 258 | self.assertTrue(res[0].is_added_file) 259 | self.assertFalse(res[0].is_binary_file) 260 | 261 | # second file is added 262 | self.assertFalse(res[1].is_modified_file) 263 | self.assertFalse(res[1].is_removed_file) 264 | self.assertTrue(res[1].is_added_file) 265 | self.assertTrue(res[1].is_binary_file) 266 | 267 | # third file is modified 268 | self.assertTrue(res[2].is_modified_file) 269 | self.assertFalse(res[2].is_removed_file) 270 | self.assertFalse(res[2].is_added_file) 271 | self.assertTrue(res[2].is_binary_file) 272 | 273 | # fourth file is removed 274 | self.assertFalse(res[3].is_modified_file) 275 | self.assertTrue(res[3].is_removed_file) 276 | self.assertFalse(res[3].is_added_file) 277 | self.assertTrue(res[3].is_binary_file) 278 | 279 | # fifth empty file is added 280 | self.assertFalse(res[4].is_modified_file) 281 | self.assertFalse(res[4].is_removed_file) 282 | self.assertTrue(res[4].is_added_file) 283 | self.assertFalse(res[4].is_binary_file) 284 | 285 | def test_parse_round_trip_with_binary_files_in_diff(self): 286 | """Parse git diff with binary files though round trip""" 287 | utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff') 288 | with open(utf8_file, 'r') as diff_file: 289 | res1 = PatchSet(diff_file) 290 | 291 | res2 = PatchSet(str(res1)) 292 | self.assertEqual(res1, res2) 293 | 294 | def test_parse_diff_git_no_prefix(self): 295 | utf8_file = os.path.join(self.samples_dir, 'samples/git_no_prefix.diff') 296 | with open(utf8_file, 'r') as diff_file: 297 | res = PatchSet(diff_file) 298 | 299 | self.assertEqual(len(res), 3) 300 | 301 | self.assertEqual(res[0].source_file, 'file1') 302 | self.assertEqual(res[0].target_file, '/dev/null') 303 | self.assertTrue(res[0].is_removed_file) 304 | self.assertEqual(res[0].path, 'file1') 305 | 306 | self.assertEqual(res[1].source_file, 'file2') 307 | self.assertEqual(res[1].target_file, 'file2') 308 | 
self.assertTrue(res[1].is_modified_file) 309 | self.assertEqual(res[1].path, 'file2') 310 | 311 | self.assertEqual(res[2].source_file, '/dev/null') 312 | self.assertEqual(res[2].target_file, 'file3') 313 | self.assertTrue(res[2].is_added_file) 314 | self.assertEqual(res[2].path, 'file3') 315 | 316 | def test_parse_filename_with_spaces(self): 317 | filename = os.path.join(self.samples_dir, 'samples/git_filenames_with_spaces.diff') 318 | with open(filename) as f: 319 | res = PatchSet(f) 320 | 321 | self.assertEqual(len(res), 1) 322 | 323 | self.assertEqual(res[0].source_file, '/dev/null') 324 | self.assertEqual(res[0].target_file, 'b/has spaces/t.sql') 325 | self.assertTrue(res[0].is_added_file) 326 | self.assertEqual(res[0].path, 'has spaces/t.sql') 327 | 328 | def test_parse_filename_prefix_with_spaces(self): 329 | filename = os.path.join(self.samples_dir, 'samples/git_filenames_with_spaces_prefix.diff') 330 | with open(filename) as f: 331 | res = PatchSet(f) 332 | 333 | self.assertEqual(len(res), 1) 334 | 335 | self.assertEqual(res[0].source_file, '/dev/null') 336 | self.assertEqual(res[0].target_file, 'dst://foo bar/baz') 337 | self.assertTrue(res[0].is_added_file) 338 | self.assertEqual(res[0].path, 'dst://foo bar/baz') 339 | 340 | def test_parse_quoted_filename(self): 341 | filename = os.path.join(self.samples_dir, 'samples/git_quoted_filename.diff') 342 | with open(filename) as f: 343 | res = PatchSet(f) 344 | 345 | self.assertEqual(len(res), 1) 346 | 347 | self.assertEqual(res[0].source_file, '/dev/null') 348 | self.assertEqual(res[0].target_file, '"b/A \\303\\242 B.py"') 349 | self.assertTrue(res[0].is_added_file) 350 | self.assertEqual(res[0].path, '"A \\303\\242 B.py"') 351 | 352 | 353 | def test_deleted_file(self): 354 | filename = os.path.join(self.samples_dir, 'samples/git_delete.diff') 355 | with open(filename) as f: 356 | res = PatchSet(f) 357 | 358 | self.assertEqual(len(res), 1) 359 | self.assertEqual(res[0].source_file, 'a/somefile.c') 360 | 
self.assertEqual(res[0].target_file, '/dev/null') 361 | self.assertTrue(res[0].is_removed_file) 362 | 363 | def test_diff_lines_linenos(self): 364 | with open(self.sample_file, 'rb') as diff_file: 365 | res = PatchSet(diff_file, encoding='utf-8') 366 | 367 | target_line_nos = [] 368 | source_line_nos = [] 369 | diff_line_nos = [] 370 | for diff_file in res: 371 | for hunk in diff_file: 372 | for line in hunk: 373 | target_line_nos.append(line.target_line_no) 374 | source_line_nos.append(line.source_line_no) 375 | diff_line_nos.append(line.diff_line_no) 376 | 377 | expected_target_line_nos = [ 378 | # File: 1, Hunk: 1 379 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 380 | # File: 1, Hunk: 2 381 | 11, 12, 13, None, None, None, None, None, None, None, 14, 15, 16, None, 17, 18, 19, 20, 382 | # File: 1, Hunk: 3 383 | 22, 23, 24, 25, 26, 27, 28, 384 | # File: 2, Hunk 1 385 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 386 | # File: 3, Hunk 1 387 | None, None, None, None, None, None, None, None, None, 388 | ] 389 | expected_source_line_nos = [ 390 | # File: 1, Hunk: 1 391 | None, None, None, None, None, None, 1, 2, 3, 392 | # File: 1, Hunk: 2 393 | 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, None, 15, 16, 17, None, 18, 19, 20, 394 | # File: 1, Hunk: 3 395 | 22, 23, 24, None, None, None, None, 396 | # File: 2, Hunk 1 397 | None, None, None, None, None, None, None, None, None, 398 | # File: 3, Hunk 1 399 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 400 | ] 401 | expected_diff_line_nos = [ 402 | # File: 1, Hunk: 1 403 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 404 | # File: 1, Hunk: 2 405 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 406 | # File: 1, Hunk: 3 407 | 33, 34, 35, 36, 37, 38, 39, 408 | # File: 2, Hunk 1 409 | 43, 44, 45, 46, 47, 48, 49, 50, 51, 410 | # File: 3, Hunk 1 411 | 55, 56, 57, 58, 59, 60, 61, 62, 63, 412 | ] 413 | 414 | self.assertEqual(target_line_nos, expected_target_line_nos) 415 | self.assertEqual(source_line_nos, expected_source_line_nos) 416 | self.assertEqual(diff_line_nos, 
expected_diff_line_nos) 417 | 418 | def test_diff_hunk_positions(self): 419 | with open(self.sample_file, 'rb') as diff_file: 420 | res = PatchSet(diff_file, encoding='utf-8') 421 | self.do_test_diff_hunk_positions(res) 422 | 423 | def test_diff_metadata_only(self): 424 | with open(self.sample_file, 'rb') as diff_file: 425 | res = PatchSet(diff_file, encoding='utf-8', metadata_only=True) 426 | self.do_test_diff_hunk_positions(res) 427 | 428 | def do_test_diff_hunk_positions(self, res): 429 | hunk_positions = [] 430 | for diff_file in res: 431 | for hunk in diff_file: 432 | hunk_positions.append((hunk.source_start, hunk.target_start, 433 | hunk.source_length, hunk.target_length)) 434 | 435 | expected_hunk_positions = [ 436 | # File: 1, Hunk: 1 437 | (1, 1, 3, 9), 438 | # File: 1, Hunk: 2 439 | (5, 11, 16, 10), 440 | # File: 1, Hunk: 3 441 | (22, 22, 3, 7), 442 | # File: 2, Hunk: 1 443 | (0, 1, 0, 9), 444 | # File: 3, Hunk: 1 445 | (1, 0, 9, 0) 446 | ] 447 | 448 | self.assertEqual(hunk_positions, expected_hunk_positions) 449 | 450 | def test_binary_patch(self): 451 | utf8_file = os.path.join(self.samples_dir, 'samples/binary.diff') 452 | with open(utf8_file, 'r') as diff_file: 453 | res = PatchSet(diff_file) 454 | self.assertEqual(len(res), 1) 455 | patch = res[0] 456 | self.assertEqual(patch.source_file, '/dev/null') 457 | self.assertEqual(patch.target_file, 'b/1x1.png') 458 | self.assertTrue(patch.is_binary_file) 459 | self.assertTrue(patch.is_added_file) 460 | 461 | class TestVCSSamples(unittest.TestCase): 462 | """Tests for real examples from VCS.""" 463 | 464 | samples = ['bzr.diff', 'git.diff', 'hg.diff', 'svn.diff'] 465 | 466 | def test_samples(self): 467 | tests_dir = os.path.dirname(os.path.realpath(__file__)) 468 | for fname in self.samples: 469 | file_path = os.path.join(tests_dir, 'samples', fname) 470 | with codecs.open(file_path, 'r', encoding='utf-8') as diff_file: 471 | res = PatchSet(diff_file) 472 | 473 | # 3 files updated by diff 474 | 
self.assertEqual(len(res), 3) 475 | 476 | # 1 added file 477 | added_files = res.added_files 478 | self.assertEqual(len(added_files), 1) 479 | self.assertEqual(added_files[0].path, 'added_file') 480 | # 1 hunk, 4 lines 481 | self.assertEqual(len(added_files[0]), 1) 482 | self.assertEqual(added_files[0].added, 4) 483 | self.assertEqual(added_files[0].removed, 0) 484 | 485 | # 1 removed file 486 | removed_files = res.removed_files 487 | self.assertEqual(len(removed_files), 1) 488 | self.assertEqual(removed_files[0].path, 'removed_file') 489 | # 1 hunk, 3 removed lines 490 | self.assertEqual(len(removed_files[0]), 1) 491 | self.assertEqual(removed_files[0].added, 0) 492 | self.assertEqual(removed_files[0].removed, 3) 493 | 494 | # 1 modified file 495 | modified_files = res.modified_files 496 | self.assertEqual(len(modified_files), 1) 497 | self.assertEqual(modified_files[0].path, 'modified_file') 498 | # 1 hunk, 3 added lines, 1 removed line 499 | self.assertEqual(len(modified_files[0]), 1) 500 | self.assertEqual(modified_files[0].added, 3) 501 | self.assertEqual(modified_files[0].removed, 1) 502 | 503 | self.assertEqual(res.added, 7) 504 | self.assertEqual(res.removed, 4) 505 | 506 | # check that original diffs and those produced 507 | # by unidiff are the same 508 | with codecs.open(file_path, 'r', encoding='utf-8') as diff_file: 509 | self.assertEqual(diff_file.read(), str(res)) 510 | 511 | def test_git_renaming(self): 512 | tests_dir = os.path.dirname(os.path.realpath(__file__)) 513 | file_path = os.path.join(tests_dir, 'samples/git_rename.diff') 514 | with codecs.open(file_path, 'r', encoding='utf-8') as diff_file: 515 | res = PatchSet(diff_file) 516 | 517 | self.assertEqual(len(res), 3) 518 | self.assertEqual(len(res.modified_files), 3) 519 | self.assertEqual(len(res.added_files), 0) 520 | self.assertEqual(len(res.removed_files), 0) 521 | 522 | # renamed and modified files 523 | for patch in res[:2]: 524 | self.assertTrue(patch.is_rename) 525 | 
self.assertEqual(patch.added, 1) 526 | self.assertEqual(patch.removed, 1) 527 | # renamed file under sub-path 528 | patch = res[2] 529 | self.assertTrue(patch.is_rename) 530 | self.assertEqual(patch.added, 0) 531 | self.assertEqual(patch.removed, 0) 532 | # confirm the full path is in source/target filenames 533 | self.assertEqual(patch.source_file, 'a/sub/onefile') 534 | self.assertEqual(patch.target_file, 'b/sub/otherfile') 535 | # check path is the target path 536 | self.assertEqual(patch.path, 'sub/otherfile') 537 | 538 | # check that original diffs and those produced 539 | # by unidiff are the same 540 | with codecs.open(file_path, 'r', encoding='utf-8') as diff_file: 541 | self.assertEqual(diff_file.read(), str(res)) 542 | -------------------------------------------------------------------------------- /tests/test_patchedfile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The MIT License (MIT) 4 | # Copyright (c) 2014-2017 Matias Bordese 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.


"""Tests for PatchedFile."""

from __future__ import unicode_literals

import unittest

from unidiff.patch import PatchedFile, Hunk


class TestPatchedFile(unittest.TestCase):
    """Check how a PatchedFile classifies itself from the hunk it holds."""

    def setUp(self):
        super(TestPatchedFile, self).setUp()
        # every test starts from a PatchedFile built with default arguments
        self.patched_file = PatchedFile()

    def test_is_added_file(self):
        # single hunk whose source range is 0,0
        self.patched_file.append(
            Hunk(src_start=0, src_len=0, tgt_start=1, tgt_len=10))
        self.assertTrue(self.patched_file.is_added_file)

    def test_is_removed_file(self):
        # single hunk whose target range is 0,0
        self.patched_file.append(
            Hunk(src_start=1, src_len=10, tgt_start=0, tgt_len=0))
        self.assertTrue(self.patched_file.is_removed_file)

    def test_is_modified_file(self):
        # single hunk with non-empty source and target ranges
        self.patched_file.append(
            Hunk(src_start=1, src_len=10, tgt_start=1, tgt_len=8))
        self.assertTrue(self.patched_file.is_modified_file)
# --------------------------------------------------------------------------------
# /unidiff/__init__.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# The MIT License (MIT)
# Copyright (c) 2014-2017 Matias Bordese
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject
to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | 25 | """Unidiff parsing library.""" 26 | 27 | from __future__ import unicode_literals 28 | 29 | from unidiff import __version__ 30 | from unidiff.patch import ( 31 | DEFAULT_ENCODING, 32 | LINE_TYPE_ADDED, 33 | LINE_TYPE_CONTEXT, 34 | LINE_TYPE_REMOVED, 35 | Hunk, 36 | PatchedFile, 37 | PatchSet, 38 | UnidiffParseError, 39 | ) 40 | 41 | VERSION = __version__.__version__ 42 | -------------------------------------------------------------------------------- /unidiff/__version__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The MIT License (MIT) 4 | # Copyright (c) 2014-2023 Matias Bordese 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 
15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | __version__ = '0.7.5' 25 | -------------------------------------------------------------------------------- /unidiff/constants.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The MIT License (MIT) 4 | # Copyright (c) 2014-2023 Matias Bordese 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | 25 | """Useful constants and regexes used by the package.""" 26 | 27 | from __future__ import unicode_literals 28 | 29 | import re 30 | 31 | 32 | RE_SOURCE_FILENAME = re.compile( 33 | r'^--- (?P"?[^\t\n]+"?)(?:\t(?P[^\n]+))?') 34 | RE_TARGET_FILENAME = re.compile( 35 | r'^\+\+\+ (?P"?[^\t\n]+"?)(?:\t(?P[^\n]+))?') 36 | 37 | 38 | # check diff git line for git renamed files support 39 | RE_DIFF_GIT_HEADER = re.compile( 40 | r'^diff --git (?P"?a/[^\t\n]+"?) (?P"?b/[^\t\n]+"?)') 41 | RE_DIFF_GIT_HEADER_URI_LIKE = re.compile( 42 | r'^diff --git (?P.*://[^\t\n]+) (?P.*://[^\t\n]+)') 43 | RE_DIFF_GIT_HEADER_NO_PREFIX = re.compile( 44 | r'^diff --git (?P[^\t\n]+) (?P[^\t\n]+)') 45 | 46 | # check diff git new file marker `deleted file mode 100644` 47 | RE_DIFF_GIT_DELETED_FILE = re.compile(r'^deleted file mode \d+$') 48 | 49 | # check diff git new file marker `new file mode 100644` 50 | RE_DIFF_GIT_NEW_FILE = re.compile(r'^new file mode \d+$') 51 | 52 | 53 | # @@ (source offset, length) (target offset, length) @@ (section header) 54 | RE_HUNK_HEADER = re.compile( 55 | r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?\ @@[ ]?(.*)") 56 | 57 | # kept line (context) 58 | # \n empty line (treat like context) 59 | # + added line 60 | # - deleted line 61 | # \ No newline case 62 | RE_HUNK_BODY_LINE = re.compile( 63 | r'^(?P[- \+\\])(?P.*)', re.DOTALL) 64 | RE_HUNK_EMPTY_BODY_LINE = re.compile( 65 | r'^(?P[- \+\\]?)(?P[\r\n]{1,2})', re.DOTALL) 66 | 67 | RE_NO_NEWLINE_MARKER = re.compile(r'^\\ No newline at end of file') 68 | 69 | RE_BINARY_DIFF = re.compile( 70 | r'^Binary files? ' 71 | r'(?P[^\t]+?)(?:\t(?P[\s0-9:\+-]+))?' 72 | r'(?: and (?P[^\t]+?)(?:\t(?P[\s0-9:\+-]+))?)? 
(differ|has changed)') 73 | 74 | DEFAULT_ENCODING = 'UTF-8' 75 | 76 | DEV_NULL = '/dev/null' 77 | LINE_TYPE_ADDED = '+' 78 | LINE_TYPE_REMOVED = '-' 79 | LINE_TYPE_CONTEXT = ' ' 80 | LINE_TYPE_EMPTY = '' 81 | LINE_TYPE_NO_NEWLINE = '\\' 82 | LINE_VALUE_NO_NEWLINE = ' No newline at end of file' 83 | -------------------------------------------------------------------------------- /unidiff/errors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The MIT License (MIT) 4 | # Copyright (c) 2014-2017 Matias Bordese 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 


"""Errors and exceptions raised by the package."""

from __future__ import unicode_literals


class UnidiffParseError(Exception):
    """Raised when the unified diff data being parsed is not valid.

    Plain ``Exception`` subclass with no extra behaviour.
    """
# --------------------------------------------------------------------------------
# /unidiff/patch.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# The MIT License (MIT)
# Copyright (c) 2014-2023 Matias Bordese
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.


"""Classes used by the unified diff parser to keep the diff data."""

from __future__ import unicode_literals

import codecs
import sys

from unidiff.constants import (
    DEFAULT_ENCODING,
    DEV_NULL,
    LINE_TYPE_ADDED,
    LINE_TYPE_CONTEXT,
    LINE_TYPE_EMPTY,
    LINE_TYPE_REMOVED,
    LINE_TYPE_NO_NEWLINE,
    LINE_VALUE_NO_NEWLINE,
    RE_DIFF_GIT_DELETED_FILE,
    RE_DIFF_GIT_HEADER,
    RE_DIFF_GIT_HEADER_URI_LIKE,
    RE_DIFF_GIT_HEADER_NO_PREFIX,
    RE_DIFF_GIT_NEW_FILE,
    RE_HUNK_BODY_LINE,
    RE_HUNK_EMPTY_BODY_LINE,
    RE_HUNK_HEADER,
    RE_SOURCE_FILENAME,
    RE_TARGET_FILENAME,
    RE_NO_NEWLINE_MARKER,
    RE_BINARY_DIFF,
)
from unidiff.errors import UnidiffParseError


# Python 2/3 compatibility shims: under Python 2 ``__str__`` must return
# encoded bytes, so ``implements_to_string`` moves the text-returning
# ``__str__`` to ``__unicode__`` and installs an encoding wrapper.
PY2 = sys.version_info[0] == 2
if PY2:
    import io
    from StringIO import StringIO
    open_file = io.open
    make_str = lambda x: x.encode(DEFAULT_ENCODING)

    def implements_to_string(cls):
        cls.__unicode__ = cls.__str__
        cls.__str__ = lambda x: x.__unicode__().encode(DEFAULT_ENCODING)
        return cls
else:
    from io import StringIO
    from typing import Iterable, Optional, Union
    open_file = open
    make_str = str
    implements_to_string = lambda x: x
    unicode = str
    basestring = str


@implements_to_string
class Line(object):
    """A diff line.

    ``value`` is the line content without the leading type marker and
    ``line_type`` is that marker. The ``*_line_no`` attributes default to
    ``None`` and are filled in by the hunk parser where they apply.
    """

    def __init__(self, value, line_type,
                 source_line_no=None, target_line_no=None, diff_line_no=None):
        # type: (str, str, Optional[int], Optional[int], Optional[int]) -> None
        super(Line, self).__init__()
        self.source_line_no = source_line_no
        self.target_line_no = target_line_no
        self.diff_line_no = diff_line_no
        self.line_type = line_type
        self.value = value

    def __repr__(self):
        # type: () -> str
        return make_str("<Line: %s%s>") % (self.line_type, self.value)

    def __str__(self):
        # type: () -> str
        return "%s%s" % (self.line_type, self.value)

    def __eq__(self, other):
        # type: (Line) -> bool
        """Compare line numbers, type and value with another Line."""
        if not isinstance(other, Line):
            # let Python fall back to its default comparison instead of
            # failing with AttributeError on unrelated objects
            return NotImplemented
        return (self.source_line_no == other.source_line_no and
                self.target_line_no == other.target_line_no and
                self.diff_line_no == other.diff_line_no and
                self.line_type == other.line_type and
                self.value == other.value)

    @property
    def is_added(self):
        # type: () -> bool
        return self.line_type == LINE_TYPE_ADDED

    @property
    def is_removed(self):
        # type: () -> bool
        return self.line_type == LINE_TYPE_REMOVED

    @property
    def is_context(self):
        # type: () -> bool
        return self.line_type == LINE_TYPE_CONTEXT


@implements_to_string
class PatchInfo(list):
    """Lines with extended patch info.

    Format of this info is not documented and it very much depends on
    patch producer.

    """

    def __repr__(self):
        # type: () -> str
        # an instance with no lines yet must still be representable
        first_line = self[0].strip() if self else ''
        value = "<PatchInfo: %s>" % first_line
        return make_str(value)

    def __str__(self):
        # type: () -> str
        return ''.join(unicode(line) for line in self)


@implements_to_string
class Hunk(list):
    """Each of the modified blocks of a file.

    A list of Line objects plus the source/target ranges and the optional
    section header taken from the ``@@`` hunk header.
    """

    def __init__(self, src_start=0, src_len=0, tgt_start=0, tgt_len=0,
                 section_header=''):
        # type: (int, int, int, int, str) -> None
        super(Hunk, self).__init__()
        # a length given as None (omitted in the header) means one line
        if src_len is None:
            src_len = 1
        if tgt_len is None:
            tgt_len = 1
        # values may arrive as regex-captured strings, hence the int() calls
        self.source_start = int(src_start)
        self.source_length = int(src_len)
        self.target_start = int(tgt_start)
        self.target_length = int(tgt_len)
        self.section_header = section_header
        self._added = None  # Optional[int]
        self._removed = None  # Optional[int]

    def __repr__(self):
        # type: () -> str
        value = "<Hunk: @@ %d,%d %d,%d @@ %s>" % (self.source_start,
                                                  self.source_length,
                                                  self.target_start,
                                                  self.target_length,
                                                  self.section_header)
        return make_str(value)

    def __str__(self):
        # type: () -> str
        # section header is optional and thus we output it only if it's present
        head = "@@ -%d,%d +%d,%d @@%s\n" % (
            self.source_start, self.source_length,
            self.target_start, self.target_length,
            ' ' + self.section_header if self.section_header else '')
        content = ''.join(unicode(line) for line in self)
        return head + content

    def append(self, line):
        # type: (Line) -> None
        """Append the line to the hunk after checking it can be rendered."""
        # Make sure the line is encoded correctly. This is a no-op except for
        # potentially raising a UnicodeDecodeError.
        str(line)
        super(Hunk, self).append(line)

    @property
    def added(self):
        # type: () -> Optional[int]
        """Number of added lines in the hunk."""
        if self._added is not None:
            return self._added
        # re-calculate each time to allow for hunk modifications
        # (which should mean metadata_only switch wasn't used)
        return sum(1 for line in self if line.is_added)

    @property
    def removed(self):
        # type: () -> Optional[int]
        """Number of removed lines in the hunk."""
        if self._removed is not None:
            return self._removed
        # re-calculate each time to allow for hunk modifications
        # (which should mean metadata_only switch wasn't used)
        return sum(1 for line in self if line.is_removed)

    def is_valid(self):
        # type: () -> bool
        """Check hunk header data matches entered lines info."""
        return (len(self.source) == self.source_length and
                len(self.target) == self.target_length)

    def source_lines(self):
        # type: () -> Iterable[Line]
        """Hunk lines from source file (generator)."""
        return (l for l in self if l.is_context or l.is_removed)

    @property
    def source(self):
        # type: () -> Iterable[str]
        return [str(l) for l in self.source_lines()]

    def target_lines(self):
        # type: () -> Iterable[Line]
        """Hunk lines from target file (generator)."""
        return (l for l in self if l.is_context or l.is_added)

    @property
    def target(self):
        # type: () -> Iterable[str]
        return [str(l) for l in self.target_lines()]


class PatchedFile(list):
    """Patch updated file, it is a list of Hunks."""

    def __init__(self, patch_info=None, source='', target='',
                 source_timestamp=None, target_timestamp=None,
                 is_binary_file=False):
        # type: (Optional[PatchInfo], str, str, Optional[str], Optional[str], bool) -> None
        super(PatchedFile, self).__init__()
        self.patch_info = patch_info
        self.source_file = source
        self.source_timestamp = source_timestamp
        self.target_file = target
        self.target_timestamp = target_timestamp
        self.is_binary_file = is_binary_file

    def __repr__(self):
        # type: () -> str
        return make_str("<PatchedFile: %s>") % make_str(self.path)

    def __str__(self):
        # type: () -> str
        source = ''
        target = ''
        # patch info is optional
        info = '' if self.patch_info is None else str(self.patch_info)
        # ---/+++ headers are only emitted for non-binary files with hunks
        if not self.is_binary_file and self:
            source = "--- %s%s\n" % (
                self.source_file,
                '\t' + self.source_timestamp if self.source_timestamp else '')
            target = "+++ %s%s\n" % (
                self.target_file,
                '\t' + self.target_timestamp if self.target_timestamp else '')
        hunks = ''.join(unicode(hunk) for hunk in self)
        return info + source + target + hunks

    def _parse_hunk(self, header, diff, encoding, metadata_only):
        # type: (str, enumerate[str], Optional[str], bool) -> None
        """Parse hunk details.

        ``header`` is the ``@@`` line and ``diff`` the shared
        ``(line number, line)`` iterator, from which exactly the hunk body
        is consumed. The resulting Hunk is appended to this file.

        Raises UnidiffParseError on an invalid header or body line, or when
        the body is longer or shorter than the header announces.
        """
        header_info = RE_HUNK_HEADER.match(header)
        if header_info is None:
            raise UnidiffParseError('Hunk header expected: %s' % header)
        hunk_info = header_info.groups()
        hunk = Hunk(*hunk_info)

        source_line_no = hunk.source_start
        target_line_no = hunk.target_start
        expected_source_end = source_line_no + hunk.source_length
        expected_target_end = target_line_no + hunk.target_length
        added = 0
        removed = 0

        for diff_line_no, line in diff:
            if encoding is not None:
                line = line.decode(encoding)

            if metadata_only:
                # quick line type detection, no regex required
                line_type = line[0] if line else LINE_TYPE_CONTEXT
                if line_type in ('\r', '\n'):
                    # a bare line terminator is an empty context line, as in
                    # the full parsing branch below
                    line_type = LINE_TYPE_CONTEXT
                if line_type not in (LINE_TYPE_ADDED,
                                     LINE_TYPE_REMOVED,
                                     LINE_TYPE_CONTEXT,
                                     LINE_TYPE_NO_NEWLINE):
                    raise UnidiffParseError(
                        'Hunk diff line expected: %s' % line)

                if line_type == LINE_TYPE_ADDED:
                    target_line_no += 1
                    added += 1
                elif line_type == LINE_TYPE_REMOVED:
                    source_line_no += 1
                    removed += 1
                elif line_type == LINE_TYPE_CONTEXT:
                    target_line_no += 1
                    source_line_no += 1

                # no file content tracking
                original_line = None

            else:
                # parse diff line content
                valid_line = RE_HUNK_BODY_LINE.match(line)
                if not valid_line:
                    valid_line = RE_HUNK_EMPTY_BODY_LINE.match(line)

                if not valid_line:
                    raise UnidiffParseError(
                        'Hunk diff line expected: %s' % line)

                line_type = valid_line.group('line_type')
                if line_type == LINE_TYPE_EMPTY:
                    line_type = LINE_TYPE_CONTEXT

                value = valid_line.group('value')  # type: str
                original_line = Line(value, line_type=line_type)

                if line_type == LINE_TYPE_ADDED:
                    original_line.target_line_no = target_line_no
                    target_line_no += 1
                elif line_type == LINE_TYPE_REMOVED:
                    original_line.source_line_no = source_line_no
                    source_line_no += 1
                elif line_type == LINE_TYPE_CONTEXT:
                    original_line.target_line_no = target_line_no
                    original_line.source_line_no = source_line_no
                    target_line_no += 1
                    source_line_no += 1
                elif line_type == LINE_TYPE_NO_NEWLINE:
                    pass
                else:
                    original_line = None

            # stop parsing if we got past expected number of lines
            if (source_line_no > expected_source_end or
                    target_line_no > expected_target_end):
                raise UnidiffParseError('Hunk is longer than expected')

            if original_line:
                original_line.diff_line_no = diff_line_no
                hunk.append(original_line)

            # if hunk source/target lengths are ok, hunk is complete
            if (source_line_no == expected_source_end and
                    target_line_no == expected_target_end):
                break

        # report an error if we haven't got expected number of lines
        if (source_line_no < expected_source_end or
                target_line_no < expected_target_end):
            raise UnidiffParseError('Hunk is shorter than expected')

        if metadata_only:
            # HACK: set fixed calculated values when metadata_only is enabled
            hunk._added = added
            hunk._removed = removed

        self.append(hunk)

    def _add_no_newline_marker_to_last_hunk(self):
        # type: () -> None
        """Append a no-newline marker line to the last hunk."""
        if not self:
            raise UnidiffParseError(
                'Unexpected marker:' + LINE_VALUE_NO_NEWLINE)
        last_hunk = self[-1]
        last_hunk.append(
            Line(LINE_VALUE_NO_NEWLINE + '\n', line_type=LINE_TYPE_NO_NEWLINE))

    def _append_trailing_empty_line(self):
        # type: () -> None
        """Append an empty line to the last hunk."""
        if not self:
            raise UnidiffParseError('Unexpected trailing newline character')
        last_hunk = self[-1]
        last_hunk.append(Line('\n', line_type=LINE_TYPE_EMPTY))

    @property
    def path(self):
        # type: () -> str
        """Return the file path abstracted from VCS."""
        filepath = self.source_file
        if filepath in (None, DEV_NULL) or (
                self.is_rename and self.target_file not in (None, DEV_NULL)):
            # if this is a rename, prefer the target filename
            filepath = self.target_file

        # strip the a/ or b/ prefix inside the quotes, then re-quote
        quoted = filepath.startswith('"') and filepath.endswith('"')
        if quoted:
            filepath = filepath[1:-1]

        if filepath.startswith('a/') or filepath.startswith('b/'):
            filepath = filepath[2:]

        if quoted:
            filepath = '"{}"'.format(filepath)

        return filepath

    @property
    def added(self):
        # type: () -> int
        """Return the file total added lines."""
        return sum([hunk.added for hunk in self])

    @property
    def removed(self):
        # type: () -> int
        """Return the file total removed lines."""
        return sum([hunk.removed for hunk in self])

    @property
    def is_rename(self):
        # type: () -> bool
        """Return True if source and target names differ past 2 characters.

        Neither side may be /dev/null. The first two characters of each
        name are skipped before comparing.
        """
        return (self.source_file != DEV_NULL
                and self.target_file != DEV_NULL
                and self.source_file[2:] != self.target_file[2:])

    @property
    def is_added_file(self):
        # type: () -> bool
        """Return True if this patch adds the file."""
        if self.source_file == DEV_NULL:
            return True
        return (len(self) == 1 and self[0].source_start == 0 and
                self[0].source_length == 0)

    @property
    def is_removed_file(self):
        # type: () -> bool
        """Return True if this patch removes the file."""
        if self.target_file == DEV_NULL:
            return True
        return (len(self) == 1 and self[0].target_start == 0 and
                self[0].target_length == 0)

    @property
    def is_modified_file(self):
        # type: () -> bool
        """Return True if this patch modifies the file."""
        return not (self.is_added_file or self.is_removed_file)


@implements_to_string
class PatchSet(list):
    """A list of PatchedFiles."""

    def __init__(self, f, encoding=None, metadata_only=False):
        # type: (Union[StringIO, str], Optional[str], bool) -> None
        super(PatchSet, self).__init__()

        # convert string inputs to StringIO objects
        if isinstance(f, basestring):
            f = self._convert_string(f, encoding)  # type: StringIO

        # make sure we pass an iterator object to parse
        data = iter(f)
        # if encoding is None, assume we are reading unicode data
        # when metadata_only is True, only perform a minimal metadata parsing
        # (ie. hunks without content) which is around 2.5-6 times faster;
        # it will still validate the diff metadata consistency and get counts
        self._parse(data, encoding=encoding, metadata_only=metadata_only)

    def __repr__(self):
        # type: () -> str
        return make_str('<PatchSet: %s>') % super(PatchSet, self).__repr__()

    def __str__(self):
        # type: () -> str
        return ''.join(unicode(patched_file) for patched_file in self)

    def _parse(self, diff, encoding, metadata_only):
        # type: (StringIO, Optional[str], bool) -> None
        """Parse the diff lines and fill this set with PatchedFile objects.

        Each line is tried against the known markers in order; anything
        unmatched is kept as patch info. Raises UnidiffParseError when a
        marker shows up where it is not expected.
        """
        current_file = None
        patch_info = None
        # last source header seen; None until a source name is known
        source_file = None
        source_timestamp = None

        diff = enumerate(diff, 1)
        for unused_diff_line_no, line in diff:
            if encoding is not None:
                line = line.decode(encoding)

            # check for a git file rename
            is_diff_git_header = RE_DIFF_GIT_HEADER.match(line) or \
                RE_DIFF_GIT_HEADER_URI_LIKE.match(line) or \
                RE_DIFF_GIT_HEADER_NO_PREFIX.match(line)
            if is_diff_git_header:
                patch_info = PatchInfo()
                source_file = is_diff_git_header.group('source')
                target_file = is_diff_git_header.group('target')
                current_file = PatchedFile(
                    patch_info, source_file, target_file, None, None)
                self.append(current_file)
                patch_info.append(line)
                continue

            # check for a git new file
            is_diff_git_new_file = RE_DIFF_GIT_NEW_FILE.match(line)
            if is_diff_git_new_file:
                if current_file is None or patch_info is None:
                    raise UnidiffParseError('Unexpected new file found: %s' % line)
                current_file.source_file = DEV_NULL
                patch_info.append(line)
                continue

            # check for a git deleted file
            is_diff_git_deleted_file = RE_DIFF_GIT_DELETED_FILE.match(line)
            if is_diff_git_deleted_file:
                if current_file is None or patch_info is None:
                    raise UnidiffParseError('Unexpected deleted file found: %s' % line)
                current_file.target_file = DEV_NULL
                patch_info.append(line)
                continue

            # check for source file header
            is_source_filename = RE_SOURCE_FILENAME.match(line)
            if is_source_filename:
                source_file = is_source_filename.group('filename')
                source_timestamp = is_source_filename.group('timestamp')
                # reset current file, unless we are processing a rename
                # (in that case, source files should match)
                if current_file is not None and not (
                        current_file.source_file == source_file):
                    current_file = None
                elif current_file is not None:
                    current_file.source_timestamp = source_timestamp
                continue

            # check for target file header
            is_target_filename = RE_TARGET_FILENAME.match(line)
            if is_target_filename:
                target_file = is_target_filename.group('filename')
                target_timestamp = is_target_filename.group('timestamp')
                if current_file is not None and not (current_file.target_file == target_file):
                    raise UnidiffParseError('Target without source: %s' % line)
                if current_file is None:
                    if source_file is None:
                        # a +++ header with no source name seen so far
                        raise UnidiffParseError(
                            'Target without source: %s' % line)
                    # add current file to PatchSet
                    current_file = PatchedFile(
                        patch_info, source_file, target_file,
                        source_timestamp, target_timestamp)
                    self.append(current_file)
                    patch_info = None
                else:
                    current_file.target_timestamp = target_timestamp
                continue

            # check for hunk header
            is_hunk_header = RE_HUNK_HEADER.match(line)
            if is_hunk_header:
                patch_info = None
                if current_file is None:
                    raise UnidiffParseError('Unexpected hunk found: %s' % line)
                # the hunk parser consumes its body lines from the same iterator
                current_file._parse_hunk(line, diff, encoding, metadata_only)
                continue

            # check for no newline marker
            is_no_newline = RE_NO_NEWLINE_MARKER.match(line)
            if is_no_newline:
                if current_file is None:
                    raise UnidiffParseError('Unexpected marker: %s' % line)
                current_file._add_no_newline_marker_to_last_hunk()
                continue

            # sometimes hunks can be followed by empty lines
            if line == '\n' and current_file is not None:
                current_file._append_trailing_empty_line()
                continue

            # if nothing has matched above then this line is a patch info
            if patch_info is None:
                current_file = None
                patch_info = PatchInfo()

            is_binary_diff = RE_BINARY_DIFF.match(line)
            if is_binary_diff:
                source_file = is_binary_diff.group('source_filename')
                target_file = is_binary_diff.group('target_filename')
                patch_info.append(line)
                if current_file is not None:
                    current_file.is_binary_file = True
                else:
                    current_file = PatchedFile(
                        patch_info, source_file, target_file, is_binary_file=True)
                    self.append(current_file)
                patch_info = None
                current_file = None
                continue

            if line == 'GIT binary patch\n':
                if current_file is None:
                    # marker with no file header before it
                    raise UnidiffParseError(
                        'Unexpected binary patch found: %s' % line)
                current_file.is_binary_file = True
                patch_info = None
                current_file = None
                continue

            patch_info.append(line)

    @classmethod
    def from_filename(cls, filename, encoding=DEFAULT_ENCODING, errors=None, newline=None):
        # type: (str, str, Optional[str], Optional[str]) -> PatchSet
        """Return a PatchSet instance given a diff filename."""
        with open_file(filename, 'r', encoding=encoding, errors=errors, newline=newline) as f:
            instance = cls(f)
        return instance

    @staticmethod
    def _convert_string(data, encoding=None, errors='strict'):
        # type: (Union[str, bytes], Optional[str], str) -> StringIO
        """Wrap the diff data in a StringIO, decoding it if encoding is set."""
        if encoding is not None:
            # if encoding is given, assume bytes and decode
            data = unicode(data, encoding=encoding, errors=errors)
        return StringIO(data)

    @classmethod
    def from_string(cls, data, encoding=None, errors='strict'):
        # type: (Union[str, bytes], Optional[str], str) -> PatchSet
        """Return a PatchSet instance given a diff string."""
        return cls(cls._convert_string(data, encoding, errors))

    @property
    def added_files(self):
        # type: () -> list[PatchedFile]
        """Return patch added files as a list."""
        return [f for f in self if f.is_added_file]

    @property
    def removed_files(self):
        # type: () -> list[PatchedFile]
        """Return patch removed files as a list."""
        return [f for f in self if f.is_removed_file]

    @property
    def modified_files(self):
        # type: () -> list[PatchedFile]
        """Return patch modified files as a list."""
        return [f for f in self if f.is_modified_file]

    @property
    def added(self):
        # type: () -> int
        """Return the patch total added lines."""
        return sum([f.added for f in self])

    @property
    def removed(self):
        # type: () -> int
        """Return the patch total removed lines."""
        return sum([f.removed for f in self])

# --------------------------------------------------------------------------------