├── .gitignore
├── .travis.yml
├── AUTHORS
├── HISTORY
├── LICENSE
├── MANIFEST.in
├── README.rst
├── bin
    └── unidiff
├── run_tests.sh
├── setup.cfg
├── setup.py
├── tests
    ├── __init__.py
    ├── samples
    │   ├── binary.diff
    │   ├── bzr.diff
    │   ├── git.diff
    │   ├── git_cr.diff
    │   ├── git_delete.diff
    │   ├── git_filenames_with_spaces.diff
    │   ├── git_filenames_with_spaces_prefix.diff
    │   ├── git_no_prefix.diff
    │   ├── git_quoted_filename.diff
    │   ├── git_rename.diff
    │   ├── hg.diff
    │   ├── sample0.diff
    │   ├── sample1.diff
    │   ├── sample2.diff
    │   ├── sample3.diff
    │   ├── sample4.diff
    │   ├── sample5.diff
    │   ├── sample6.diff
    │   ├── sample7.diff
    │   ├── sample8.diff
    │   └── svn.diff
    ├── test_hunks.py
    ├── test_line.py
    ├── test_parser.py
    └── test_patchedfile.py
└── unidiff
    ├── __init__.py
    ├── __version__.py
    ├── constants.py
    ├── errors.py
    └── patch.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | *.py[cod]
 3 | __pycache__
 4 | build
 5 | dist
 6 | unidiff.egg-info
 7 | 
 8 | # Vim
 9 | *.swp
10 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 |   - "3.7"
4 |   - "3.8"
5 |   - "3.9"
6 |   - "3.10"
7 |   - "3.11"
8 | script: ./run_tests.sh
9 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | Main developer
 2 | --------------
 3 | 
 4 |   * Matias Bordese (`@matiasb`_)
 5 | 
 6 | 
 7 | Contributors
 8 | ------------
 9 | 
10 |   * Natalia Bidart (`@nessita`_)
11 |   * Jacobo de Vera (`@jdevera`_)
12 |   * Lei Zhang (`@antiAgainst`_)
13 |   * Sumeet Agarwal (`@sumeet`_)
14 |   * Philipp Kewisch (`@kewisch`_)
15 |   * Allan Lewis (`@allanlewis`_)
16 |   * Andrew Lapidas (`@alapidas`_)
17 |   * Daniel Thompson (`@daniel-thompson`_)
18 |   * Sebastian Kreft (`@sk-`_)
19 |   * Thomas Grainger (`@graingert`_)
20 |   * (`snake-scaly`_)
21 |   * Dan Callaghan (`@danc86`_)
22 |   * Max Bittker (`@MaxBittker`_)
23 |   * Volo Zyko (`@volo-zyko`_)
24 |   * Robert Estelle (`@erydo`_)
25 |   * Dylan Grafmyre
26 |   * Povilas Kanapickas (`@p12tic`_)
27 |   * Snowhite (`@CirQ`_)
28 |   * earonesty (`@earonesty`_)
29 |   * Ben Carlsson (`@glacials`_)
30 |   * (`@huichen-cs`)
31 |   * Mikhail f. Shiryaev (`@Felixoid`)
32 |   * Ronuk Raval (`@rraval`)
33 |   * anthony sottile (`@asottile-sentry`)
34 |   * (`@cpackham-atlnz`)
35 |   * David Leen (`@dleen`)
36 |   * Martin Liška (`@marxin`)
37 |   * Tushar Sadhwani (`@tushar-deepsource`)
38 | 


--------------------------------------------------------------------------------
/HISTORY:
--------------------------------------------------------------------------------
 1 | History
 2 | -------
 3 | 
 4 | 0.7.5 - 2023-03-09
 5 | ------------------
 6 | 
 7 | * Fixed issue with spaces in filename when using custom git prefix.
 8 | * Support binary patch format.
 9 | 
10 | 0.7.4 - 2022-06-26
11 | ------------------
12 | 
13 | * Fixed git diff parsing issues (filename with spaces, only one added/deleted file).
14 | 
15 | 0.7.3 - 2022-02-06
16 | ------------------
17 | 
18 | * Fixed RE_BINARY_DIFF regex to make it a raw string.
19 | 
20 | 0.7.2 - 2022-01-28
21 | ------------------
22 | 
23 | * Fixed issue when parsing git diff header generated with `--no-prefix`.
24 | 
25 | 0.7.1 - 2022-01-27
26 | ------------------
27 | 
28 | * Improved git added/deleted file detection.
29 | * Added `newline` optional param when parsing `from_filename`.
30 | 
31 | 0.7.0 - 2021-08-16
32 | ------------------
33 | 
34 | * Fixed issues handling multiple git renames.
35 | * Renamed files return target filename as PatchedFile.path.
36 | * Fixed error when first change is a binary file.
37 | * Added source code type hints.
38 | 
39 | 0.6.0 - 2020-05-07
 40 | ------------------
41 | 
 42 | * Updated PatchSet constructor to accept an optional (defaults to False)
 43 |   metadata_only parameter to only keep diff metadata information without
 44 |   the diff text data (better performance).
45 | * Identify and track changed binary files.
46 | * Added support for git rename syntax.
47 | 
48 | 0.5.5 - 2018-01-03
49 | ------------------
50 | 
51 | * Updated PatchSet constructor to accept string data.
52 | * Added support to parse extended patch info.
53 | 
54 | 0.5.4 - 2017-05-26
55 | ------------------
56 | 
57 | * Added PatchSet.from_string helper.
58 | * Do not install tests as top-level package.
59 | 
60 | 0.5.3 - 2017-04-10
61 | ------------------
62 | 
63 | * Re-released 0.5.2 as 0.5.3 because of issues with PyPI.
64 | 
65 | 0.5.2 - 2016-02-02
66 | ------------------
67 | 
68 | * Added diff line number to Line metadata.
69 | * Optimizations for large hunks.
70 | * Fix for git empty new lines.
 71 | * Added (optional) errors parameter to PatchSet.from_filename,
 72 |   to specify how to handle encoding errors.
73 | 
74 | 
75 | 0.5.1 - 2015-01-18
76 | ------------------
77 | 
78 | * Added (optional) encoding parameter to PatchSet.
79 | * Added support to get any iterable as PatchSet diff argument.
80 | 
81 | 
82 | 0.5 - 2014-12-14
83 | ----------------
84 | 
85 | * Release on PyPI.
86 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | Copyright (c) 2012 Matias Bordese
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
20 | OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include bin/*
2 | include tests/samples/*
3 | include HISTORY
4 | include LICENSE
5 | include README.rst
6 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Unidiff
  2 | =======
  3 | 
  4 | Simple Python library to parse and interact with unified diff data.
  5 | 
  6 | .. image:: https://www.travis-ci.com/matiasb/python-unidiff.svg?branch=master
  7 |     :target: https://travis-ci.com/matiasb/python-unidiff
  8 | 
  9 | Installing unidiff
 10 | ------------------
 11 | 
 12 | ::
 13 | 
 14 |     $ pip install unidiff
 15 | 
 16 | 
 17 | Quick start
 18 | -----------
 19 | 
 20 | .. code-block:: python
 21 | 
 22 |     >>> import urllib.request
 23 |     >>> from unidiff import PatchSet
 24 |     >>> diff = urllib.request.urlopen('https://github.com/matiasb/python-unidiff/pull/3.diff')
 25 |     >>> encoding = diff.headers.get_charsets()[0]
 26 |     >>> patch = PatchSet(diff, encoding=encoding)
 27 |     >>> patch
 28 |     <PatchSet: [<PatchedFile: .gitignore>, <PatchedFile: unidiff/patch.py>, <PatchedFile: unidiff/utils.py>]>
 29 |     >>> patch[0]
 30 |     <PatchedFile: .gitignore>
 31 |     >>> patch[0].is_added_file
 32 |     True
 33 |     >>> patch[0].added
 34 |     6
 35 |     >>> patch[1]
 36 |     <PatchedFile: unidiff/patch.py>
 37 |     >>> patch[1].added, patch[1].removed
 38 |     (20, 11)
 39 |     >>> len(patch[1])
 40 |     6
 41 |     >>> patch[1][2]
 42 |     <Hunk: @@ 109,14 110,21 @@ def __repr__(self):>
 43 |     >>> patch[2]
 44 |     <PatchedFile: unidiff/utils.py>
 45 |     >>> print(patch[2])
 46 |     diff --git a/unidiff/utils.py b/unidiff/utils.py
 47 |     index eae63e6..29c896a 100644
 48 |     --- a/unidiff/utils.py
 49 |     +++ b/unidiff/utils.py
 50 |     @@ -37,4 +37,3 @@
 51 |     # - deleted line
 52 |     # \ No newline case (ignore)
 53 |     RE_HUNK_BODY_LINE = re.compile(r'^([- \+\\])')
 54 |     -
 55 | 
 56 | 
 57 | Load unified diff data by instantiating :code:`PatchSet` with a file-like object as
 58 | argument, or use the :code:`PatchSet.from_filename` class method to read the diff from a file.
 59 | 
 60 | A :code:`PatchSet` is a list of files updated by the given patch. For each :code:`PatchedFile`
 61 | you can get stats (if it is a new, removed or modified file; the source/target
 62 | lines; etc), besides having access to each hunk (also like a list) and its
 63 | respective info.
 64 | 
 65 | At any point you can get the string representation of the current object, and
 66 | that will return the unified diff data of it.
 67 | 
 68 | As a quick example of what can be done, check the bin/unidiff file.
 69 | 
 70 | Also, once installed, unidiff provides a command-line program that displays
 71 | information from diff data (a file, or stdin). For example:
 72 | 
 73 | ::
 74 | 
 75 |     $ git diff | unidiff
 76 |     Summary
 77 |     -------
 78 |     README.md: +6 additions, -0 deletions
 79 | 
 80 |     1 modified file(s), 0 added file(s), 0 removed file(s)
 81 |     Total: 6 addition(s), 0 deletion(s)
 82 | 
 83 | 
 84 | Load a local diff file
 85 | ----------------------
 86 | 
 87 | To instantiate :code:`PatchSet` from a local file, you can use:
 88 | 
 89 | .. code-block:: python
 90 | 
 91 |     >>> from unidiff import PatchSet
 92 |     >>> patch = PatchSet.from_filename('tests/samples/bzr.diff', encoding='utf-8')
 93 |     >>> patch
 94 |     <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, <PatchedFile: removed_file>]>
 95 | 
 96 | Notice the (optional) :code:`encoding` parameter; if it is not specified, unicode input is expected. Alternatively, open the file with the desired encoding yourself:
 97 | 
 98 | .. code-block:: python
 99 | 
100 |     >>> import codecs
101 |     >>> from unidiff import PatchSet
102 |     >>> with codecs.open('tests/samples/bzr.diff', 'r', encoding='utf-8') as diff:
103 |     ...     patch = PatchSet(diff)
104 |     ...
105 |     >>> patch
106 |     <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, <PatchedFile: removed_file>]>
107 | 
108 | Finally, you can also instantiate :code:`PatchSet` passing any iterable (and encoding, if needed):
109 | 
110 | .. code-block:: python
111 | 
112 |     >>> from unidiff import PatchSet
113 |     >>> with open('tests/samples/bzr.diff', 'r') as diff:
114 |     ...     data = diff.readlines()
115 |     ...
116 |     >>> patch = PatchSet(data)
117 |     >>> patch
118 |     <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, <PatchedFile: removed_file>]>
119 | 
120 | If you don't need to be able to rebuild the original unified diff input, you can pass
121 | :code:`metadata_only=True` (defaults to :code:`False`), which should help make
122 | parsing more efficient:
123 | 
124 | .. code-block:: python
125 | 
126 |     >>> from unidiff import PatchSet
127 |     >>> patch = PatchSet.from_filename('tests/samples/bzr.diff', encoding='utf-8', metadata_only=True)
128 | 
129 | 
130 | References
131 | ----------
132 | 
133 | * https://en.wikipedia.org/wiki/Diff_utility
134 | * https://www.artima.com/weblogs/viewpost.jsp?thread=164293
135 | 


--------------------------------------------------------------------------------
/bin/unidiff:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from __future__ import print_function, unicode_literals
 4 | 
 5 | import argparse
 6 | import codecs
 7 | import sys
 8 | 
 9 | from unidiff import DEFAULT_ENCODING, PatchSet
10 | 
11 | 
12 | PY2 = sys.version_info[0] == 2
13 | DESCRIPTION = """Unified diff metadata.
14 | 
15 | Examples:
16 |     $ git diff | unidiff
17 |     $ hg diff | unidiff --show-diff
18 |     $ unidiff -f patch.diff
19 | 
20 | """
21 | 
22 | def get_parser():
23 |     parser = argparse.ArgumentParser(
24 |         formatter_class=argparse.RawDescriptionHelpFormatter,
25 |         description=DESCRIPTION)
26 |     parser.add_argument('--show-diff', action="store_true", default=False,
27 |                         dest='show_diff', help='output diff to stdout')
28 |     parser.add_argument('-f', '--file', dest='diff_file',
29 |                         type=argparse.FileType('r'),
30 |                         help='if not specified, read diff data from stdin')
31 |     return parser
32 | 
33 | 
34 | if __name__ == '__main__':
35 |     parser = get_parser()
36 |     args = parser.parse_args()
37 | 
38 |     encoding = DEFAULT_ENCODING
39 |     if args.diff_file:
40 |         diff_file = args.diff_file
41 |     else:
42 |         encoding = sys.stdin.encoding or encoding
43 |         diff_file = sys.stdin
44 | 
45 |     if PY2:
46 |         diff_file = codecs.getreader(encoding)(diff_file)
47 | 
48 |     patch = PatchSet(diff_file, metadata_only=(not args.show_diff))
49 | 
50 |     if args.show_diff:
51 |         print(patch)
52 |         print()
53 | 
54 |     print('Summary')
55 |     print('-------')
56 |     additions = 0
57 |     deletions = 0
58 |     renamed_files = 0
59 |     for f in patch:
60 |         if f.is_binary_file:
61 |             print('%s:' % f.path, '(binary file)')
62 |         else:
63 |             additions += f.added
64 |             deletions += f.removed
65 |             print('%s:' % f.path, '+%d additions,' % f.added,
66 |                   '-%d deletions' % f.removed)
67 |         renamed_files = renamed_files + 1 if f.is_rename else renamed_files
68 | 
69 |     print()
70 |     print('%d modified file(s), %d added file(s), %d removed file(s)' % (
71 |         len(patch.modified_files), len(patch.added_files),
72 |         len(patch.removed_files)))
73 |     if renamed_files:
74 |         print('%d file(s) renamed' % renamed_files)
75 |     print('Total: %d addition(s), %d deletion(s)' % (additions, deletions))
76 | 


--------------------------------------------------------------------------------
/run_tests.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | PYTHONPATH=unidiff python -m unittest discover -s tests/
3 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 | 
4 | [metadata]
5 | license_file = LICENSE
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Author: Matías Bordese
 3 | 
 4 | import codecs
 5 | import os
 6 | 
 7 | from setuptools import find_packages, setup
 8 | 
 9 | 
10 | # metadata
11 | NAME = 'unidiff'
12 | DESCRIPTION = 'Unified diff parsing/metadata extraction library.'
13 | KEYWORDS = ['unified', 'diff', 'parse', 'metadata']
14 | URL = 'https://github.com/matiasb/python-unidiff'
15 | EMAIL = 'mbordese@gmail.com'
16 | AUTHOR = 'Matias Bordese'
17 | LICENSE = 'MIT'
18 | 
19 | HERE = os.path.abspath(os.path.dirname(__file__))
20 | 
21 | # use README as the long-description
22 | with codecs.open(os.path.join(HERE, 'README.rst'), "rb", "utf-8") as f:
23 |     long_description = f.read()
24 | 
25 | 
26 | # load __version__.py module as a dictionary
27 | about = {}
28 | with open(os.path.join(HERE, 'unidiff/__version__.py')) as f:
29 |     exec(f.read(), about)
30 | 
31 | 
32 | setup(
33 |     name=NAME,
34 |     version=about['__version__'],
35 |     description=DESCRIPTION,
36 |     long_description=long_description,
37 |     keywords=KEYWORDS,
38 |     author=AUTHOR,
39 |     author_email=EMAIL,
40 |     url=URL,
41 |     packages=find_packages(exclude=('tests',)),
42 |     scripts=['bin/unidiff'],
43 |     include_package_data=True,
44 |     license=LICENSE,
45 |     classifiers=[
46 |         'Intended Audience :: Developers',
47 |         'Development Status :: 4 - Beta',
48 |         'Programming Language :: Python :: 3.7',
49 |         'Programming Language :: Python :: 3.8',
50 |         'Programming Language :: Python :: 3.9',
51 |         'Programming Language :: Python :: 3.10',
52 |         'Programming Language :: Python :: 3.11',
53 |     ],
54 |     test_suite='tests',
55 | )
56 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # The MIT License (MIT)
 2 | # Copyright (c) 2014-2017 Matias Bordese
 3 | #
 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | # of this software and associated documentation files (the "Software"), to deal
 6 | # in the Software without restriction, including without limitation the rights
 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | # copies of the Software, and to permit persons to whom the Software is
 9 | # furnished to do so, subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be included in
12 | # all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
20 | # OR OTHER DEALINGS IN THE SOFTWARE.
21 | 
22 | """Tests for unidiff."""
23 | 


--------------------------------------------------------------------------------
/tests/samples/binary.diff:
--------------------------------------------------------------------------------
 1 | From 62ea0eaf7e00170a8fca79f665442f9f44725956 Mon Sep 17 00:00:00 2001
 2 | From: Martin Liska <mliska@suse.cz>
 3 | Date: Fri, 9 Dec 2022 12:05:32 +0100
 4 | Subject: [PATCH] add pixel
 5 | 
 6 | ---
 7 |  1x1.png | Bin 0 -> 95 bytes
 8 |  1 file changed, 0 insertions(+), 0 deletions(-)
 9 |  create mode 100644 1x1.png
10 | 
11 | diff --git a/1x1.png b/1x1.png
12 | new file mode 100644
13 | index 0000000000000000000000000000000000000000..1914264c08781d1f30ee0b8482bccf44586f2dc1
14 | GIT binary patch
15 | literal 95
16 | zcmeAS@N?(olHy`uVBq!ia0vp^j3CU&3?x-=hn)ga%mF?ju0VQumF+E%TuG2$FoVOh
17 | l8)-lem#2$k2*>s01R$G<f$@>z9%CSj!PC{xWt~$(697H@6ZHT9
18 | 
19 | literal 0
20 | HcmV?d00001
21 | 
22 | --
23 | 2.38.1
24 | 


--------------------------------------------------------------------------------
/tests/samples/bzr.diff:
--------------------------------------------------------------------------------
 1 | === added file 'added_file'
 2 | --- added_file	1970-01-01 00:00:00 +0000
 3 | +++ added_file	2013-10-13 23:44:04 +0000
 4 | @@ -0,0 +1,4 @@
 5 | +This was missing!
 6 | +Adding it now.
 7 | +
 8 | +Only for testing purposes.
 9 | \ No newline at end of file
10 | 
11 | === modified file 'modified_file'
12 | --- modified_file	2013-10-13 23:53:13 +0000
13 | +++ modified_file	2013-10-13 23:53:26 +0000
14 | @@ -1,5 +1,7 @@
15 |  This is the original content.
16 |  
17 | -This should be updated.
18 | +This is now updated.
19 | +
20 | +This is a new line.
21 |  
22 |  This will stay.
23 | \ No newline at end of file
24 | 
25 | === removed file 'removed_file'
26 | --- removed_file	2013-10-13 23:53:13 +0000
27 | +++ removed_file	1970-01-01 00:00:00 +0000
28 | @@ -1,3 +0,0 @@
29 | -This content shouldn't be here.
30 | -
31 | -This file will be removed.
32 | \ No newline at end of file
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/tests/samples/git.diff:
--------------------------------------------------------------------------------
 1 | diff --git a/added_file b/added_file
 2 | new file mode 100644
 3 | index 0000000..9b710f3
 4 | --- /dev/null
 5 | +++ b/added_file
 6 | @@ -0,0 +1,4 @@
 7 | +This was missing!
 8 | +Adding it now.
 9 | +
10 | +Only for testing purposes.
11 | \ No newline at end of file
12 | diff --git a/modified_file b/modified_file
13 | index c7921f5..8946660 100644
14 | --- a/modified_file
15 | +++ b/modified_file
16 | @@ -1,5 +1,7 @@
17 |  This is the original content.
18 |  
19 | -This should be updated.
20 | +This is now updated.
21 | +
22 | +This is a new line.
23 |  
24 |  This will stay.
25 | \ No newline at end of file
26 | diff --git a/removed_file b/removed_file
27 | deleted file mode 100644
28 | index 1f38447..0000000
29 | --- a/removed_file
30 | +++ /dev/null
31 | @@ -1,3 +0,0 @@
32 | -This content shouldn't be here.
33 | -
34 | -This file will be removed.
35 | \ No newline at end of file
36 | 
37 | 


--------------------------------------------------------------------------------
/tests/samples/git_cr.diff:
--------------------------------------------------------------------------------
 1 | diff --git a/src/test/org/apache/commons/math/util/ExpandableDoubleArrayTest.java b/src/test/org/apache/commons/math/util/ExpandableDoubleArrayTest.java
 2 | new file mode 100644
 3 | index 000000000..2b38fa232
 4 | --- /dev/null
 5 | +++ b/src/test/org/apache/commons/math/util/ExpandableDoubleArrayTest.java
 6 | @@ -0,0 +1,3 @@
 7 | + "This line is broken into two lines by CR. " +		"but it should be treated as one line in the text diff file"
 8 | + "This has no CR"
 9 | + "This line also has CR. " +              "but it should also be treated as one line in the text diff file". 
10 | 


--------------------------------------------------------------------------------
/tests/samples/git_delete.diff:
--------------------------------------------------------------------------------
 1 | diff --git a/somefile.c b/somefile.c
 2 | deleted file mode 100644
 3 | index abcdefbbb8..0000000000
 4 | --- a/somefile.c
 5 | +++ /dev/null
 6 | @@ -1,10 +0,0 @@
 7 | -/**
 8 | - *  @file somefile.c
 9 | - */
10 | -#include <stdio.h>
11 | -
12 | -int main(int argc, cahr *argv[])
13 | -{
14 | -	printf("Hello World\n");
15 | -	return 0;
16 | -}
17 | 


--------------------------------------------------------------------------------
/tests/samples/git_filenames_with_spaces.diff:
--------------------------------------------------------------------------------
1 | diff --git a/has spaces/t.sql b/has spaces/t.sql
2 | new file mode 100644
3 | index 0000000..8a9b485
4 | --- /dev/null
5 | +++ b/has spaces/t.sql
6 | @@ -0,0 +1 @@
7 | +select * FROM t;
8 | 


--------------------------------------------------------------------------------
/tests/samples/git_filenames_with_spaces_prefix.diff:
--------------------------------------------------------------------------------
1 | diff --git src://foo bar/baz dst://foo bar/baz
2 | new file mode 100644
3 | index 00000000000..0a72e5064c8
4 | --- /dev/null
5 | +++ dst://foo bar/baz
6 | @@ -0,0 +1,1 @@
7 | +blah
8 | 


--------------------------------------------------------------------------------
/tests/samples/git_no_prefix.diff:
--------------------------------------------------------------------------------
 1 | diff --git file1 file1
 2 | deleted file mode 100644
 3 | index 42f90fd..0000000
 4 | --- file1
 5 | +++ /dev/null
 6 | @@ -1,3 +0,0 @@
 7 | -line11
 8 | -line12
 9 | -line13
10 | diff --git file2 file2
11 | index c337bf1..1cb02b9 100644
12 | --- file2
13 | +++ file2
14 | @@ -4,0 +5,3 @@ line24
15 | +line24n
16 | +line24n2
17 | +line24n3
18 | @@ -15,0 +19,3 @@ line215
19 | +line215n
20 | +line215n2
21 | +line215n3
22 | diff --git file3 file3
23 | new file mode 100644
24 | index 0000000..632e269
25 | --- /dev/null
26 | +++ file3
27 | @@ -0,0 +1,3 @@
28 | +line31
29 | +line32
30 | +line33
31 | 


--------------------------------------------------------------------------------
/tests/samples/git_quoted_filename.diff:
--------------------------------------------------------------------------------
1 | diff --git "a/A \303\242 B.py" "b/A \303\242 B.py"
2 | new file mode 100644
3 | index 0000000..ce01362
4 | --- /dev/null
5 | +++ "b/A \303\242 B.py"
6 | @@ -0,0 +1 @@
7 | +hello
8 | 


--------------------------------------------------------------------------------
/tests/samples/git_rename.diff:
--------------------------------------------------------------------------------
 1 | diff --git a/added b/moved
 2 | similarity index 85%
 3 | rename from added
 4 | rename to moved
 5 | index a071991..4dbab21 100644
 6 | --- a/added
 7 | +++ b/moved
 8 | @@ -9,4 +9,4 @@ Some content
 9 |  Some content
10 |  Some content
11 |  Some content
12 | -Some content
13 | +Some modified content
14 | 
15 | diff --git a/oldfile b/newfile
16 | similarity index 85%
17 | rename from oldfile
18 | rename to newfile
19 | index a071991..4dbab21 100644
20 | --- a/oldfile
21 | +++ b/newfile
22 | @@ -9,4 +9,4 @@ Some content
23 |  Some content
24 |  Some content
25 |  Some content
26 | -Some content
27 | +Some modified content
28 | 
29 | diff --git a/sub/onefile b/sub/otherfile
30 | similarity index 100%
31 | rename from onefile
32 | rename to otherfile
33 | 


--------------------------------------------------------------------------------
/tests/samples/hg.diff:
--------------------------------------------------------------------------------
 1 | diff -r 44299fd3d1a8 added_file
 2 | --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
 3 | +++ b/added_file	Sun Oct 13 20:51:40 2013 -0300
 4 | @@ -0,0 +1,4 @@
 5 | +This was missing!
 6 | +Adding it now.
 7 | +
 8 | +Only for testing purposes.
 9 | \ No newline at end of file
10 | diff -r 44299fd3d1a8 modified_file
11 | --- a/modified_file	Sun Oct 13 20:51:07 2013 -0300
12 | +++ b/modified_file	Sun Oct 13 20:51:40 2013 -0300
13 | @@ -1,5 +1,7 @@
14 |  This is the original content.
15 |  
16 | -This should be updated.
17 | +This is now updated.
18 | +
19 | +This is a new line.
20 |  
21 |  This will stay.
22 | \ No newline at end of file
23 | diff -r 44299fd3d1a8 removed_file
24 | --- a/removed_file	Sun Oct 13 20:51:07 2013 -0300
25 | +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
26 | @@ -1,3 +0,0 @@
27 | -This content shouldn't be here.
28 | -
29 | -This file will be removed.
30 | \ No newline at end of file
31 | 


--------------------------------------------------------------------------------
/tests/samples/sample0.diff:
--------------------------------------------------------------------------------
 1 | --- /path/to/original	''timestamp''
 2 | +++ /path/to/new	''timestamp''
 3 | @@ -1,3 +1,9 @@ Section Header
 4 | +This is an important
 5 | +notice! It should
 6 | +therefore be located at
 7 | +the beginning of this
 8 | +document!
 9 | +
10 |  This part of the
11 |  document has stayed the
12 |  same from version to
13 | @@ -5,16 +11,10 @@
14 |  be shown if it doesn't
15 |  change.  Otherwise, that
16 |  would not be helping to
17 | -compress the size of the
18 | -changes.
19 | -
20 | -This paragraph contains
21 | -text that is outdated.
22 | -It will be deleted in the
23 | -near future.
24 | +compress anything.
25 |  
26 |  It is important to spell
27 | -check this dokument. On
28 | +check this document. On
29 |  the other hand, a
30 |  misspelled word isn't
31 |  the end of the world.
32 | @@ -22,3 +22,7 @@
33 |  this paragraph needs to
34 |  be changed. Things can
35 |  be added after it.
36 | +
37 | +This paragraph contains
38 | +important new additions
39 | +to this document.
40 | --- /dev/null
41 | +++ /path/to/another_new
42 | @@ -0,0 +1,9 @@
43 | +This is an important
44 | +notice! It should
45 | +therefore be located at
46 | +the beginning of this
47 | +document!
48 | +
49 | +This part of the
50 | +document has stayed the
51 | +same from version to
52 | --- /path/to/existing
53 | +++ /dev/null
54 | @@ -1,9 +0,0 @@
55 | -This is an important
56 | -notice! It should
57 | -therefore be located at
58 | -the beginning of this
59 | -document!
60 | -
61 | -This part of the
62 | -document has stayed the
63 | -same from version to
64 | 


--------------------------------------------------------------------------------
/tests/samples/sample1.diff:
--------------------------------------------------------------------------------
 1 | --- /path/to/original	''timestamp''
 2 | +++ /path/to/new	''timestamp''
 3 | @@ -1,3 +1,9 @@
 4 | +This is an important
 5 | +notice! It should
 6 | +therefore be located at
 7 | +the beginning of this
 8 | +document!
 9 | +
10 |  This part of the
11 |  document has stayed the
12 |  same from version to
13 | @@ -5,16 +11,13 @@
14 |  be shown if it doesn't
15 |  change.  Otherwise, that
16 |  would not be helping to
17 | -compress the size of the
18 | -changes.
19 | -
20 | -This paragraph contains
21 | -text that is outdated.
22 | -It will be deleted in the
23 | -near future.
24 | +compress anything.
25 |  
26 |  It is important to spell
27 | -check this dokument. On
28 | +check this document. On
29 |  the other hand, a
30 |  misspelled word isn't
31 |  the end of the world.
32 | @@ -22,3 +22,7 @@
33 |  this paragraph needs to
34 |  be changed. Things can
35 |  be added after it.
36 | +
37 | +This paragraph contains
38 | +important new additions
39 | +to this document.
40 | 


--------------------------------------------------------------------------------
/tests/samples/sample2.diff:
--------------------------------------------------------------------------------
 1 | # HG changeset patch
 2 | # Parent 13ba6cbdb304cd251fbc22466cadb21019ee817f
 3 | # User Bill McCloskey <wmccloskey@mozilla.com>
 4 | 
 5 | diff --git a/content/base/src/nsContentUtils.cpp b/content/base/src/nsContentUtils.cpp
 6 | --- a/content/base/src/nsContentUtils.cpp
 7 | +++ b/content/base/src/nsContentUtils.cpp
 8 | @@ -6369,17 +6369,17 @@ public:
 9 |                                      nsCycleCollectionParticipant* helper)
10 |    {
11 |    }
12 |  
13 |    NS_IMETHOD_(void) NoteNextEdgeName(const char* name)
14 |    {
15 |    }
16 |  
17 | -  NS_IMETHOD_(void) NoteWeakMapping(void* map, void* key, void* val)
18 | +  NS_IMETHOD_(void) NoteWeakMapping(void* map, void* key, void* kdelegate, void* val)
19 |    {
20 |    }
21 |  
22 |    bool mFound;
23 |  
24 |  private:
25 |    void* mWrapper;
26 |  };
27 | diff --git a/js/src/jsfriendapi.cpp b/js/src/jsfriendapi.cpp
28 | --- a/js/src/jsfriendapi.cpp
29 | +++ b/js/src/jsfriendapi.cpp
30 | @@ -527,16 +527,24 @@ js::VisitGrayWrapperTargets(JSCompartmen
31 |  {
32 |      for (WrapperMap::Enum e(comp->crossCompartmentWrappers); !e.empty(); e.popFront()) {
33 |          gc::Cell *thing = e.front().key.wrapped;
34 |          if (thing->isMarked(gc::GRAY))
35 |              callback(closure, thing);
36 |      }
37 |  }
38 |  
39 | +JS_FRIEND_API(JSObject *)
40 | +js::GetWeakmapKeyDelegate(JSObject *key)
41 | +{
42 | +    if (JSWeakmapKeyDelegateOp op = key->getClass()->ext.weakmapKeyDelegateOp)
43 | +        return op(key);
44 | +    return NULL;
45 | +}
46 | +
47 |  JS_FRIEND_API(void)
48 |  JS_SetAccumulateTelemetryCallback(JSRuntime *rt, JSAccumulateTelemetryDataCallback callback)
49 |  {
50 |      rt->telemetryCallback = callback;
51 |  }
52 |  
53 |  JS_FRIEND_API(JSObject *)


--------------------------------------------------------------------------------
/tests/samples/sample3.diff:
--------------------------------------------------------------------------------
 1 | === added file 'added_file'
 2 | --- added_file	1970-01-01 00:00:00 +0000
 3 | +++ added_file	2013-10-13 23:44:04 +0000
 4 | @@ -0,0 +1,4 @@
 5 | +This was missing!
 6 | +holá mundo!
 7 | +
 8 | +Only for testing purposes.
 9 | \ No newline at end of file
10 | 
11 | === modified file 'modified_file'
12 | --- modified_file	2013-10-13 23:53:13 +0000
13 | +++ modified_file	2013-10-13 23:53:26 +0000
14 | @@ -1,5 +1,7 @@
15 |  This is the original content.
16 |  
17 | -This should be updated.
18 | +This is now updated.
19 | +
20 | +This is a new line.
21 |  
22 | -This will stay.
23 | \ No newline at end of file
24 | +This will stay.
25 | 
26 | === removed file 'removed_file'
27 | --- removed_file	2013-10-13 23:53:13 +0000
28 | +++ removed_file	1970-01-01 00:00:00 +0000
29 | @@ -1,3 +0,0 @@
30 | -This content shouldn't be here.
31 | -
32 | -This file will be removed.
33 | \ No newline at end of file
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/tests/samples/sample4.diff:
--------------------------------------------------------------------------------
 1 | === added file 'added_file'
 2 | --- added_file	1970-01-01 00:00:00 +0000
 3 | +++ added_file	2013-10-13 23:44:04 +0000
 4 | @@ -0,0 +1,4 @@
 5 | +This was missing!
 6 | +holá mundo!
 7 | +
 8 | +Only for testing purposes.
 9 | \ No newline at end of file
10 | 
11 | === modified file 'modified_file'
12 | --- modified_file	2013-10-13 23:53:13 +0000
13 | +++ modified_file	2013-10-13 23:53:26 +0000
14 | @@ -1,5 +1,7 @@
15 |  This is the original content.
16 |  
17 | -This should be updated.
18 | +This is now updated.
19 | +
20 | +This is a new line.
21 |  
22 |  This will stay.
23 | \ No newline at end of file
24 | 
25 | === removed file 'removed_file'
26 | --- removed_file	2013-10-13 23:53:13 +0000
27 | +++ removed_file	1970-01-01 00:00:00 +0000
28 | @@ -1,3 +0,0 @@
29 | -This content shouldn't be here.
30 | -
31 | -This file will be removed.
32 | \ No newline at end of file
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/tests/samples/sample5.diff:
--------------------------------------------------------------------------------
 1 | === modified file 'modified_file1'
 2 | --- modified_file1	2013-10-13 23:53:13 +0000
 3 | +++ modified_file1	2013-10-13 23:53:26 +0000
 4 | @@ -1,5 +1,7 @@
 5 |  This is the original content.
 6 |  
 7 | -This should be updated.
 8 | +This is now updated.
 9 | +
10 | +This is a new line.
11 | 
12 |  This will stay.
13 | \ No newline at end of file
14 | 
15 | === modified file 'modified_file2'
16 | --- modified_file2	2013-10-13 23:53:13 +0000
17 | +++ modified_file2	2013-10-13 23:53:26 +0000
18 | @@ -1,5 +1,7 @@
19 |  This is the original content.
20 |  
21 | -This should be updated.
22 | +This is now updated.
23 | +
24 | +This is a new line.
25 | 
26 |  This will stay.
27 | \ No newline at end of file
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/tests/samples/sample6.diff:
--------------------------------------------------------------------------------
 1 | --- /path/to/original	''timestamp''
 2 | +++ /path/to/new	''timestamp''
 3 | @@ -1,3 +1,9 @@
 4 | +This is an important
 5 | +notice! It should
 6 | +therefore be located at
 7 | +the beginning of this
 8 | +document!
 9 | +
10 |  This part of the
11 |  document has stayed the
12 |  same from version to
13 | @@ -5,16 +11,13 @@
14 |  be shown if it doesn't
15 |  change.  Otherwise, that
16 |  would not be helping to
17 | -compress the size of the
18 | -changes.
19 | -
20 | -This paragraph contains
21 | -text that is outdated.
22 | -It will be deleted in the
23 | -near future.
24 | +compress anything.
25 |  
26 |  It is important to spell
27 | -check this dokument. On
28 | +check this document. On
29 |  the other hand, a
30 |  misspelled word isn't
31 |  the end of the world.
32 |  this paragraph needs to
33 |  be changed. Things can
34 |  be added after it.
35 | +
36 | +This paragraph contains
37 | +important new additions
38 | +to this document.
39 | 


--------------------------------------------------------------------------------
/tests/samples/sample7.diff:
--------------------------------------------------------------------------------
 1 | --- /path/to/original	''timestamp''
 2 | +++ /path/to/new	''timestamp''
 3 | @@ -1,3 +1,9 @@
 4 | +This is an important
 5 | +notice! It should
 6 | +therefore be located at
 7 | +the beginning of this
 8 | +document!
 9 | +
10 |  This part of the
11 |  document has stayed the
12 |  same from version to
13 | @@ -5,16 +11,13 @@
14 |  be shown if it doesn't
15 |  change.  Otherwise, that
16 |  would not be helping to
17 | -compress the size of the
18 | -changes.
19 | -
20 | -This paragraph contains
21 | -text that is outdated.
22 | +compress anything.
23 |  
24 |  It is important to spell
25 | -check this dokument. On
26 | +check this document. On
27 |  the other hand, a
28 |  misspelled word isn't
29 |  the end of the world.
30 | 


--------------------------------------------------------------------------------
/tests/samples/sample8.diff:
--------------------------------------------------------------------------------
 1 | diff --git a/boo.bin b/boo.bin
 2 | new file mode 100644
 3 | index 0000000..ae000000
 4 | diff --git a/foo.bin b/foo.bin
 5 | new file mode 100644
 6 | index 0000000..af000000
 7 | Binary files /dev/null and b/foo.bin differ
 8 | diff --git a/bar.bin b/bar.bin
 9 | index ad000000..ac000000 100644
10 | Binary files a/bar.bin and b/bar.bin differ
11 | diff --git a/baz.bin b/baz.bin
12 | deleted file mode 100644
13 | index af000000..0000000
14 | Binary files a/baz.bin and /dev/null differ
15 | diff --git a/fuz.bin b/fuz.bin
16 | new file mode 100644
17 | index 0000000..ae000000
18 | 


--------------------------------------------------------------------------------
/tests/samples/svn.diff:
--------------------------------------------------------------------------------
 1 | Index: modified_file
 2 | ===================================================================
 3 | --- modified_file	(revision 191)
 4 | +++ modified_file	(working copy)
 5 | @@ -1,5 +1,7 @@
 6 |  This is the original content.
 7 |  
 8 | -This should be updated.
 9 | +This is now updated.
10 |  
11 | +This is a new line.
12 | +
13 |  This will stay.
14 | \ No newline at end of file
15 | Index: removed_file
16 | ===================================================================
17 | --- removed_file	(revision 188)
18 | +++ removed_file	(working copy)
19 | @@ -1,3 +0,0 @@
20 | -This content shouldn't be here.
21 | -
22 | -This file will be removed.
23 | \ No newline at end of file
24 | Index: added_file
25 | ===================================================================
26 | --- added_file	(revision 0)
27 | +++ added_file	(revision 0)
28 | @@ -0,0 +1,4 @@
29 | +This was missing!
30 | +Adding it now.
31 | +
32 | +Only for testing purposes.
33 | \ No newline at end of file
34 | 


--------------------------------------------------------------------------------
/tests/test_hunks.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # The MIT License (MIT)
 4 | # Copyright (c) 2014-2017 Matias Bordese
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
22 | # OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | 
25 | """Tests for Hunk."""
26 | 
27 | from __future__ import unicode_literals
28 | 
29 | import unittest
30 | 
31 | from unidiff.patch import (
32 |     LINE_TYPE_ADDED,
33 |     LINE_TYPE_CONTEXT,
34 |     LINE_TYPE_REMOVED,
35 |     Hunk,
36 |     Line,
37 | )
38 | 
39 | 
class TestHunk(unittest.TestCase):
    """Check Hunk validity and its source/target views after appending lines."""

    def setUp(self):
        """Create one context, one added and one removed Line sharing the same text."""
        super(TestHunk, self).setUp()
        text = 'Sample line'
        self.context_line = Line(text, line_type=LINE_TYPE_CONTEXT)
        self.added_line = Line(text, line_type=LINE_TYPE_ADDED)
        self.removed_line = Line(text, line_type=LINE_TYPE_REMOVED)

    def test_missing_length(self):
        """A hunk created with ``None`` lengths is valid after one context line."""
        unsized = Hunk(src_len=None, tgt_len=None)
        unsized.append(self.context_line)
        self.assertTrue(unsized.is_valid())

    def test_default_is_valid(self):
        """A hunk created with no arguments reports itself as valid."""
        self.assertTrue(Hunk().is_valid())

    def test_missing_data_is_not_valid(self):
        """Declared lengths of 1/1 with no lines appended make the hunk invalid."""
        empty = Hunk(src_len=1, tgt_len=1)
        self.assertFalse(empty.is_valid())

    def test_append_context(self):
        """A context line shows up on both the source and the target side."""
        one_to_one = Hunk(src_len=1, tgt_len=1)
        one_to_one.append(self.context_line)
        self.assertTrue(one_to_one.is_valid())

        # string views
        self.assertEqual(len(one_to_one.source), 1)
        self.assertEqual(one_to_one.target, one_to_one.source)
        self.assertIn(str(self.context_line), one_to_one.source)

        # Line-object views
        from_source = list(one_to_one.source_lines())
        from_target = list(one_to_one.target_lines())
        self.assertEqual(from_target, from_source)
        self.assertEqual(from_target, [self.context_line])

    def test_append_added_line(self):
        """An added line appears only on the target side."""
        addition = Hunk(src_len=0, tgt_len=1)
        addition.append(self.added_line)
        self.assertTrue(addition.is_valid())

        self.assertEqual(len(addition.target), 1)
        self.assertEqual(addition.source, [])
        self.assertIn(str(self.added_line), addition.target)

        from_target = list(addition.target_lines())
        self.assertEqual(from_target, [self.added_line])

    def test_append_deleted_line(self):
        """A removed line appears only on the source side."""
        deletion = Hunk(src_len=1, tgt_len=0)
        deletion.append(self.removed_line)
        self.assertTrue(deletion.is_valid())

        self.assertEqual(len(deletion.source), 1)
        self.assertEqual(deletion.target, [])
        self.assertIn(str(self.removed_line), deletion.source)

        from_source = list(deletion.source_lines())
        self.assertEqual(from_source, [self.removed_line])
93 | 


--------------------------------------------------------------------------------
/tests/test_line.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # The MIT License (MIT)
 4 | # Copyright (c) 2017 Matias Bordese
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
22 | # OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | 
25 | """Tests for Line."""
26 | 
27 | from __future__ import unicode_literals
28 | 
29 | import unittest
30 | 
31 | from unidiff.patch import (
32 |     LINE_TYPE_ADDED,
33 |     LINE_TYPE_CONTEXT,
34 |     LINE_TYPE_REMOVED,
35 |     Line,
36 | )
37 | 
38 | 
class TestLine(unittest.TestCase):
    """Check Line string forms, equality and the per-type boolean flags."""

    def setUp(self):
        """Create one context, one added and one removed Line sharing the same text."""
        super(TestLine, self).setUp()
        text = 'Sample line'
        self.context_line = Line(text, line_type=LINE_TYPE_CONTEXT)
        self.added_line = Line(text, line_type=LINE_TYPE_ADDED)
        self.removed_line = Line(text, line_type=LINE_TYPE_REMOVED)

    def test_str(self):
        """``str`` of an added line is the text prefixed with ``+``."""
        rendered = str(self.added_line)
        self.assertEqual(rendered, '+Sample line')

    def test_repr(self):
        """``repr`` wraps the rendered line in ``<Line: ...>``."""
        rendered = repr(self.added_line)
        self.assertEqual(rendered, '<Line: +Sample line>')

    def test_equal(self):
        """Lines with the same text and the same type compare equal."""
        twin = Line('Sample line', line_type=LINE_TYPE_ADDED)
        self.assertEqual(self.added_line, twin)

    def test_not_equal(self):
        """Lines with the same text but different types compare unequal."""
        self.assertNotEqual(self.added_line, self.removed_line)

    def test_is_added(self):
        """``is_added`` is true only for the added line."""
        self.assertTrue(self.added_line.is_added)
        for other in (self.context_line, self.removed_line):
            self.assertFalse(other.is_added)

    def test_is_removed(self):
        """``is_removed`` is true only for the removed line."""
        self.assertTrue(self.removed_line.is_removed)
        for other in (self.added_line, self.context_line):
            self.assertFalse(other.is_removed)

    def test_is_context(self):
        """``is_context`` is true only for the context line."""
        self.assertTrue(self.context_line.is_context)
        for other in (self.added_line, self.removed_line):
            self.assertFalse(other.is_context)
75 | 


--------------------------------------------------------------------------------
/tests/test_parser.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # The MIT License (MIT)
  4 | # Copyright (c) 2014-2023 Matias Bordese
  5 | #
  6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  7 | # of this software and associated documentation files (the "Software"), to deal
  8 | # in the Software without restriction, including without limitation the rights
  9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 | # copies of the Software, and to permit persons to whom the Software is
 11 | # furnished to do so, subject to the following conditions:
 12 | #
 13 | # The above copyright notice and this permission notice shall be included in
 14 | # all copies or substantial portions of the Software.
 15 | #
 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
 22 | # OR OTHER DEALINGS IN THE SOFTWARE.
 23 | 
 24 | 
 25 | """Tests for the unified diff parser process."""
 26 | 
 27 | from __future__ import unicode_literals
 28 | 
 29 | import codecs
 30 | import os.path
 31 | import unittest
 32 | 
 33 | from unidiff import PatchSet
 34 | from unidiff.patch import PY2
 35 | from unidiff.errors import UnidiffParseError
 36 | 
# When the PY2 flag is false, bind the name ``unicode`` to ``str`` so the
# tests below can call ``unicode(...)`` regardless of the interpreter.
if not PY2:
    unicode = str
 39 | 
 40 | class TestUnidiffParser(unittest.TestCase):
 41 |     """Tests for Unified Diff Parser."""
 42 | 
 43 |     def setUp(self):
 44 |         super(TestUnidiffParser, self).setUp()
 45 |         self.samples_dir = os.path.dirname(os.path.realpath(__file__))
 46 |         self.sample_file = os.path.join(
 47 |             self.samples_dir, 'samples/sample0.diff')
 48 |         self.sample_bad_file = os.path.join(
 49 |             self.samples_dir, 'samples/sample1.diff')
 50 | 
 51 |     def test_missing_encoding(self):
 52 |         utf8_file = os.path.join(self.samples_dir, 'samples/sample3.diff')
 53 |         # read bytes
 54 |         with open(utf8_file, 'rb') as diff_file:
 55 |             if PY2:
 56 |                 self.assertRaises(UnicodeDecodeError, PatchSet, diff_file)
 57 |             else:
 58 |                 # unicode expected
 59 |                 self.assertRaises(TypeError, PatchSet, diff_file)
 60 | 
 61 |     def test_encoding_param(self):
 62 |         utf8_file = os.path.join(self.samples_dir, 'samples/sample3.diff')
 63 |         with open(utf8_file, 'rb') as diff_file:
 64 |             res = PatchSet(diff_file, encoding='utf-8')
 65 | 
 66 |         # 3 files updated by diff
 67 |         self.assertEqual(len(res), 3)
 68 |         added_unicode_line = res.added_files[0][0][1]
 69 |         self.assertEqual(added_unicode_line.value, 'holá mundo!\n')
 70 | 
 71 |     def test_no_newline_at_end_of_file(self):
 72 |         utf8_file = os.path.join(self.samples_dir, 'samples/sample3.diff')
 73 |         with open(utf8_file, 'rb') as diff_file:
 74 |             res = PatchSet(diff_file, encoding='utf-8')
 75 | 
 76 |         # 3 files updated by diff
 77 |         self.assertEqual(len(res), 3)
 78 |         added_unicode_line = res.added_files[0][0][4]
 79 |         self.assertEqual(added_unicode_line.line_type, '\\')
 80 |         self.assertEqual(added_unicode_line.value, ' No newline at end of file\n')
 81 |         added_unicode_line = res.modified_files[0][0][8]
 82 |         self.assertEqual(added_unicode_line.line_type, '\\')
 83 |         self.assertEqual(added_unicode_line.value, ' No newline at end of file\n')
 84 | 
 85 |     def test_preserve_dos_line_endings(self):
 86 |         utf8_file = os.path.join(self.samples_dir, 'samples/sample4.diff')
 87 |         with open(utf8_file, 'rb') as diff_file:
 88 |             res = PatchSet(diff_file, encoding='utf-8')
 89 | 
 90 |         # 3 files updated by diff
 91 |         self.assertEqual(len(res), 3)
 92 |         added_unicode_line = res.added_files[0][0][1]
 93 |         self.assertEqual(added_unicode_line.value, 'holá mundo!\r\n')
 94 | 
 95 |     def test_preserve_dos_line_endings_empty_line_type(self):
 96 |         utf8_file = os.path.join(self.samples_dir, 'samples/sample5.diff')
 97 |         with open(utf8_file, 'rb') as diff_file:
 98 |             res = PatchSet(diff_file, encoding='utf-8')
 99 | 
100 |         # 2 files updated by diff
101 |         self.assertEqual(len(res), 2)
102 |         modified_unicode_line = res.modified_files[0][0][6]
103 |         self.assertEqual(modified_unicode_line.value, '\r\n')
104 |         self.assertEqual(modified_unicode_line.line_type, ' ')
105 | 
106 |         modified_unicode_line = res.modified_files[1][0][6]
107 |         self.assertEqual(modified_unicode_line.value, '\n')
108 |         self.assertEqual(modified_unicode_line.line_type, ' ')
109 | 
110 |     def test_print_hunks_without_gaps(self):
111 |         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
112 |             res = PatchSet(diff_file)
113 |         lines = unicode(res).splitlines()
114 |         self.assertEqual(lines[12], '@@ -5,16 +11,10 @@')
115 |         self.assertEqual(lines[31], '@@ -22,3 +22,7 @@')
116 | 
117 |     def _test_parse_sample(self, metadata_only):
118 |         """Parse sample file."""
119 |         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
120 |             res = PatchSet(diff_file, metadata_only=metadata_only)
121 | 
122 |         # three file in the patch
123 |         self.assertEqual(len(res), 3)
124 |         # three hunks
125 |         self.assertEqual(len(res[0]), 3)
126 | 
127 |         # first file is modified
128 |         self.assertTrue(res[0].is_modified_file)
129 |         self.assertFalse(res[0].is_removed_file)
130 |         self.assertFalse(res[0].is_added_file)
131 |         self.assertFalse(res[0].is_binary_file)
132 | 
133 |         # Hunk 1: five additions, no deletions, a section header
134 |         self.assertEqual(res[0][0].added, 6)
135 |         self.assertEqual(res[0][0].removed, 0)
136 |         self.assertEqual(res[0][0].section_header, 'Section Header')
137 | 
138 |         # Hunk 2: 2 additions, 8 deletions, no section header
139 |         self.assertEqual(res[0][1].added, 2)
140 |         self.assertEqual(res[0][1].removed, 8)
141 |         self.assertEqual(res[0][1].section_header, '')
142 | 
143 |         # Hunk 3: four additions, no deletions, no section header
144 |         self.assertEqual(res[0][2].added, 4)
145 |         self.assertEqual(res[0][2].removed, 0)
146 |         self.assertEqual(res[0][2].section_header, '')
147 | 
148 |         # Check file totals
149 |         self.assertEqual(res[0].added, 12)
150 |         self.assertEqual(res[0].removed, 8)
151 | 
152 |         # second file is added
153 |         self.assertFalse(res[1].is_modified_file)
154 |         self.assertFalse(res[1].is_removed_file)
155 |         self.assertTrue(res[1].is_added_file)
156 |         self.assertFalse(res[1].is_binary_file)
157 | 
158 |         # third file is removed
159 |         self.assertFalse(res[2].is_modified_file)
160 |         self.assertTrue(res[2].is_removed_file)
161 |         self.assertFalse(res[2].is_added_file)
162 |         self.assertFalse(res[2].is_binary_file)
163 | 
164 |         self.assertEqual(res.added, 21)
165 |         self.assertEqual(res.removed, 17)
166 | 
167 |     def test_parse_sample_full(self):
168 |         self._test_parse_sample(metadata_only=False)
169 | 
170 |     def test_parse_sample_metadata_only(self):
171 |         self._test_parse_sample(metadata_only=True)
172 | 
173 |     def test_patchset_compare(self):
174 |         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
175 |             ps1 = PatchSet(diff_file)
176 | 
177 |         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
178 |             ps2 = PatchSet(diff_file)
179 | 
180 |         other_file = os.path.join(self.samples_dir, 'samples/sample3.diff')
181 |         with open(other_file, 'rb') as diff_file:
182 |             ps3 = PatchSet(diff_file, encoding='utf-8')
183 | 
184 |         self.assertEqual(ps1, ps2)
185 |         self.assertNotEqual(ps1, ps3)
186 | 
187 |     def test_patchset_from_string(self):
188 |         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
189 |             diff_data = diff_file.read()
190 |             ps1 = PatchSet.from_string(diff_data)
191 | 
192 |         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
193 |             ps2 = PatchSet(diff_file)
194 | 
195 |         self.assertEqual(ps1, ps2)
196 | 
197 |     def test_patchset_from_bytes_string(self):
198 |         with codecs.open(self.sample_file, 'rb') as diff_file:
199 |             diff_data = diff_file.read()
200 |             ps1 = PatchSet.from_string(diff_data, encoding='utf-8')
201 | 
202 |         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
203 |             ps2 = PatchSet(diff_file)
204 | 
205 |         self.assertEqual(ps1, ps2)
206 | 
207 |     def test_patchset_string_input(self):
208 |         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
209 |             diff_data = diff_file.read()
210 |             ps1 = PatchSet(diff_data)
211 | 
212 |         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
213 |             ps2 = PatchSet(diff_file)
214 | 
215 |         self.assertEqual(ps1, ps2)
216 | 
217 |     def test_parse_malformed_diff(self):
218 |         """Parse malformed file."""
219 |         with open(self.sample_bad_file) as diff_file:
220 |             self.assertRaises(UnidiffParseError, PatchSet, diff_file)
221 | 
222 |     def test_parse_malformed_diff_longer_than_expected(self):
223 |         """Parse malformed file with non-terminated hunk."""
224 |         utf8_file = os.path.join(self.samples_dir, 'samples/sample6.diff')
225 |         with open(utf8_file, 'r') as diff_file:
226 |             self.assertRaises(UnidiffParseError, PatchSet, diff_file)
227 | 
228 |     def test_parse_malformed_diff_shorter_than_expected(self):
229 |         """Parse malformed file with non-terminated hunk."""
230 |         utf8_file = os.path.join(self.samples_dir, 'samples/sample7.diff')
231 |         with open(utf8_file, 'r') as diff_file:
232 |             self.assertRaises(UnidiffParseError, PatchSet, diff_file)
233 | 
234 |     def test_from_filename_with_cr_in_diff_text_files(self):
235 |         """Parse git diff text files that contain CR"""
236 |         utf8_file = os.path.join(self.samples_dir, 'samples/git_cr.diff')
237 |         self.assertRaises(UnidiffParseError, PatchSet.from_filename, utf8_file)
238 | 
239 |         ps1 = PatchSet.from_filename(utf8_file, newline='\n')
240 |         import io
241 |         with io.open(utf8_file, 'r', newline='\n') as diff_file:
242 |             ps2 = PatchSet(diff_file)
243 | 
244 |         self.assertEqual(ps1, ps2)
245 | 
246 |     def test_parse_diff_with_new_and_modified_binary_files(self):
247 |         """Parse git diff file with newly added and modified binaries files."""
248 |         utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff')
249 |         with open(utf8_file, 'r') as diff_file:
250 |             res = PatchSet(diff_file)
251 | 
252 |         # three file in the patch
253 |         self.assertEqual(len(res), 5)
254 | 
255 |         # first empty file is added
256 |         self.assertFalse(res[0].is_modified_file)
257 |         self.assertFalse(res[0].is_removed_file)
258 |         self.assertTrue(res[0].is_added_file)
259 |         self.assertFalse(res[0].is_binary_file)
260 | 
261 |         # second file is added
262 |         self.assertFalse(res[1].is_modified_file)
263 |         self.assertFalse(res[1].is_removed_file)
264 |         self.assertTrue(res[1].is_added_file)
265 |         self.assertTrue(res[1].is_binary_file)
266 | 
267 |         # third file is modified
268 |         self.assertTrue(res[2].is_modified_file)
269 |         self.assertFalse(res[2].is_removed_file)
270 |         self.assertFalse(res[2].is_added_file)
271 |         self.assertTrue(res[2].is_binary_file)
272 | 
273 |         # fourth file is removed
274 |         self.assertFalse(res[3].is_modified_file)
275 |         self.assertTrue(res[3].is_removed_file)
276 |         self.assertFalse(res[3].is_added_file)
277 |         self.assertTrue(res[3].is_binary_file)
278 | 
279 |         # fifth empty file is added
280 |         self.assertFalse(res[4].is_modified_file)
281 |         self.assertFalse(res[4].is_removed_file)
282 |         self.assertTrue(res[4].is_added_file)
283 |         self.assertFalse(res[4].is_binary_file)
284 | 
285 |     def test_parse_round_trip_with_binary_files_in_diff(self):
286 |         """Parse git diff with binary files though round trip"""
287 |         utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff')
288 |         with open(utf8_file, 'r') as diff_file:
289 |             res1 = PatchSet(diff_file)
290 | 
291 |         res2 = PatchSet(str(res1))
292 |         self.assertEqual(res1, res2)
293 | 
294 |     def test_parse_diff_git_no_prefix(self):
295 |         utf8_file = os.path.join(self.samples_dir, 'samples/git_no_prefix.diff')
296 |         with open(utf8_file, 'r') as diff_file:
297 |             res = PatchSet(diff_file)
298 | 
299 |         self.assertEqual(len(res), 3)
300 | 
301 |         self.assertEqual(res[0].source_file, 'file1')
302 |         self.assertEqual(res[0].target_file, '/dev/null')
303 |         self.assertTrue(res[0].is_removed_file)
304 |         self.assertEqual(res[0].path, 'file1')
305 | 
306 |         self.assertEqual(res[1].source_file, 'file2')
307 |         self.assertEqual(res[1].target_file, 'file2')
308 |         self.assertTrue(res[1].is_modified_file)
309 |         self.assertEqual(res[1].path, 'file2')
310 | 
311 |         self.assertEqual(res[2].source_file, '/dev/null')
312 |         self.assertEqual(res[2].target_file, 'file3')
313 |         self.assertTrue(res[2].is_added_file)
314 |         self.assertEqual(res[2].path, 'file3')
315 | 
316 |     def test_parse_filename_with_spaces(self):
317 |         filename = os.path.join(self.samples_dir, 'samples/git_filenames_with_spaces.diff')
318 |         with open(filename) as f:
319 |             res = PatchSet(f)
320 | 
321 |         self.assertEqual(len(res), 1)
322 | 
323 |         self.assertEqual(res[0].source_file, '/dev/null')
324 |         self.assertEqual(res[0].target_file, 'b/has spaces/t.sql')
325 |         self.assertTrue(res[0].is_added_file)
326 |         self.assertEqual(res[0].path, 'has spaces/t.sql')
327 | 
328 |     def test_parse_filename_prefix_with_spaces(self):
329 |         filename = os.path.join(self.samples_dir, 'samples/git_filenames_with_spaces_prefix.diff')
330 |         with open(filename) as f:
331 |             res = PatchSet(f)
332 | 
333 |         self.assertEqual(len(res), 1)
334 | 
335 |         self.assertEqual(res[0].source_file, '/dev/null')
336 |         self.assertEqual(res[0].target_file, 'dst://foo bar/baz')
337 |         self.assertTrue(res[0].is_added_file)
338 |         self.assertEqual(res[0].path, 'dst://foo bar/baz')
339 | 
340 |     def test_parse_quoted_filename(self):
341 |         filename = os.path.join(self.samples_dir, 'samples/git_quoted_filename.diff')
342 |         with open(filename) as f:
343 |             res = PatchSet(f)
344 | 
345 |         self.assertEqual(len(res), 1)
346 | 
347 |         self.assertEqual(res[0].source_file, '/dev/null')
348 |         self.assertEqual(res[0].target_file, '"b/A \\303\\242 B.py"')
349 |         self.assertTrue(res[0].is_added_file)
350 |         self.assertEqual(res[0].path, '"A \\303\\242 B.py"')
351 | 
352 | 
353 |     def test_deleted_file(self):
354 |         filename = os.path.join(self.samples_dir, 'samples/git_delete.diff')
355 |         with open(filename) as f:
356 |             res = PatchSet(f)
357 | 
358 |         self.assertEqual(len(res), 1)
359 |         self.assertEqual(res[0].source_file, 'a/somefile.c')
360 |         self.assertEqual(res[0].target_file, '/dev/null')
361 |         self.assertTrue(res[0].is_removed_file)
362 | 
363 |     def test_diff_lines_linenos(self):
364 |         with open(self.sample_file, 'rb') as diff_file:
365 |             res = PatchSet(diff_file, encoding='utf-8')
366 | 
367 |         target_line_nos = []
368 |         source_line_nos = []
369 |         diff_line_nos = []
370 |         for diff_file in res:
371 |             for hunk in diff_file:
372 |                 for line in hunk:
373 |                     target_line_nos.append(line.target_line_no)
374 |                     source_line_nos.append(line.source_line_no)
375 |                     diff_line_nos.append(line.diff_line_no)
376 | 
377 |         expected_target_line_nos = [
378 |             # File: 1, Hunk: 1
379 |             1, 2, 3, 4, 5, 6, 7, 8, 9,
380 |             # File: 1, Hunk: 2
381 |             11, 12, 13, None, None, None, None, None, None, None, 14, 15, 16, None, 17, 18, 19, 20,
382 |             # File: 1, Hunk: 3
383 |             22, 23, 24, 25, 26, 27, 28,
384 |             # File: 2, Hunk 1
385 |             1, 2, 3, 4, 5, 6, 7, 8, 9,
386 |             # File: 3, Hunk 1
387 |             None, None, None, None, None, None, None, None, None,
388 |         ]
389 |         expected_source_line_nos = [
390 |             # File: 1, Hunk: 1
391 |             None, None, None, None, None, None, 1, 2, 3,
392 |             # File: 1, Hunk: 2
393 |             5, 6, 7, 8, 9, 10, 11, 12, 13, 14, None, 15, 16, 17, None, 18, 19, 20,
394 |             # File: 1, Hunk: 3
395 |             22, 23, 24, None, None, None, None,
396 |             # File: 2, Hunk 1
397 |             None, None, None, None, None, None, None, None, None,
398 |             # File: 3, Hunk 1
399 |             1, 2, 3, 4, 5, 6, 7, 8, 9,
400 |         ]
401 |         expected_diff_line_nos = [
402 |             # File: 1, Hunk: 1
403 |             4, 5, 6, 7, 8, 9, 10, 11, 12,
404 |             # File: 1, Hunk: 2
405 |             14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
406 |             # File: 1, Hunk: 3
407 |             33, 34, 35, 36, 37, 38, 39,
408 |             # File: 2, Hunk 1
409 |             43, 44, 45, 46, 47, 48, 49, 50, 51,
410 |             # File: 3, Hunk 1
411 |             55, 56, 57, 58, 59, 60, 61, 62, 63,
412 |         ]
413 | 
414 |         self.assertEqual(target_line_nos, expected_target_line_nos)
415 |         self.assertEqual(source_line_nos, expected_source_line_nos)
416 |         self.assertEqual(diff_line_nos, expected_diff_line_nos)
417 | 
418 |     def test_diff_hunk_positions(self):
419 |         with open(self.sample_file, 'rb') as diff_file:
420 |             res = PatchSet(diff_file, encoding='utf-8')
421 |         self.do_test_diff_hunk_positions(res)
422 | 
423 |     def test_diff_metadata_only(self):
424 |         with open(self.sample_file, 'rb') as diff_file:
425 |             res = PatchSet(diff_file, encoding='utf-8', metadata_only=True)
426 |         self.do_test_diff_hunk_positions(res)
427 | 
428 |     def do_test_diff_hunk_positions(self, res):
429 |         hunk_positions = []
430 |         for diff_file in res:
431 |             for hunk in diff_file:
432 |                 hunk_positions.append((hunk.source_start, hunk.target_start,
433 |                                        hunk.source_length, hunk.target_length))
434 | 
435 |         expected_hunk_positions = [
436 |             # File: 1, Hunk: 1
437 |             (1, 1, 3, 9),
438 |             # File: 1, Hunk: 2
439 |             (5, 11, 16, 10),
440 |             # File: 1, Hunk: 3
441 |             (22, 22, 3, 7),
442 |             # File: 2, Hunk: 1
443 |             (0, 1, 0, 9),
444 |             # File: 3, Hunk: 1
445 |             (1, 0, 9, 0)
446 |         ]
447 | 
448 |         self.assertEqual(hunk_positions, expected_hunk_positions)
449 | 
450 |     def test_binary_patch(self):
451 |         utf8_file = os.path.join(self.samples_dir, 'samples/binary.diff')
452 |         with open(utf8_file, 'r') as diff_file:
453 |             res = PatchSet(diff_file)
454 |             self.assertEqual(len(res), 1)
455 |             patch = res[0]
456 |             self.assertEqual(patch.source_file, '/dev/null')
457 |             self.assertEqual(patch.target_file, 'b/1x1.png')
458 |             self.assertTrue(patch.is_binary_file)
459 |             self.assertTrue(patch.is_added_file)
460 | 
461 | class TestVCSSamples(unittest.TestCase):
462 |     """Tests for real examples from VCS."""
463 | 
464 |     samples = ['bzr.diff', 'git.diff', 'hg.diff', 'svn.diff']
465 | 
466 |     def test_samples(self):
467 |         tests_dir = os.path.dirname(os.path.realpath(__file__))
468 |         for fname in self.samples:
469 |             file_path = os.path.join(tests_dir, 'samples', fname)
470 |             with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
471 |                 res = PatchSet(diff_file)
472 | 
473 |             # 3 files updated by diff
474 |             self.assertEqual(len(res), 3)
475 | 
476 |             # 1 added file
477 |             added_files = res.added_files
478 |             self.assertEqual(len(added_files), 1)
479 |             self.assertEqual(added_files[0].path, 'added_file')
480 |             # 1 hunk, 4 lines
481 |             self.assertEqual(len(added_files[0]), 1)
482 |             self.assertEqual(added_files[0].added, 4)
483 |             self.assertEqual(added_files[0].removed, 0)
484 | 
485 |             # 1 removed file
486 |             removed_files = res.removed_files
487 |             self.assertEqual(len(removed_files), 1)
488 |             self.assertEqual(removed_files[0].path, 'removed_file')
489 |             # 1 hunk, 3 removed lines
490 |             self.assertEqual(len(removed_files[0]), 1)
491 |             self.assertEqual(removed_files[0].added, 0)
492 |             self.assertEqual(removed_files[0].removed, 3)
493 | 
494 |             # 1 modified file
495 |             modified_files = res.modified_files
496 |             self.assertEqual(len(modified_files), 1)
497 |             self.assertEqual(modified_files[0].path, 'modified_file')
498 |             # 1 hunk, 3 added lines, 1 removed line
499 |             self.assertEqual(len(modified_files[0]), 1)
500 |             self.assertEqual(modified_files[0].added, 3)
501 |             self.assertEqual(modified_files[0].removed, 1)
502 | 
503 |             self.assertEqual(res.added, 7)
504 |             self.assertEqual(res.removed, 4)
505 | 
506 |             # check that original diffs and those produced
507 |             # by unidiff are the same
508 |             with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
509 |                 self.assertEqual(diff_file.read(), str(res))
510 | 
511 |     def test_git_renaming(self):
512 |         tests_dir = os.path.dirname(os.path.realpath(__file__))
513 |         file_path = os.path.join(tests_dir, 'samples/git_rename.diff')
514 |         with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
515 |             res = PatchSet(diff_file)
516 | 
517 |         self.assertEqual(len(res), 3)
518 |         self.assertEqual(len(res.modified_files), 3)
519 |         self.assertEqual(len(res.added_files), 0)
520 |         self.assertEqual(len(res.removed_files), 0)
521 | 
522 |         # renamed and modified files
523 |         for patch in res[:2]:
524 |             self.assertTrue(patch.is_rename)
525 |             self.assertEqual(patch.added, 1)
526 |             self.assertEqual(patch.removed, 1)
527 |         # renamed file under sub-path
528 |         patch = res[2]
529 |         self.assertTrue(patch.is_rename)
530 |         self.assertEqual(patch.added, 0)
531 |         self.assertEqual(patch.removed, 0)
532 |         # confirm the full path is in source/target filenames
533 |         self.assertEqual(patch.source_file, 'a/sub/onefile')
534 |         self.assertEqual(patch.target_file, 'b/sub/otherfile')
535 |         # check path is the target path
536 |         self.assertEqual(patch.path, 'sub/otherfile')
537 | 
538 |         # check that original diffs and those produced
539 |         # by unidiff are the same
540 |         with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
541 |             self.assertEqual(diff_file.read(), str(res))
542 | 


--------------------------------------------------------------------------------
/tests/test_patchedfile.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # The MIT License (MIT)
 4 | # Copyright (c) 2014-2017 Matias Bordese
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
22 | # OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | 
25 | """Tests for PatchedFile."""
26 | 
27 | from __future__ import unicode_literals
28 | 
29 | import unittest
30 | 
31 | from unidiff.patch import PatchedFile, Hunk
32 | 
33 | 
34 | class TestPatchedFile(unittest.TestCase):
35 |     """Tests for PatchedFile."""
36 | 
37 |     def setUp(self):
38 |         super(TestPatchedFile, self).setUp()
39 |         self.patched_file = PatchedFile()
40 | 
41 |     def test_is_added_file(self):
42 |         hunk = Hunk(src_start=0, src_len=0, tgt_start=1, tgt_len=10)
43 |         self.patched_file.append(hunk)
44 |         self.assertTrue(self.patched_file.is_added_file)
45 | 
46 |     def test_is_removed_file(self):
47 |         hunk = Hunk(src_start=1, src_len=10, tgt_start=0, tgt_len=0)
48 |         self.patched_file.append(hunk)
49 |         self.assertTrue(self.patched_file.is_removed_file)
50 | 
51 |     def test_is_modified_file(self):
52 |         hunk = Hunk(src_start=1, src_len=10, tgt_start=1, tgt_len=8)
53 |         self.patched_file.append(hunk)
54 |         self.assertTrue(self.patched_file.is_modified_file)
55 | 


--------------------------------------------------------------------------------
/unidiff/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # The MIT License (MIT)
 4 | # Copyright (c) 2014-2017 Matias Bordese
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
22 | # OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | 
25 | """Unidiff parsing library."""
26 | 
27 | from __future__ import unicode_literals
28 | 
29 | from unidiff import __version__
30 | from unidiff.patch import (
31 |     DEFAULT_ENCODING,
32 |     LINE_TYPE_ADDED,
33 |     LINE_TYPE_CONTEXT,
34 |     LINE_TYPE_REMOVED,
35 |     Hunk,
36 |     PatchedFile,
37 |     PatchSet,
38 |     UnidiffParseError,
39 | )
40 | 
# Package-level alias for the version string held by the unidiff.__version__
# module imported above.
VERSION = __version__.__version__
42 | 


--------------------------------------------------------------------------------
/unidiff/__version__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # The MIT License (MIT)
 4 | # Copyright (c) 2014-2023 Matias Bordese
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
22 | # OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
# Version string of the package; unidiff/__init__.py exposes it as VERSION.
__version__ = '0.7.5'
25 | 


--------------------------------------------------------------------------------
/unidiff/constants.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # The MIT License (MIT)
 4 | # Copyright (c) 2014-2023 Matias Bordese
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
22 | # OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | 
25 | """Useful constants and regexes used by the package."""
26 | 
27 | from __future__ import unicode_literals
28 | 
29 | import re
30 | 
31 | 
# Source file header line: `--- <filename>`, optionally followed by a tab and
# a timestamp. The filename may be wrapped in double quotes.
RE_SOURCE_FILENAME = re.compile(
    r'^--- (?P<filename>"?[^\t\n]+"?)(?:\t(?P<timestamp>[^\n]+))?')
# Target file header line: `+++ <filename>`, same layout as the source one.
RE_TARGET_FILENAME = re.compile(
    r'^\+\+\+ (?P<filename>"?[^\t\n]+"?)(?:\t(?P<timestamp>[^\n]+))?')


# check diff git line for git renamed files support
# (`diff --git a/<source> b/<target>`, each name optionally double-quoted)
RE_DIFF_GIT_HEADER = re.compile(
    r'^diff --git (?P<source>"?a/[^\t\n]+"?) (?P<target>"?b/[^\t\n]+"?)')
# same header line, but both names must contain a `://` separator
RE_DIFF_GIT_HEADER_URI_LIKE = re.compile(
    r'^diff --git (?P<source>.*://[^\t\n]+) (?P<target>.*://[^\t\n]+)')
# same header line with no required prefix: any two names without tab/newline
RE_DIFF_GIT_HEADER_NO_PREFIX = re.compile(
    r'^diff --git (?P<source>[^\t\n]+) (?P<target>[^\t\n]+)')

# check diff git deleted file marker `deleted file mode 100644`
RE_DIFF_GIT_DELETED_FILE = re.compile(r'^deleted file mode \d+$')

# check diff git new file marker `new file mode 100644`
RE_DIFF_GIT_NEW_FILE = re.compile(r'^new file mode \d+$')


# @@ (source offset, length) (target offset, length) @@ (section header)
# Groups, in order: source start, source length, target start, target length,
# section header. Both lengths are optional and are None when omitted.
RE_HUNK_HEADER = re.compile(
    r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?\ @@[ ]?(.*)")

#    kept line (context)
# \n empty line (treat like context)
# +  added line
# -  deleted line
# \  No newline case
# `line_type` is the first character, `value` is the rest of the line
# (DOTALL lets `value` include the line terminator).
RE_HUNK_BODY_LINE = re.compile(
    r'^(?P<line_type>[- \+\\])(?P<value>.*)', re.DOTALL)
# a line made only of an optional type character and one or two CR/LF
# characters; `line_type` is the empty string when the character is missing
RE_HUNK_EMPTY_BODY_LINE = re.compile(
    r'^(?P<line_type>[- \+\\]?)(?P<value>[\r\n]{1,2})', re.DOTALL)

# literal marker line `\ No newline at end of file`
RE_NO_NEWLINE_MARKER = re.compile(r'^\\ No newline at end of file')

# `Binary file(s) <source> [and <target>] differ|has changed`; each name may
# be followed by a tab and a timestamp
RE_BINARY_DIFF = re.compile(
    r'^Binary files? '
    r'(?P<source_filename>[^\t]+?)(?:\t(?P<source_timestamp>[\s0-9:\+-]+))?'
    r'(?: and (?P<target_filename>[^\t]+?)(?:\t(?P<target_timestamp>[\s0-9:\+-]+))?)? (differ|has changed)')

# encoding name used by default across the package
DEFAULT_ENCODING = 'UTF-8'

# file name standing for a missing source or target file
DEV_NULL = '/dev/null'
# line type markers: the first character of a hunk body line
LINE_TYPE_ADDED = '+'
LINE_TYPE_REMOVED = '-'
LINE_TYPE_CONTEXT = ' '
# line with no type character at all
LINE_TYPE_EMPTY = ''
# marker character and text of the `\ No newline at end of file` line
LINE_TYPE_NO_NEWLINE = '\\'
LINE_VALUE_NO_NEWLINE = ' No newline at end of file'
83 | 


--------------------------------------------------------------------------------
/unidiff/errors.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # The MIT License (MIT)
 4 | # Copyright (c) 2014-2017 Matias Bordese
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
22 | # OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | 
25 | """Errors and exceptions raised by the package."""
26 | 
27 | from __future__ import unicode_literals
28 | 
29 | 
class UnidiffParseError(Exception):
    """Raised when unified diff data cannot be parsed."""
32 | 


--------------------------------------------------------------------------------
/unidiff/patch.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # The MIT License (MIT)
  4 | # Copyright (c) 2014-2023 Matias Bordese
  5 | #
  6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  7 | # of this software and associated documentation files (the "Software"), to deal
  8 | # in the Software without restriction, including without limitation the rights
  9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 | # copies of the Software, and to permit persons to whom the Software is
 11 | # furnished to do so, subject to the following conditions:
 12 | #
 13 | # The above copyright notice and this permission notice shall be included in
 14 | # all copies or substantial portions of the Software.
 15 | #
 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 21 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
 22 | # OR OTHER DEALINGS IN THE SOFTWARE.
 23 | 
 24 | 
 25 | """Classes used by the unified diff parser to keep the diff data."""
 26 | 
 27 | from __future__ import unicode_literals
 28 | 
 29 | import codecs
 30 | import sys
 31 | 
 32 | from unidiff.constants import (
 33 |     DEFAULT_ENCODING,
 34 |     DEV_NULL,
 35 |     LINE_TYPE_ADDED,
 36 |     LINE_TYPE_CONTEXT,
 37 |     LINE_TYPE_EMPTY,
 38 |     LINE_TYPE_REMOVED,
 39 |     LINE_TYPE_NO_NEWLINE,
 40 |     LINE_VALUE_NO_NEWLINE,
 41 |     RE_DIFF_GIT_DELETED_FILE,
 42 |     RE_DIFF_GIT_HEADER,
 43 |     RE_DIFF_GIT_HEADER_URI_LIKE,
 44 |     RE_DIFF_GIT_HEADER_NO_PREFIX,
 45 |     RE_DIFF_GIT_NEW_FILE,
 46 |     RE_HUNK_BODY_LINE,
 47 |     RE_HUNK_EMPTY_BODY_LINE,
 48 |     RE_HUNK_HEADER,
 49 |     RE_SOURCE_FILENAME,
 50 |     RE_TARGET_FILENAME,
 51 |     RE_NO_NEWLINE_MARKER,
 52 |     RE_BINARY_DIFF,
 53 | )
 54 | from unidiff.errors import UnidiffParseError
 55 | 
 56 | 
 57 | PY2 = sys.version_info[0] == 2
 58 | if PY2:
 59 |     import io
 60 |     from StringIO import StringIO
 61 |     open_file = io.open
 62 |     make_str = lambda x: x.encode(DEFAULT_ENCODING)
 63 | 
 64 |     def implements_to_string(cls):
 65 |         cls.__unicode__ = cls.__str__
 66 |         cls.__str__ = lambda x: x.__unicode__().encode(DEFAULT_ENCODING)
 67 |         return cls
 68 | else:
 69 |     from io import StringIO
 70 |     from typing import Iterable, Optional, Union
 71 |     open_file = open
 72 |     make_str = str
 73 |     implements_to_string = lambda x: x
 74 |     unicode = str
 75 |     basestring = str
 76 | 
 77 | 
 78 | @implements_to_string
 79 | class Line(object):
 80 |     """A diff line."""
 81 | 
 82 |     def __init__(self, value, line_type,
 83 |                  source_line_no=None, target_line_no=None, diff_line_no=None):
 84 |         # type: (str, str, Optional[int], Optional[int], Optional[int]) -> None
 85 |         super(Line, self).__init__()
 86 |         self.source_line_no = source_line_no
 87 |         self.target_line_no = target_line_no
 88 |         self.diff_line_no = diff_line_no
 89 |         self.line_type = line_type
 90 |         self.value = value
 91 | 
 92 |     def __repr__(self):
 93 |         # type: () -> str
 94 |         return make_str("<Line: %s%s>") % (self.line_type, self.value)
 95 | 
 96 |     def __str__(self):
 97 |         # type: () -> str
 98 |         return "%s%s" % (self.line_type, self.value)
 99 | 
100 |     def __eq__(self, other):
101 |         # type: (Line) -> bool
102 |         return (self.source_line_no == other.source_line_no and
103 |                 self.target_line_no == other.target_line_no and
104 |                 self.diff_line_no == other.diff_line_no and
105 |                 self.line_type == other.line_type and
106 |                 self.value == other.value)
107 | 
108 |     @property
109 |     def is_added(self):
110 |         # type: () -> bool
111 |         return self.line_type == LINE_TYPE_ADDED
112 | 
113 |     @property
114 |     def is_removed(self):
115 |         # type: () -> bool
116 |         return self.line_type == LINE_TYPE_REMOVED
117 | 
118 |     @property
119 |     def is_context(self):
120 |         # type: () -> bool
121 |         return self.line_type == LINE_TYPE_CONTEXT
122 | 
123 | 
@implements_to_string
class PatchInfo(list):
    """Lines with extended patch info.

    Format of this info is not documented and it very much depends on
    patch producer.

    """

    def __repr__(self):
        # type: () -> str
        """Return ``<PatchInfo: ...>`` showing the stripped first line.

        An instance with no lines is rendered with an empty description
        instead of raising IndexError.
        """
        first_line = self[0].strip() if self else ''
        value = "<PatchInfo: %s>" % first_line
        return make_str(value)

    def __str__(self):
        # type: () -> str
        # the lines are concatenated as they are, no separator is added
        return ''.join(unicode(line) for line in self)
141 | 
142 | 
@implements_to_string
class Hunk(list):
    """One modified block of a file, stored as a list of its lines."""

    def __init__(self, src_start=0, src_len=0, tgt_start=0, tgt_len=0,
                 section_header=''):
        # type: (int, int, int, int, str) -> None
        """Store the hunk header values.

        Start and length values go through ``int()``; a length given as None
        is stored as 1.
        """
        super(Hunk, self).__init__()
        self.source_start = int(src_start)
        self.source_length = 1 if src_len is None else int(src_len)
        self.target_start = int(tgt_start)
        self.target_length = 1 if tgt_len is None else int(tgt_len)
        self.section_header = section_header
        # optional fixed counters; while they are None, `added` and `removed`
        # count the stored lines instead
        self._added = None  # Optional[int]
        self._removed = None  # Optional[int]

    def __repr__(self):
        # type: () -> str
        header_values = (self.source_start, self.source_length,
                         self.target_start, self.target_length,
                         self.section_header)
        return make_str("<Hunk: @@ %d,%d %d,%d @@ %s>" % header_values)

    def __str__(self):
        # type: () -> str
        """Render the hunk header line followed by every stored line."""
        # the section header is optional: emit it, after a space, only if set
        header_suffix = ''
        if self.section_header:
            header_suffix = ' ' + self.section_header
        header = "@@ -%d,%d +%d,%d @@%s\n" % (
            self.source_start, self.source_length,
            self.target_start, self.target_length,
            header_suffix)
        body = ''.join(unicode(line) for line in self)
        return header + body

    def append(self, line):
        # type: (Line) -> None
        """Add ``line`` at the end of the hunk."""
        # Rendering the line first has no effect other than potentially
        # raising a UnicodeDecodeError for badly encoded content.
        str(line)
        super(Hunk, self).append(line)

    @property
    def added(self):
        # type: () -> Optional[int]
        """Number of added lines (the fixed counter, when one is set)."""
        if self._added is None:
            # counted on every access so that hunk modifications show up
            # (which should mean metadata_only switch wasn't used)
            return len([line for line in self if line.is_added])
        return self._added

    @property
    def removed(self):
        # type: () -> Optional[int]
        """Number of removed lines (the fixed counter, when one is set)."""
        if self._removed is None:
            # counted on every access so that hunk modifications show up
            # (which should mean metadata_only switch wasn't used)
            return len([line for line in self if line.is_removed])
        return self._removed

    def is_valid(self):
        # type: () -> bool
        """Check hunk header data matches entered lines info."""
        source_ok = len(self.source) == self.source_length
        return source_ok and len(self.target) == self.target_length

    def source_lines(self):
        # type: () -> Iterable[Line]
        """Hunk lines from source file (generator)."""
        return (entry for entry in self
                if entry.is_context or entry.is_removed)

    @property
    def source(self):
        # type: () -> Iterable[str]
        """Source-side lines rendered as strings."""
        return list(map(str, self.source_lines()))

    def target_lines(self):
        # type: () -> Iterable[Line]
        """Hunk lines from target file (generator)."""
        return (entry for entry in self
                if entry.is_context or entry.is_added)

    @property
    def target(self):
        # type: () -> Iterable[str]
        """Target-side lines rendered as strings."""
        return list(map(str, self.target_lines()))
233 | 
234 | 
235 | class PatchedFile(list):
236 |     """Patch updated file, it is a list of Hunks."""
237 | 
238 |     def __init__(self, patch_info=None, source='', target='',
239 |                  source_timestamp=None, target_timestamp=None,
240 |                  is_binary_file=False):
241 |         # type: (Optional[PatchInfo], str, str, Optional[str], Optional[str], bool, bool) -> None
242 |         super(PatchedFile, self).__init__()
243 |         self.patch_info = patch_info
244 |         self.source_file = source
245 |         self.source_timestamp = source_timestamp
246 |         self.target_file = target
247 |         self.target_timestamp = target_timestamp
248 |         self.is_binary_file = is_binary_file
249 | 
250 |     def __repr__(self):
251 |         # type: () -> str
252 |         return make_str("<PatchedFile: %s>") % make_str(self.path)
253 | 
254 |     def __str__(self):
255 |         # type: () -> str
256 |         source = ''
257 |         target = ''
258 |         # patch info is optional
259 |         info = '' if self.patch_info is None else str(self.patch_info)
260 |         if not self.is_binary_file and self:
261 |             source = "--- %s%s\n" % (
262 |                 self.source_file,
263 |                 '\t' + self.source_timestamp if self.source_timestamp else '')
264 |             target = "+++ %s%s\n" % (
265 |                 self.target_file,
266 |                 '\t' + self.target_timestamp if self.target_timestamp else '')
267 |         hunks = ''.join(unicode(hunk) for hunk in self)
268 |         return info + source + target + hunks
269 | 
270 |     def _parse_hunk(self, header, diff, encoding, metadata_only):
271 |         # type: (str, enumerate[str], Optional[str], bool) -> None
272 |         """Parse hunk details."""
273 |         header_info = RE_HUNK_HEADER.match(header)
274 |         hunk_info = header_info.groups()
275 |         hunk = Hunk(*hunk_info)
276 | 
277 |         source_line_no = hunk.source_start
278 |         target_line_no = hunk.target_start
279 |         expected_source_end = source_line_no + hunk.source_length
280 |         expected_target_end = target_line_no + hunk.target_length
281 |         added = 0
282 |         removed = 0
283 | 
284 |         for diff_line_no, line in diff:
285 |             if encoding is not None:
286 |                 line = line.decode(encoding)
287 | 
288 |             if metadata_only:
289 |                 # quick line type detection, no regex required
290 |                 line_type = line[0] if line else LINE_TYPE_CONTEXT
291 |                 if line_type not in (LINE_TYPE_ADDED,
292 |                                      LINE_TYPE_REMOVED,
293 |                                      LINE_TYPE_CONTEXT,
294 |                                      LINE_TYPE_NO_NEWLINE):
295 |                     raise UnidiffParseError(
296 |                         'Hunk diff line expected: %s' % line)
297 | 
298 |                 if line_type == LINE_TYPE_ADDED:
299 |                     target_line_no += 1
300 |                     added += 1
301 |                 elif line_type == LINE_TYPE_REMOVED:
302 |                     source_line_no += 1
303 |                     removed += 1
304 |                 elif line_type == LINE_TYPE_CONTEXT:
305 |                     target_line_no += 1
306 |                     source_line_no += 1
307 | 
308 |                 # no file content tracking
309 |                 original_line = None
310 | 
311 |             else:
312 |                 # parse diff line content
313 |                 valid_line = RE_HUNK_BODY_LINE.match(line)
314 |                 if not valid_line:
315 |                     valid_line = RE_HUNK_EMPTY_BODY_LINE.match(line)
316 | 
317 |                 if not valid_line:
318 |                     raise UnidiffParseError(
319 |                         'Hunk diff line expected: %s' % line)
320 | 
321 |                 line_type = valid_line.group('line_type')
322 |                 if line_type == LINE_TYPE_EMPTY:
323 |                     line_type = LINE_TYPE_CONTEXT
324 | 
325 |                 value = valid_line.group('value')  # type: str
326 |                 original_line = Line(value, line_type=line_type)
327 | 
328 |                 if line_type == LINE_TYPE_ADDED:
329 |                     original_line.target_line_no = target_line_no
330 |                     target_line_no += 1
331 |                 elif line_type == LINE_TYPE_REMOVED:
332 |                     original_line.source_line_no = source_line_no
333 |                     source_line_no += 1
334 |                 elif line_type == LINE_TYPE_CONTEXT:
335 |                     original_line.target_line_no = target_line_no
336 |                     original_line.source_line_no = source_line_no
337 |                     target_line_no += 1
338 |                     source_line_no += 1
339 |                 elif line_type == LINE_TYPE_NO_NEWLINE:
340 |                     pass
341 |                 else:
342 |                     original_line = None
343 | 
344 |             # stop parsing if we got past expected number of lines
345 |             if (source_line_no > expected_source_end or
346 |                     target_line_no > expected_target_end):
347 |                 raise UnidiffParseError('Hunk is longer than expected')
348 | 
349 |             if original_line:
350 |                 original_line.diff_line_no = diff_line_no
351 |                 hunk.append(original_line)
352 | 
353 |             # if hunk source/target lengths are ok, hunk is complete
354 |             if (source_line_no == expected_source_end and
355 |                     target_line_no == expected_target_end):
356 |                 break
357 | 
358 |         # report an error if we haven't got expected number of lines
359 |         if (source_line_no < expected_source_end or
360 |                 target_line_no < expected_target_end):
361 |             raise UnidiffParseError('Hunk is shorter than expected')
362 | 
363 |         if metadata_only:
364 |             # HACK: set fixed calculated values when metadata_only is enabled
365 |             hunk._added = added
366 |             hunk._removed = removed
367 | 
368 |         self.append(hunk)
369 | 
370 |     def _add_no_newline_marker_to_last_hunk(self):
371 |         # type: () -> None
372 |         if not self:
373 |             raise UnidiffParseError(
374 |                 'Unexpected marker:' + LINE_VALUE_NO_NEWLINE)
375 |         last_hunk = self[-1]
376 |         last_hunk.append(
377 |             Line(LINE_VALUE_NO_NEWLINE + '\n', line_type=LINE_TYPE_NO_NEWLINE))
378 | 
379 |     def _append_trailing_empty_line(self):
380 |         # type: () -> None
381 |         if not self:
382 |             raise UnidiffParseError('Unexpected trailing newline character')
383 |         last_hunk = self[-1]
384 |         last_hunk.append(Line('\n', line_type=LINE_TYPE_EMPTY))
385 | 
386 |     @property
387 |     def path(self):
388 |         # type: () -> str
389 |         """Return the file path abstracted from VCS."""
390 |         filepath = self.source_file
391 |         if filepath in (None, DEV_NULL) or (
392 |                 self.is_rename and self.target_file not in (None, DEV_NULL)):
393 |             # if this is a rename, prefer the target filename
394 |             filepath = self.target_file
395 | 
396 |         quoted = filepath.startswith('"') and filepath.endswith('"')
397 |         if quoted:
398 |             filepath = filepath[1:-1]
399 | 
400 |         if filepath.startswith('a/') or filepath.startswith('b/'):
401 |             filepath = filepath[2:]
402 | 
403 |         if quoted:
404 |             filepath = '"{}"'.format(filepath)
405 | 
406 |         return filepath
407 | 
408 |     @property
409 |     def added(self):
410 |         # type: () -> int
411 |         """Return the file total added lines."""
412 |         return sum([hunk.added for hunk in self])
413 | 
414 |     @property
415 |     def removed(self):
416 |         # type: () -> int
417 |         """Return the file total removed lines."""
418 |         return sum([hunk.removed for hunk in self])
419 | 
420 |     @property
421 |     def is_rename(self):
422 |         return (self.source_file != DEV_NULL
423 |             and self.target_file != DEV_NULL
424 |             and self.source_file[2:] != self.target_file[2:])
425 | 
426 |     @property
427 |     def is_added_file(self):
428 |         # type: () -> bool
429 |         """Return True if this patch adds the file."""
430 |         if self.source_file == DEV_NULL:
431 |             return True
432 |         return (len(self) == 1 and self[0].source_start == 0 and
433 |                 self[0].source_length == 0)
434 | 
435 |     @property
436 |     def is_removed_file(self):
437 |         # type: () -> bool
438 |         """Return True if this patch removes the file."""
439 |         if self.target_file == DEV_NULL:
440 |             return True
441 |         return (len(self) == 1 and self[0].target_start == 0 and
442 |                 self[0].target_length == 0)
443 | 
444 |     @property
445 |     def is_modified_file(self):
446 |         # type: () -> bool
447 |         """Return True if this patch modifies the file."""
448 |         return not (self.is_added_file or self.is_removed_file)
449 | 
450 | 
@implements_to_string
class PatchSet(list):
    """A list of PatchedFiles.

    The list is filled by parsing a unified diff, given either as an
    iterable of lines (for instance an open file) or as a single string.
    """

    def __init__(self, f, encoding=None, metadata_only=False):
        # type: (Union[StringIO, str], Optional[str], bool) -> None
        """Parse the diff found in `f`.

        :param f: iterable yielding the diff lines, or a string holding the
            whole diff.
        :param encoding: if not None, used to decode the input: the whole
            string when `f` is a byte string, every single line otherwise.
            If None, the input is assumed to be unicode text already.
        :param metadata_only: if True, hunks are parsed without keeping
            their content (see the comment in the body).
        :raises UnidiffParseError: if the diff is not consistent.
        """
        super(PatchSet, self).__init__()

        # convert string inputs to StringIO objects
        if isinstance(f, (basestring, bytes)):
            f = self._convert_string(f, encoding)  # type: StringIO
            # the whole text has just been decoded (when that was needed),
            # so the parser must not try to decode every line once more
            encoding = None

        # make sure we pass an iterator object to parse
        data = iter(f)
        # if encoding is None, assume we are reading unicode data
        # when metadata_only is True, only perform a minimal metadata parsing
        # (ie. hunks without content) which is around 2.5-6 times faster;
        # it will still validate the diff metadata consistency and get counts
        self._parse(data, encoding=encoding, metadata_only=metadata_only)

    def __repr__(self):
        # type: () -> str
        return make_str('<PatchSet: %s>') % super(PatchSet, self).__repr__()

    def __str__(self):
        # type: () -> str
        return ''.join(unicode(patched_file) for patched_file in self)

    def _parse(self, diff, encoding, metadata_only):
        # type: (StringIO, Optional[str], bool) -> None
        """Consume the diff lines, appending a PatchedFile per patched file.

        Lines are matched, in order, against: git headers, git new/deleted
        file markers, source and target file headers, hunk headers (the
        hunk body is consumed by the current file), the no-newline marker,
        trailing empty lines, binary file notices; anything else is kept as
        patch info.

        :param diff: iterator over the diff lines.
        :param encoding: if not None, every line is decoded with it.
        :param metadata_only: passed through to the hunk parsing.
        :raises UnidiffParseError: if a line shows up where it makes no
            sense (e.g. a hunk, a marker or a target header without the
            file or source header it belongs to).
        """
        current_file = None
        patch_info = None
        # last source header seen; stays None until one has been parsed, so
        # that a target header without any source can be reported properly
        source_file = None
        source_timestamp = None

        diff = enumerate(diff, 1)
        for unused_diff_line_no, line in diff:
            if encoding is not None:
                line = line.decode(encoding)

            # check for a git file rename
            is_diff_git_header = RE_DIFF_GIT_HEADER.match(line) or \
                RE_DIFF_GIT_HEADER_URI_LIKE.match(line) or \
                RE_DIFF_GIT_HEADER_NO_PREFIX.match(line)
            if is_diff_git_header:
                patch_info = PatchInfo()
                source_file = is_diff_git_header.group('source')
                target_file = is_diff_git_header.group('target')
                current_file = PatchedFile(
                    patch_info, source_file, target_file, None, None)
                self.append(current_file)
                patch_info.append(line)
                continue

            # check for a git new file
            is_diff_git_new_file = RE_DIFF_GIT_NEW_FILE.match(line)
            if is_diff_git_new_file:
                if current_file is None or patch_info is None:
                    raise UnidiffParseError('Unexpected new file found: %s' % line)
                current_file.source_file = DEV_NULL
                patch_info.append(line)
                continue

            # check for a git deleted file
            is_diff_git_deleted_file = RE_DIFF_GIT_DELETED_FILE.match(line)
            if is_diff_git_deleted_file:
                if current_file is None or patch_info is None:
                    raise UnidiffParseError('Unexpected deleted file found: %s' % line)
                current_file.target_file = DEV_NULL
                patch_info.append(line)
                continue

            # check for source file header
            is_source_filename = RE_SOURCE_FILENAME.match(line)
            if is_source_filename:
                source_file = is_source_filename.group('filename')
                source_timestamp = is_source_filename.group('timestamp')
                # reset current file, unless we are processing a rename
                # (in that case, source files should match)
                if current_file is not None and not (
                        current_file.source_file == source_file):
                    current_file = None
                elif current_file is not None:
                    current_file.source_timestamp = source_timestamp
                continue

            # check for target file header
            is_target_filename = RE_TARGET_FILENAME.match(line)
            if is_target_filename:
                target_file = is_target_filename.group('filename')
                target_timestamp = is_target_filename.group('timestamp')
                if current_file is not None and not (current_file.target_file == target_file):
                    raise UnidiffParseError('Target without source: %s' % line)
                if current_file is None:
                    if source_file is None:
                        # no source header has been seen at all so far
                        raise UnidiffParseError(
                            'Target without source: %s' % line)
                    # add current file to PatchSet
                    current_file = PatchedFile(
                        patch_info, source_file, target_file,
                        source_timestamp, target_timestamp)
                    self.append(current_file)
                    patch_info = None
                else:
                    current_file.target_timestamp = target_timestamp
                continue

            # check for hunk header
            is_hunk_header = RE_HUNK_HEADER.match(line)
            if is_hunk_header:
                patch_info = None
                if current_file is None:
                    raise UnidiffParseError('Unexpected hunk found: %s' % line)
                # the hunk body lines are consumed from the shared iterator
                current_file._parse_hunk(line, diff, encoding, metadata_only)
                continue

            # check for no newline marker
            is_no_newline = RE_NO_NEWLINE_MARKER.match(line)
            if is_no_newline:
                if current_file is None:
                    raise UnidiffParseError('Unexpected marker: %s' % line)
                current_file._add_no_newline_marker_to_last_hunk()
                continue

            # sometimes hunks can be followed by empty lines
            if line == '\n' and current_file is not None:
                current_file._append_trailing_empty_line()
                continue

            # if nothing has matched above then this line is a patch info
            if patch_info is None:
                current_file = None
                patch_info = PatchInfo()

            is_binary_diff = RE_BINARY_DIFF.match(line)
            if is_binary_diff:
                source_file = is_binary_diff.group('source_filename')
                target_file = is_binary_diff.group('target_filename')
                patch_info.append(line)
                if current_file is not None:
                    current_file.is_binary_file = True
                else:
                    current_file = PatchedFile(
                        patch_info, source_file, target_file, is_binary_file=True)
                    self.append(current_file)
                patch_info = None
                current_file = None
                continue

            if line == 'GIT binary patch\n':
                if current_file is None:
                    # there is no file this binary patch could belong to
                    raise UnidiffParseError(
                        'Unexpected binary patch found: %s' % line)
                current_file.is_binary_file = True
                patch_info = None
                current_file = None
                continue

            patch_info.append(line)

    @classmethod
    def from_filename(cls, filename, encoding=DEFAULT_ENCODING, errors=None, newline=None):
        # type: (str, str, Optional[str], Optional[str]) -> PatchSet
        """Return a PatchSet instance given a diff filename.

        The file is opened in text mode with the given `encoding`, `errors`
        and `newline` settings, so its lines need no further decoding.
        """
        with open_file(filename, 'r', encoding=encoding, errors=errors, newline=newline) as f:
            instance = cls(f)
        return instance

    @staticmethod
    def _convert_string(data, encoding=None, errors='strict'):
        # type: (Union[str, bytes], Optional[str], str) -> StringIO
        """Wrap `data` in a StringIO, decoding it first if required.

        Decoding only happens when an encoding is given and `data` is a
        byte string; text that is already decoded is wrapped as it is.
        """
        if encoding is not None and isinstance(data, bytes):
            # if encoding is given, decode the bytes
            data = unicode(data, encoding=encoding, errors=errors)
        return StringIO(data)

    @classmethod
    def from_string(cls, data, encoding=None, errors='strict'):
        # type: (Union[str, bytes], Optional[str], str) -> PatchSet
        """Return a PatchSet instance given a diff string."""
        return cls(cls._convert_string(data, encoding, errors))

    @property
    def added_files(self):
        # type: () -> list[PatchedFile]
        """Return patch added files as a list."""
        return [f for f in self if f.is_added_file]

    @property
    def removed_files(self):
        # type: () -> list[PatchedFile]
        """Return patch removed files as a list."""
        return [f for f in self if f.is_removed_file]

    @property
    def modified_files(self):
        # type: () -> list[PatchedFile]
        """Return patch modified files as a list."""
        return [f for f in self if f.is_modified_file]

    @property
    def added(self):
        # type: () -> int
        """Return the patch total added lines."""
        return sum(f.added for f in self)

    @property
    def removed(self):
        # type: () -> int
        """Return the patch total removed lines."""
        return sum(f.removed for f in self)
654 | 


--------------------------------------------------------------------------------