├── README.md ├── LICENSE ├── lcs.py └── diff.py /README.md: -------------------------------------------------------------------------------- 1 | # diff 2 | 3 | Simple diff utility written in Python. It's based on a trivial solution of the 4 | Longest Common Subsequence problem. 5 | 6 | Launch it via `diff.py` script. 7 | 8 | ``` 9 | $ python3 diff.py f1 f2 10 | + """Simple diff based on LCS solution""" 11 | + 12 | + import sys 13 | from lcs import lcslen 14 | 15 | def print_diff(c, x, y, i, j): 16 | + """Print the diff using LCS length matrix by backtracking it""" 17 | + 18 | if i >= 0 and j >= 0 and x[i] == y[j]: 19 | print_diff(c, x, y, i-1, j-1) 20 | print(" " + x[i]) 21 | elif j >= 0 and (i == 0 or c[i][j-1] >= c[i-1][j]): 22 | print_diff(c, x, y, i, j-1) 23 | - print("+ " + y[j]) 24 | + print("+ " + y[j]) 25 | elif i >= 0 and (j == 0 or c[i][j-1] < c[i-1][j]): 26 | print_diff(c, x, y, i-1, j) 27 | print("- " + x[i]) 28 | else: 29 | - print("") 30 | - 31 | + print("") # pass? 32 | ``` 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Alex Dzyoba Corp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
# (LICENSE, continued)
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# --------------------------------------------------------------------------------
# /lcs.py:
# --------------------------------------------------------------------------------

"""Longest common subsequence module"""


def lcslen(x, y):
    """Build the matrix of LCS lengths for the sequences x and y.

    The returned matrix ``c`` is a list of ``len(x) + 1`` rows, each with
    ``len(y) + 1`` entries. For ``0 <= i < len(x)`` and ``0 <= j < len(y)``,
    ``c[i][j]`` is the length of the longest common subsequence of
    ``x[:i + 1]`` and ``y[:j + 1]``. The matrix is used later to backtrack
    the actual LCS (see backtrack) or a diff.
    """
    # The spare all-zero row and column that represent the "empty sequence"
    # base case sit at the END of the matrix. The loops below never write to
    # them, and Python's negative indexing makes c[-1][...] and c[...][-1]
    # land on them, so i == 0 and j == 0 need no special-casing.
    width = len(y) + 1
    c = [[0] * width for _ in range(len(x) + 1)]

    for i, xi in enumerate(x):
        row = c[i]
        prev_row = c[i - 1]  # for i == 0 this is the spare zero row
        for j, yj in enumerate(y):
            if xi == yj:
                row[j] = 1 + prev_row[j - 1]
            else:
                row[j] = max(row[j - 1], prev_row[j])
    return c


def backtrack(c, x, y, i, j):
    """Backtrack the LCS length matrix to get the actual LCS.

    c is the matrix built by lcslen(x, y); i and j are the indices in x and
    y to start from (normally len(x) - 1 and len(y) - 1). The elements of x
    must be strings; the result is their concatenation in sequence order.

    The walk is iterative so that long inputs cannot exhaust the interpreter
    recursion limit.
    """
    picked = []
    while i >= 0 and j >= 0:
        if x[i] == y[j]:
            picked.append(x[i])
            i -= 1
            j -= 1
        elif c[i][j - 1] >= c[i - 1][j]:
            # Ties go towards dropping an element of y.
            j -= 1
        else:
            i -= 1
    # Elements were collected from the end backwards.
    picked.reverse()
    return "".join(picked)


def lcs(x, y):
    """Get the longest common subsequence of x and y"""
    c = lcslen(x, y)
    return backtrack(c, x, y, len(x) - 1, len(y) - 1)


# --------------------------------------------------------------------------------
# /diff.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3

"""Simple diff based on LCS solution"""

import sys
from functools import partial

# In this combined listing lcslen is already defined above; when diff.py lives
# in its own file it comes from the sibling lcs module.
if "lcslen" not in globals():
    from lcs import lcslen

# Print without newline because input files already have it
_print = partial(print, end='')


def print_diff(c, x, y, i, j):
    """Print the diff using LCS length matrix by backtracking it.

    c is the matrix built by lcslen(x, y); x and y are sequences of strings
    (typically lines, each ending in a newline); i and j are the indices to
    start from (normally len(x) - 1 and len(y) - 1).

    Lines only in y are prefixed with "+ ", lines only in x with "- ", and
    common lines with two spaces so that all three kinds stay aligned.
    Always returns None.
    """
    # The matrix is walked from the end, so entries are collected in reverse
    # and emitted afterwards. Doing this with a loop instead of recursion
    # keeps inputs of more than ~1000 lines from raising RecursionError.
    pending = []
    while i >= 0 or j >= 0:
        if i < 0:
            # x is exhausted: everything left in y was added.
            pending.append("+ " + y[j])
            j -= 1
        elif j < 0:
            # y is exhausted: everything left in x was removed.
            pending.append("- " + x[i])
            i -= 1
        elif x[i] == y[j]:
            pending.append("  " + x[i])
            i -= 1
            j -= 1
        elif c[i][j - 1] >= c[i - 1][j]:
            pending.append("+ " + y[j])
            j -= 1
        else:
            pending.append("- " + x[i])
            i -= 1

    for entry in reversed(pending):
        # A final line without a trailing newline would otherwise run into
        # the next entry's marker.
        _print(entry if entry.endswith("\n") else entry + "\n")


def diff(x, y):
    """Print a line-by-line diff of the sequences of strings x and y."""
    c = lcslen(x, y)
    return print_diff(c, x, y, len(x) - 1, len(y) - 1)


def usage():
    """Print a one-line usage summary to standard error."""
    print("Usage: {} FILE1 FILE2".format(sys.argv[0]), file=sys.stderr)


def main():
    """Diff the two files named on the command line."""
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    with open(sys.argv[1], 'r') as f1, open(sys.argv[2], 'r') as f2:
        diff(f1.readlines(), f2.readlines())


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------