├── LICENSE ├── README.md ├── file1.txt ├── file2.txt └── piff.py /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2023 Alexey Kutepov 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Piff 2 | 3 | Simple File Diff Tool in Python. It's very slow (not only because it's written in Python, but also because it uses an O(N²) algorithm) and is implemented for educational purposes. Don't use it for anything real. 
4 | 5 | ## Quick Start 6 | 7 | ```console 8 | $ ./piff.py diff file1.txt file2.txt > file.patch 9 | $ ./piff.py patch file1.txt file.patch 10 | $ diff -u file1.txt file2.txt # verify that file1.txt was actually turned into file2.txt 11 | ``` 12 | 13 | ## Patch Format 14 | 15 | Piff uses a custom patch format. Here is its [ABNF](https://en.wikipedia.org/wiki/Augmented_Backus%E2%80%93Naur_form): 16 | 17 | ```c 18 | patch = *(action SP row SP line LF) 19 | action = 'A' / 'R' 20 | row = 1*DIGIT 21 | line = *OCTET 22 | ``` 23 | 24 | - `action` `'A'` means add the `line` at zero-based index `row` of the patched file 25 | - `action` `'R'` means remove the `line` at zero-based index `row` of the original file 26 | 27 | Here is an example of what it usually looks like: 28 | 29 | ``` 30 | A 4 Duis aute irure in dolor reprehenderit in voluptate velit 31 | R 4 Duis aute irure dolor in reprehenderit in voluptate velit 32 | A 7 asjdklaskldja 33 | R 7 mollit anim id est laborum. 34 | ``` 35 | 36 | ## References 37 | 38 | - https://en.wikipedia.org/wiki/Levenshtein_distance 39 | - https://www.nathaniel.ai/myers-diff/ 40 | -------------------------------------------------------------------------------- /file1.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit, 2 | sed do eiusmod tempor incididunt ut labore et dolore magna 3 | aliqua. Ut enim ad minim veniam, quis nostrud exercitation 4 | ullamco laboris nisi ut aliquip ex ea commodo consequat. 5 | Duis aute irure dolor in reprehenderit in voluptate velit 6 | esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat 7 | cupidatat non proident, sunt in culpa qui officia deserunt 8 | mollit anim id est laborum. 
9 | -------------------------------------------------------------------------------- /file2.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit, 2 | sed do eiusmod tempor incididunt ut labore et dolore magna 3 | aliqua. Ut enim ad minim veniam, quis nostrud exercitation 4 | ullamco laboris nisi ut aliquip ex ea commodo consequat. 5 | Duis aute irure in dolor reprehenderit in voluptate velit 6 | esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat 7 | cupidatat non proident, sunt in culpa qui officia deserunt 8 | asjdklaskldja 9 | -------------------------------------------------------------------------------- /piff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import re 5 | from typing import TypeVar, List, Sequence, Tuple, Optional 6 | from typing_extensions import Literal 7 | 8 | def read_entire_file(file_path: str) -> str: 9 | with open(file_path) as f: 10 | return f.read() 11 | 12 | Action = Literal['I', 'A', 'R'] 13 | 14 | # TODO: can we get rid of IGNORE? It's not used in the final patches anyway... 15 | IGNORE: Action = 'I' 16 | ADD: Action = 'A' 17 | REMOVE: Action = 'R' 18 | 19 | T = TypeVar("T") 20 | 21 | # TODO: can we make T comparable? 
def edit_distance(s1: Sequence[T], s2: Sequence[T]) -> List[Tuple[Action, int, T]]:
    """Compute an edit script that turns ``s1`` into ``s2``.

    Only insertions and removals are used (there is no "replace" step), so
    the length of the returned script is the insertion/deletion edit
    distance between the two sequences.

    Args:
        s1: The original sequence.
        s2: The target sequence.

    Returns:
        The script in forward order as ``(action, index, item)`` tuples:
        ``(ADD, i, s2[i])`` means ``s2[i]`` has to be inserted, with ``i``
        being an index into ``s2``; ``(REMOVE, i, s1[i])`` means ``s1[i]``
        has to be dropped, with ``i`` being an index into ``s1``.
        Unchanged items do not appear in the script.

    Time and memory are both proportional to ``len(s1) * len(s2)``.
    """
    m1 = len(s1)
    m2 = len(s2)

    # distances[n1][n2] is the cost of turning s1[:n1] into s2[:n2];
    # actions[n1][n2] remembers the last step taken to reach that cell.
    distances = [[0] * (m2 + 1) for _ in range(m1 + 1)]
    actions = [[IGNORE] * (m2 + 1) for _ in range(m1 + 1)]

    # An empty s1 can only grow into s2[:n2] by adding everything.
    for n2 in range(1, m2 + 1):
        distances[0][n2] = n2
        actions[0][n2] = ADD

    # s1[:n1] can only shrink into an empty s2 by removing everything.
    for n1 in range(1, m1 + 1):
        distances[n1][0] = n1
        actions[n1][0] = REMOVE

    for n1 in range(1, m1 + 1):
        for n2 in range(1, m2 + 1):
            if s1[n1 - 1] == s2[n2 - 1]:
                # Equal items cost nothing: inherit the diagonal.
                distances[n1][n2] = distances[n1 - 1][n2 - 1]
                actions[n1][n2] = IGNORE
                continue

            remove = distances[n1 - 1][n2]
            add = distances[n1][n2 - 1]
            # Ties are resolved in favour of REMOVE.
            if add < remove:
                distances[n1][n2] = add + 1
                actions[n1][n2] = ADD
            else:
                distances[n1][n2] = remove + 1
                actions[n1][n2] = REMOVE

    # Walk back from the bottom-right corner collecting the steps taken.
    patch: List[Tuple[Action, int, T]] = []
    n1 = m1
    n2 = m2
    while n1 > 0 or n2 > 0:
        action = actions[n1][n2]
        if action == ADD:
            n2 -= 1
            patch.append((ADD, n2, s2[n2]))
        elif action == REMOVE:
            n1 -= 1
            patch.append((REMOVE, n1, s1[n1]))
        elif action == IGNORE:
            n1 -= 1
            n2 -= 1
        else:
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError("unreachable")
    patch.reverse()
    return patch


# One line of a patch file: "<A|R> <row> <text>".
# Raw string: "\d" inside a plain literal is an invalid escape sequence.
PATCH_LINE_REGEXP: re.Pattern = re.compile(r"([AR]) (\d+) (.*)")


class Subcommand:
    """Base class of a command-line subcommand.

    Attributes are the subcommand's name, a human-readable description of
    its positional arguments (``signature``) and a one-line description.
    """

    name: str
    signature: str
    description: str

    def __init__(self, name: str, signature: str, description: str):
        self.name = name
        self.signature = signature
        self.description = description

    def run(self, program: str, args: List[str]) -> int:
        """Execute the subcommand and return the process exit code.

        Args:
            program: The program name, used in usage messages.
            args: The command-line arguments following the subcommand name.
        """
        raise NotImplementedError("not implemented")


class DiffSubcommand(Subcommand):
    """``diff``: print the patch that turns the first file into the second."""

    def __init__(self):
        super().__init__("diff", "<file1> <file2>", "print the difference between the files to stdout")

    def run(self, program: str, args: List[str]) -> int:
        if len(args) < 2:
            # stdout carries the patch itself, so diagnostics go to stderr.
            print(f"Usage: {program} {self.name} {self.signature}", file=sys.stderr)
            print(f"ERROR: not enough files were provided to {self.name}", file=sys.stderr)
            return 1

        file_path1, file_path2, *_ = args
        lines1 = read_entire_file(file_path1).splitlines()
        lines2 = read_entire_file(file_path2).splitlines()

        for (action, n, line) in edit_distance(lines1, lines2):
            print(f"{action} {n} {line}")
        return 0


def _apply_patch(lines: List[str], patch: Sequence[Tuple[str, int, str]]) -> List[str]:
    """Return a patched copy of ``lines``; raise ValueError if ``patch`` does not fit.

    ``patch`` must be in the forward order produced by ``edit_distance``:
    the row of an ADD is an index into the patched result, the row of a
    REMOVE is an index into the original ``lines``.
    """
    result: List[str] = []
    consumed = 0  # how many original lines were already copied or removed
    for (action, row, text) in patch:
        if action == ADD:
            # Copy unchanged lines until the result is `row` lines long,
            # so that the new line lands exactly at index `row`.
            pending = row - len(result)
            if pending < 0 or consumed + pending > len(lines):
                raise ValueError(f"cannot add a line at index {row}")
            result.extend(lines[consumed:consumed + pending])
            consumed += pending
            result.append(text)
        elif action == REMOVE:
            if row < consumed or row >= len(lines):
                raise ValueError(f"cannot remove the line at index {row}")
            if lines[row] != text:
                raise ValueError(f"line {row} is {lines[row]!r}, but the patch expects {text!r}")
            # Keep everything before the removed line, skip the line itself.
            result.extend(lines[consumed:row])
            consumed = row + 1
        else:
            raise AssertionError("unreachable")
    result.extend(lines[consumed:])
    return result


class PatchSubcommand(Subcommand):
    """``patch``: apply a patch produced by ``diff`` to a file, in place."""

    def __init__(self):
        super().__init__("patch", "<file> <file.patch>", "patch the file with the given patch")

    def run(self, program: str, args: List[str]) -> int:
        if len(args) < 2:
            print(f"Usage: {program} {self.name} {self.signature}", file=sys.stderr)
            print(f"ERROR: not enough arguments were provided to {self.name}", file=sys.stderr)
            return 1

        file_path, patch_path, *_ = args

        lines = read_entire_file(file_path).splitlines()
        patch: List[Tuple[str, int, str]] = []
        ok = True
        for (row, line) in enumerate(read_entire_file(patch_path).splitlines()):
            if len(line) == 0:
                continue
            m = PATCH_LINE_REGEXP.match(line)
            if m is None:
                # Keep scanning so that every malformed line gets reported.
                print(f"{patch_path}:{row + 1}: Invalid patch action: {line}", file=sys.stderr)
                ok = False
                continue
            patch.append((m.group(1), int(m.group(2)), m.group(3)))
        if not ok:
            return 1

        try:
            patched = _apply_patch(lines, patch)
        except ValueError as err:
            # The target file is left untouched when the patch does not fit.
            print(f"ERROR: {patch_path} does not apply to {file_path}: {err}", file=sys.stderr)
            return 1

        with open(file_path, 'w') as f:
            f.writelines(f"{line}\n" for line in patched)
        return 0


class HelpSubcommand(Subcommand):
    """``help``: print general usage or the usage of a single subcommand."""

    def __init__(self):
        super().__init__("help", "[subcommand]", "print this help message")

    def run(self, program: str, args: List[str]) -> int:
        if len(args) == 0:
            usage(program)
            return 0

        subcmd_name = args[0]
        subcmd = find_subcommand(subcmd_name)
        if subcmd is None:
            return _report_unknown_subcommand(program, subcmd_name)

        print(f"Usage: {program} {subcmd.name} {subcmd.signature}")
        print(f" {subcmd.description}")
        return 0


SUBCOMMANDS: List[Subcommand] = [
    DiffSubcommand(),
    PatchSubcommand(),
    HelpSubcommand(),
]


def usage(program: str, file=None) -> None:
    """Print the general usage message and the list of subcommands.

    Args:
        program: The program name shown in the usage line.
        file: Stream to print to; ``None`` (the default) means stdout.
    """
    print(f"Usage: {program} <SUBCOMMAND> [OPTIONS]", file=file)
    print("Subcommands:", file=file)
    width = max(len(f'{subcmd.name} {subcmd.signature}')
                for subcmd in SUBCOMMANDS)
    for subcmd in SUBCOMMANDS:
        command = f'{subcmd.name} {subcmd.signature}'.ljust(width)
        print(f' {command} {subcmd.description}', file=file)


def suggest_closest_subcommand_if_exists(subcmd_name: str, file=None) -> None:
    """Print the known subcommands that look like a typo of ``subcmd_name``.

    A subcommand is suggested when fewer than 3 single-character
    insertions/removals turn ``subcmd_name`` into its name. Nothing is
    printed when there is no such subcommand.

    Args:
        subcmd_name: The unrecognised name typed by the user.
        file: Stream to print to; ``None`` (the default) means stdout.
    """
    candidates = [subcmd.name
                  for subcmd in SUBCOMMANDS
                  if len(edit_distance(subcmd_name, subcmd.name)) < 3]
    if len(candidates) > 0:
        print("Maybe you meant:", file=file)
        for name in candidates:
            print(f" {name}", file=file)


def find_subcommand(subcmd_name: str) -> Optional[Subcommand]:
    """Return the subcommand called ``subcmd_name``, or ``None`` if there is none."""
    for subcmd in SUBCOMMANDS:
        if subcmd.name == subcmd_name:
            return subcmd
    return None


def _report_unknown_subcommand(program: str, subcmd_name: str) -> int:
    """Report an unknown subcommand on stderr and return the exit code 1."""
    usage(program, file=sys.stderr)
    print(f"ERROR: unknown subcommand {subcmd_name}", file=sys.stderr)
    suggest_closest_subcommand_if_exists(subcmd_name, file=sys.stderr)
    return 1


def main() -> int:
    """Dispatch ``sys.argv`` to the matching subcommand; return the exit code."""
    assert len(sys.argv) > 0
    program, *args = sys.argv

    if len(args) == 0:
        usage(program, file=sys.stderr)
        print("ERROR: no subcommand is provided", file=sys.stderr)
        return 1

    subcmd_name, *args = args

    subcmd = find_subcommand(subcmd_name)
    if subcmd is None:
        return _report_unknown_subcommand(program, subcmd_name)
    return subcmd.run(program, args)
| 234 | # TODO: some sort of automatic testing 235 | # TODO: verify the lines of R actions 236 | 237 | if __name__ == '__main__': 238 | exit(main()) 239 | --------------------------------------------------------------------------------