├── README.md ├── ddmin.py └── test.sh /README.md: -------------------------------------------------------------------------------- 1 | # ddmin: Delta Debugging Minimizer 2 | 3 | ## Overview 4 | ddmin is a Python library and command line tool designed for minimizing a data set while still retaining a specific property, typically for the purpose of debugging. It employs the technique of delta debugging, a systematic approach to isolate the minimal cause of a bug in complex input data. This library is especially useful for developers and testers who work on debugging software and need to identify the minimal test case that reproduces a bug. 5 | 6 | ## Requirements 7 | - Python 3.x 8 | 9 | ## Installation 10 | Clone the repository or download the source code. No external dependencies are required. 11 | 12 | ## Usage 13 | 14 | ### As a Library 15 | Import `delta_debug` from ddmin and use it in your Python scripts: 16 | 17 | ```python 18 | from ddmin import delta_debug 19 | 20 | # Define your 'interesting_test' function 21 | def interesting_test(input_data): 22 | for line in input_data: 23 | if "bug" in line: 24 | return True 25 | return False 26 | minimized_data = delta_debug(interesting_test, ["a", "b", "bug", "c", "bug", "bug"]) 27 | # minimized data is now a single bug causing input: ["bug"] 28 | ``` 29 | 30 | ### Command-Line Tool 31 | ddmin can be used as a command-line tool to minimize files: 32 | 33 | ```bash 34 | python ddmin.py --interesting [path_to_interesting_test_script] --to-minimize [path_to_file_to_minimize] [--bytes] 35 | ``` 36 | 37 | - `--interesting`: Path to the script that returns exit code 0 if the current state of the file is interesting. 38 | - `--to-minimize`: Path to the file that you want to minimize. The file will be modified in place. 39 | - `--bytes`: Optional flag to minimize by bytes instead of lines. 
"""ddmin: shrink an input list while it keeps satisfying an "interesting" test.

Usable as a library through :func:`delta_debug`, or as a command-line tool
that minimizes a file in place by lines (default) or by bytes.
"""

from typing import Any, Callable, List, TypeVar

# Define a generic type for the elements of the interesting_input list
T = TypeVar("T")


def delta_debug(
    interesting_test: Callable[[List[T]], bool],
    interesting_input: List[T],
    granularity: int = 2,
) -> List[T]:
    """Reduce *interesting_input* while *interesting_test* keeps passing.

    Each round splits the remaining items into roughly ``granularity``
    contiguous chunks. It first looks for a single chunk that is still
    interesting, then for a complement (everything except one chunk) that is
    still interesting. A successful reduction lowers the granularity by one
    (never below 2); a failed round doubles it, and the search stops once the
    chunks are single items and nothing can be removed.

    Args:
        interesting_test: Called with a candidate list (a subsequence of
            ``interesting_input`` in original order); returns True when the
            candidate still has the property of interest.
        interesting_input: The list to minimize. It must itself pass
            ``interesting_test``.
        granularity: Initial number of chunks; must be at least 2.

    Returns:
        A new list containing the items that survived minimization.

    Raises:
        AssertionError: If ``granularity`` is below 2, ``interesting_input``
            is not a list, or the initial input is not interesting.
    """
    # Explicit raises instead of `assert` statements: asserts are stripped
    # under `python -O`, which would silently skip both the validation and
    # the initial call to `interesting_test`. The exception type is kept so
    # existing callers see the same error.
    if not granularity >= 2:
        raise AssertionError("granularity must be at least 2")
    if not isinstance(interesting_input, list):
        raise AssertionError("interesting_input must be a list.")
    if not interesting_test(interesting_input):
        raise AssertionError(
            "The initial interesting_input must pass the 'interesting_test'."
        )

    # From now on each candidate is represented as a sorted list of indices
    # into the original input.
    indices = list(range(len(interesting_input)))

    def reconstruct(selected: List[int]) -> List[T]:
        return [interesting_input[i] for i in selected]

    def still_interesting(selected: List[int]) -> bool:
        return interesting_test(reconstruct(selected))

    while len(indices) > 1:
        # More chunks than items is meaningless; without the clamp a failed
        # round would be repeated with exactly the same single-item chunks.
        granularity = min(granularity, len(indices))
        chunk_size = (len(indices) + granularity - 1) // granularity
        starts = range(0, len(indices), chunk_size)
        reduced = None

        # Phase 1: try to reduce to a single chunk.
        for start in starts:
            subset = indices[start:start + chunk_size]
            if still_interesting(subset):
                reduced = subset
                break

        # Phase 2: try to drop a single chunk. With only two chunks every
        # complement is the other chunk, which phase 1 has already rejected,
        # so re-running the (possibly expensive) test would be wasted work.
        if reduced is None and len(starts) > 2:
            for start in starts:
                complement = indices[:start] + indices[start + chunk_size:]
                if still_interesting(complement):
                    reduced = complement
                    break

        if reduced is not None:
            indices = reduced
            granularity = max(2, granularity - 1)
        elif granularity == len(indices):
            # Already at single-item chunks: nothing more can be removed.
            break
        else:
            granularity = min(len(indices), granularity * 2)

    return reconstruct(indices)


def _main() -> None:
    """Command-line entry point: minimize a file in place."""
    import argparse
    import os
    import shutil
    import subprocess

    parser = argparse.ArgumentParser(
        description="Minimize a file using delta debugging."
    )
    parser.add_argument(
        "--interesting", required=True, help="Path to the interesting test script."
    )
    parser.add_argument(
        "--to-minimize", required=True, help="Path to the file to minimize (it is minimized in destructively in place)."
    )
    parser.add_argument(
        "--granularity", type=int, default=2, help="Starting granularity (typically not needed, more means finer grained reductions)."
    )
    parser.add_argument(
        "-b", "--bytes", action="store_true", help="Minimize by bytes instead of lines."
    )
    args = parser.parse_args()

    # A bare name such as "test_script.py" would be looked up on PATH rather
    # than in the current directory, so resolve an existing file to an
    # absolute path. Anything else is passed through unchanged.
    if os.path.isfile(args.interesting):
        interesting_cmd = os.path.abspath(args.interesting)
    else:
        interesting_cmd = args.interesting

    backup_path = args.to_minimize + ".ddmin_most_interesting_so_far"

    def write_to_minimize(data: List[Any]) -> None:
        # Byte mode holds a list of ints; line mode holds a list of bytes
        # objects that still carry their line endings.
        with open(args.to_minimize, "wb") as f:
            f.write(bytes(data) if args.bytes else b"".join(data))

    def is_interesting(to_check: List[Any]) -> bool:
        write_to_minimize(to_check)
        result = subprocess.run([interesting_cmd])
        interesting = result.returncode == 0
        if interesting:
            # Keep a copy of the best candidate so an interrupted run does
            # not leave only an uninteresting intermediate file behind.
            shutil.copy(args.to_minimize, backup_path)
        return interesting

    # Read the file as bytes
    with open(args.to_minimize, "rb") as f:
        input_bytes = f.read()

    # Split into lines if not in byte mode, otherwise treat as individual bytes
    input_data: List[Any]
    if args.bytes:
        input_data = list(input_bytes)
    else:
        input_data = input_bytes.splitlines(keepends=True)

    minimized = delta_debug(
        is_interesting,
        input_data,
        granularity=args.granularity,
    )
    # The last candidate tested is not necessarily the minimized one.
    write_to_minimize(minimized)
    # The initial input always passes the test (delta_debug raises
    # otherwise), so the backup exists by the time we get here.
    os.remove(backup_path)


if __name__ == "__main__":
    _main()
> "$test_input" 18 | echo "It contains several lines." >> "$test_input" 19 | echo "One of these lines is interesting." >> "$test_input" 20 | echo "Others are not." >> "$test_input" 21 | echo '#!/bin/bash' > "$test_script" 22 | echo "grep -q 'interesting' \"$test_input\"" >> "$test_script" 23 | chmod +x "$test_script" 24 | } 25 | 26 | cleanup() { 27 | rm "$test_input" "$test_script" 28 | } 29 | 30 | test_line_wise() { 31 | setup 32 | python3 "$ddmin_script" --interesting "$test_script" --to-minimize "$test_input" 33 | if test "$(cat $test_input)" = "One of these lines is interesting."; then 34 | echo "Line-wise minimization test passed." 35 | else 36 | echo "Line-wise minimization test failed." 37 | fi 38 | cleanup 39 | } 40 | 41 | test_byte_wise() { 42 | setup 43 | python3 "$ddmin_script" -b --interesting "$test_script" --to-minimize "$test_input" 44 | if test "$(cat $test_input)" = "interesting"; then 45 | echo "Byte-wise minimization test passed." 46 | else 47 | echo "Byte-wise minimization test failed." 48 | fi 49 | cleanup 50 | } 51 | 52 | test_line_wise 53 | test_byte_wise 54 | --------------------------------------------------------------------------------