├── .gitignore ├── README.md ├── rebar ├── rebar.config ├── src ├── ddmin.app.src ├── ddmin.erl └── naive_recorder.erl └── test └── record_replay_test.erl /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | /ebin 3 | /.eunit 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Minimizing Delta Debugging Algorithm 2 | ==================================== 3 | 4 | Erlang implementation of the minimizing delta debugging algorithm (ddmin) 5 | as described in [Simplifying and Isolating Failure-Inducing Input](http://www.st.cs.uni-saarland.de/papers/tse2002/tse2002.pdf) (PDF) 6 | from the fine folks of the software engineering chair of [Saarland University](http://www.st.cs.uni-saarland.de/). 7 | 8 | No worries, you don't have to read the paper to understand what's going on. 9 | 10 | What does it do? 11 | ---------------- 12 | 13 | In a nutshell, the delta debugging algorithm is supposed to find the minimal difference 14 | between a passing and a failing test case for a given input. That means: given a failing test for some input, 15 | ddmin will produce a *minimal* test case that reproduces the error. 16 | This simplifies the debugging work required to fix the cause of the error. 17 | 18 | 19 | How does it do it? 20 | ------------------ 21 | 22 | The ddmin uses a divide-and-conquer approach by splitting input data into smaller chunks and checking if 23 | a smaller input reproduces the error the same way as the larger input does. 24 | Ultimately, the ddmin is supposed to find the minimal input to reproduce the error. 25 | 26 | Let's look at an example: 27 | 28 | foo([7|_]) -> throw(expected_error); 29 | foo([_|T]) -> foo(T); 30 | foo([]) -> done. 31 | 32 | This inherently useless function shall serve us for demonstration purposes.
For the input `[1,2,3,4,5,6,7,8]` 33 | ddmin proceeds in the following way, applying `foo` to different input configurations: 34 | 35 | step | delta | test case | test 36 | ------------------------------------------------ 37 | 1 | 1 | [1, 2, 3, 4] . . . . | pass 38 | 2 | 2 | . . . . [5, 6, 7, 8] | fail 39 | ------------------------------------------------ 40 | 3 | 1 | . . . . [5, 6] . . | pass 41 | 4 | 2 | . . . . . . [7, 8] | fail 42 | ------------------------------------------------ 43 | 5 | 1 | . . . . . . [7] . | fail (minimal input) 44 | ------------------------------------------------ 45 | result | [7] 46 | 47 | 48 | In order to run ddmin you only have to implement a test function. The test function for the `foo` case could look like: 49 | 50 | TestFun = fun(Circumstances) -> 51 | try 52 | foo(Circumstances), 53 | pass 54 | catch _:expected_error -> fail; 55 | _:_ -> unresolved 56 | end 57 | end. 58 | 59 | During execution ddmin applies `TestFun` to each delta seeking the smallest failing input. 60 | Note that the test function must have the following type: 61 | 62 | -type circumstance() :: term(). 63 | -type test() :: fun(([circumstance()] | []) -> pass | fail | unresolved). 64 | 65 | The `unresolved` return value helps to determine cases where an unexpected error occurred. 66 | 67 | Furthermore, ddmin resizes the chunks in case it cannot find a smaller failing test case. 68 | In the worst case almost all combinations of chunks will be exercised, but in the best case the 69 | overall complexity is that of a binary search. 70 | 71 | Run ddmin like this: 72 | 73 | > ddmin:ddmin(TestFun, [1,2,3,4,5,6,7,8]). 74 | [7] 75 | 76 | How is that different from what QuickCheck/PropEr does? 77 | -------------------------------------------------------- 78 | 79 | It's not that different.
The quickcheck approach lets you write generators for input data and 80 | automatically reduces this generated data to find minimal counterexamples that fail the properties 81 | you have defined. For the quickcheck approach you need a clear understanding of how to model 82 | the inner workings of what you want to test. 83 | 84 | Delta debugging allows a more exploratory approach. 85 | You can use delta debugging on any input data that you can chunk, including: 86 | 87 | * plain text (e.g. lines, words, characters), 88 | * structured data like HTML/XML tags, YAML, binary formats, 89 | * messages to a certain process collected from a trace to facilitate a record-replay approach. 90 | This approach is showcased with a simple test (see `test/record_replay_test.erl`). 91 | 92 | It remains up to the user to find a reasonable chunking method for input data. 93 | 94 | Furthermore, a minimal test case can help to improve the quickcheck generators and properties you have defined so far. 95 | 96 | Feedback 97 | -------- 98 | 99 | Let me know if you find this interesting. Feedback is more than welcome. 100 | 101 | -------------------------------------------------------------------------------- /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spawnfest/ddmin/235aca7929e2780d3cc45d4f23bcf5ef803ce525/rebar -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %% Erlang options 2 | {erl_opts, [debug_info]}. 3 | 4 | %% Test options 5 | {eunit_opts, [verbose, {report,{eunit_surefire,[{dir,"."}]}}]}.
6 | -------------------------------------------------------------------------------- /src/ddmin.app.src: -------------------------------------------------------------------------------- 1 | {application, ddmin, 2 | [ 3 | {description, "Minimizing Delta Debugging Algorithm"}, 4 | {vsn, "0.0.1"}, 5 | {applications, [ 6 | kernel, 7 | stdlib 8 | ]} 9 | ]}. 10 | -------------------------------------------------------------------------------- /src/ddmin.erl: -------------------------------------------------------------------------------- 1 | %% @doc Minimizing Delta Debugging Algorithm 2 | -module(ddmin). 3 | 4 | -export([ddmin/2]). 5 | 6 | %% @doc A circumstance can be any input data to a function or message to a process. 7 | %% It is up to the user how to cut circumstances, e.g. into text, tuples, numbers, 8 | %% HTML/XML tags, other structured input etc. 9 | -type circumstance() :: term(). 10 | 11 | %% @doc A test function must accept a list of circumstances and return one of: 12 | %% fail - with the given (sub)set of circumstances the test reproduces the expected error 13 | %% pass - with the given (sub)set of circumstances the test does not reproduce the expected error, 14 | %% i.e. the test passes 15 | %% unresolved - with the given circumstances the test produced an unexpected error 16 | -type test() :: fun(([circumstance()] | []) -> pass | fail | unresolved). 17 | 18 | %% @doc The ddmin function. Takes a test function and 19 | %% a list of possible inputs for the test function, and hands both to ddmin/3 with an initial granularity of 2. 20 | -spec ddmin(test(), [circumstance()]) -> [circumstance()]. 21 | ddmin(Test, Circumstances) when is_function(Test, 1), is_list(Circumstances) -> 22 | %% Test function must fulfill the following preconditions; each match raises badmatch otherwise 23 | pass = Test([]), 24 | fail = Test(Circumstances), 25 | ddmin(Test, Circumstances, 2).
%% Reduce `Circumstances' (a list for which `Test' is known to fail) at
%% granularity `N', i.e. cut into chunks of `length(Circumstances) div N'
%% elements.
%%
%% Every chunk and its positional complement is tried:
%%   * a failing chunk is reduced further, starting over with 2 chunks;
%%   * otherwise a failing complement is reduced further with max(N-1, 2);
%%   * otherwise the chunk is `undecided'.
%% If at least one chunk is undecided, the granularity is doubled ONCE for
%% the whole input (this step does not depend on the chunk, so repeating it
%% per chunk would only multiply the number of test runs).
%%
%% Among all failing configurations found, the one with the fewest elements
%% is returned. Lists shorter than 2 elements, or shorter than `N', are
%% returned unchanged.
ddmin(Test, Circumstances, N)
  when N =< length(Circumstances), length(Circumstances) >= 2 ->
    Len = length(Circumstances),
    Outcomes = [reduce(Test, Subset, Complement, N)
                || {Subset, Complement} <- partitions(split(Circumstances, Len div N))],
    Reduced = [Result || {reduced, Result} <- Outcomes],
    Candidates =
        case lists:member(undecided, Outcomes) of
            true  -> Reduced ++ [refine(Test, Circumstances, N)];
            false -> Reduced
        end,
    %% NOTE: lists:min/1 must not be used here. It compares in Erlang term
    %% order (element by element), so e.g. [1,5,2,9] < [5,9] and the longer
    %% list would win.
    shortest(Candidates);
ddmin(_Test, Circumstances, _N) ->
    Circumstances.

%% Try one chunk and its complement.
%% Returns `{reduced, Result}' when either of them fails (Result being the
%% further-minimized failing list), `undecided' otherwise. The complement is
%% only tested when the chunk itself does not fail.
reduce(Test, Subset, Complement, N) ->
    case Test(Subset) of
        fail ->
            {reduced, ddmin(Test, Subset, 2)};
        _ ->
            case Test(Complement) of
                fail -> {reduced, ddmin(Test, Complement, max(N - 1, 2))};
                _    -> undecided
            end
    end.

%% Neither a chunk nor a complement failed: retry the same input with twice
%% as many chunks, or give up and return the input when every chunk already
%% holds a single element (N has reached the length of the input).
refine(Test, Circumstances, N) ->
    Len = length(Circumstances),
    case N < Len of
        true  -> ddmin(Test, Circumstances, min(Len, 2 * N));
        false -> Circumstances
    end.

%% Pair every chunk with its positional complement, i.e. all other chunks
%% concatenated in their original order. Unlike lists:subtract/2 this removes
%% exactly the elements of that chunk, even when the input contains duplicates.
partitions(Subsets) ->
    partitions([], Subsets).

partitions(_Before, []) ->
    [];
partitions(Before, [Subset | After]) ->
    %% Before is kept reversed; lists:reverse/2 restores its order and
    %% prepends it to After. lists:append/1 flattens exactly one level, so
    %% circumstances that are lists themselves stay intact.
    Complement = lists:append(lists:reverse(Before, After)),
    [{Subset, Complement} | partitions([Subset | Before], After)].

%% Return the candidate with the fewest elements; the first one wins a tie.
shortest([First | Rest]) ->
    lists:foldl(fun(Candidate, Best) ->
                        case length(Candidate) < length(Best) of
                            true  -> Candidate;
                            false -> Best
                        end
                end, First, Rest).

%% Cut `Circumstances' into consecutive chunks of `Len' elements; the last
%% chunk holds the remainder and may be shorter. `Len' must be a positive
%% integer for a non-empty list (a length of 0 would never consume any input).
split([], _Len) ->
    [];
split(Circumstances, Len) when is_integer(Len), Len > 0 ->
    case Len =< length(Circumstances) of
        true ->
            {Subset, Rest} = lists:split(Len, Circumstances),
            [Subset | split(Rest, Len)];
        false ->
            [Circumstances]
    end.


-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").

split_test() ->
    ?assertEqual([[1,2,3,4,5],[6,7,8,9,10]], split(lists:seq(1,10), 5)),
    ?assertEqual([[1,2,3],[4,5,6],[7,8,9],[10,11]], split(lists:seq(1,11), 3)).


foo([6,7|_]) -> throw(expected_error);
foo([_|T]) -> foo(T);
foo([]) -> done.

foo_test() ->
    Test =
        fun(Circumstances) ->
            try
                foo(Circumstances),
                pass
            catch _:expected_error -> fail;
                  _:_ -> unresolved
            end
        end,
    % test checks if ddmin can find the minimum input for foo to reproduce the expected error
    ?assertEqual([6,7], ddmin(Test, [1,2,3,4,5,6,7,8,9,10])).


% regression: the result is the shortest failing input, not the smallest one
% in Erlang term order ([1,5,2,9] < [5,9] as terms)
shortest_candidate_test() ->
    Test =
        fun(Circumstances) ->
            case lists:member(5, Circumstances) andalso lists:member(9, Circumstances) of
                true -> fail;
                false -> pass
            end
        end,
    ?assertEqual([5,9], ddmin(Test, [1,5,2,9])).


bar([2|_]) -> throw(expected_error);
bar([_|T]) -> bar(T);
bar([]) -> done.
82 | 83 | bar_test() -> 84 | Test = 85 | fun(Circumstances) -> 86 | try 87 | bar(Circumstances), 88 | pass 89 | catch _:expected_error -> fail; 90 | _:_ -> unresolved 91 | end 92 | end, 93 | % variation of foo_test: a single element (2) of an 11-element input triggers the error 94 | ?assertEqual([2], ddmin(Test, [1,2,3,4,5,6,7,8,9,10,11])). 95 | 96 | 97 | foo_loop(N) when N < 0 -> 98 | error(expected_error); 99 | foo_loop(N) -> 100 | receive 101 | yay -> foo_loop(N+1); 102 | nay -> foo_loop(N-1) 103 | after 200 -> done 104 | end. 105 | 106 | foo_loop_test() -> 107 | Test = 108 | fun(Circumstances) -> 109 | %% setup process under test and monitor it so its exit reason can be observed 110 | Pid = spawn(fun() -> foo_loop(0) end), 111 | erlang:monitor(process, Pid), 112 | [Pid ! Circumstance || Circumstance <- Circumstances], 113 | receive %% wait for the monitored process to go down; error(expected_error) exits with {expected_error, Stacktrace} 114 | {'DOWN', _, _, _, {expected_error, _}} -> fail; 115 | {'DOWN', _, _, _, normal} -> pass; 116 | _ -> unresolved 117 | end 118 | end, 119 | % this test checks for the smallest input sequence of messages to produce the expected error 120 | ?assertEqual([nay], ddmin(Test, [yay,yay,nay,yay,nay,nay,yay,nay,nay])). 121 | 122 | 123 | -endif. 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /src/naive_recorder.erl: -------------------------------------------------------------------------------- 1 | %% @doc Naive recorder of messages to a process using dbg tracer. 2 | %% Records all messages received by a pid and 3 | %% stores them ordered by arrival in an ETS table. 4 | -module(naive_recorder). 5 | 6 | -export([ record/1 7 | , pause/1 8 | , get_recorded_messages/1 9 | , stop/1 10 | ]). 11 | 12 | -record(recorder, {observe_pid, table_id}). 13 | 14 | -spec record(pid()) -> #recorder{}.
15 | record(Pid) when is_pid(Pid) -> 16 | dbg:stop_clear(), 17 | Tid = ets:new(?MODULE, [ordered_set, public]), 18 | dbg:tracer(process, { 19 | fun({trace_ts,_,'receive', ReceivedMsg, TS}, Tab) -> 20 | ets:insert(Tab, {TS, ReceivedMsg}), 21 | Tab; 22 | (_, Tab) -> Tab 23 | end, 24 | Tid}), 25 | {ok, [{matched, _, 1}]} = dbg:p(Pid, [r, timestamp]), 26 | #recorder{observe_pid=Pid, table_id=Tid}. 27 | 28 | -spec pause(#recorder{}) -> ok. 29 | pause(#recorder{}) -> 30 | dbg:stop(). 31 | 32 | -spec stop(#recorder{}) -> ok. 33 | stop(#recorder{table_id=Tid}) -> 34 | ets:delete(Tid), 35 | dbg:stop_clear(). 36 | 37 | -spec get_recorded_messages(#recorder{}) -> [term()] | []. 38 | get_recorded_messages(#recorder{table_id=Tid}) -> 39 | ets:foldr(fun({_TS, Msg}, Acc) -> [Msg | Acc] end, [], Tid). 40 | 41 | -ifdef(TEST). 42 | -include_lib("eunit/include/eunit.hrl"). 43 | 44 | foo_loop() -> 45 | receive 46 | {hello,_} -> foo_loop(); 47 | bye -> ok; 48 | _ -> throw(exepected_error) 49 | end. 50 | 51 | simple_test() -> 52 | Pid = spawn(fun foo_loop/0), 53 | Rec = naive_recorder:record(Pid), 54 | Pid ! {hello,2}, 55 | Pid ! {hello,1}, 56 | Pid ! {hello,3}, 57 | Pid ! bye, 58 | naive_recorder:pause(Rec), 59 | ?assertEqual([{hello,2}, {hello,1}, {hello,3}, bye], naive_recorder:get_recorded_messages(Rec)), 60 | naive_recorder:stop(Rec), 61 | ok. 62 | 63 | -endif. 64 | -------------------------------------------------------------------------------- /test/record_replay_test.erl: -------------------------------------------------------------------------------- 1 | -module(record_replay_test). 2 | 3 | -include_lib("eunit/include/eunit.hrl"). 4 | 5 | foo_loop() -> 6 | receive 7 | {hello,_} -> foo_loop(); 8 | _ -> error(expected_error) 9 | after 100 -> ok 10 | end. 11 | 12 | setup_fun() -> 13 | spawn(fun() -> foo_loop() end). 14 | 15 | test_fun(Messages) when is_list(Messages) -> 16 | Pid = setup_fun(), 17 | erlang:monitor(process, Pid), 18 | [Pid !
Msg || Msg <- Messages], 19 | receive %% wait for the monitored process to go down; error(expected_error) exits with {expected_error, Stacktrace} 20 | {'DOWN', _, _, _, {expected_error, _}} -> fail; 21 | {'DOWN', _, _, _, normal} -> pass; 22 | _ -> unresolved 23 | end. 24 | 25 | record_replay_test() -> 26 | Pid = setup_fun(), 27 | Recorder = naive_recorder:record(Pid), 28 | Pid ! {hello, 1}, 29 | Pid ! {hello, 2}, 30 | Pid ! {hello, 3}, 31 | Pid ! {hello, 4}, 32 | Pid ! {hello, 5}, 33 | Pid ! boom, 34 | naive_recorder:pause(Recorder), 35 | ?assertEqual([boom], ddmin:ddmin(fun test_fun/1, naive_recorder:get_recorded_messages(Recorder))), 36 | naive_recorder:stop(Recorder). 37 | --------------------------------------------------------------------------------