├── .gitignore
├── README.md
├── rebar
├── rebar.config
├── src
    ├── ddmin.app.src
    ├── ddmin.erl
    └── naive_recorder.erl
└── test
    └── record_replay_test.erl


/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | /ebin
3 | /.eunit
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Minimizing Delta Debugging Algorithm
  2 | ====================================
  3 | 
  4 | Erlang implementation of the minimizing delta debugging algorithm (ddmin) 
  5 | as described in [Simplifying and Isolating Failure-Inducing Input](http://www.st.cs.uni-saarland.de/papers/tse2002/tse2002.pdf) (PDF)
  6 | from the fine folks of the software engineering chair of [Saarland University](http://www.st.cs.uni-saarland.de/).
  7 | 
  8 | No worries, you don't have to read the paper to understand what's going on.
  9 | 
 10 | What does it do?
 11 | ----------------
 12 | 
 13 | In a nutshell, the delta debugging algorithm is supposed to find the minimal difference 
 14 | between a passing and a failing test case for a given input. That is, given a failing test for some input,
 15 | ddmin produces a *minimal* test case that still reproduces the error,
 16 | thereby simplifying the debugging work required to fix the cause of the error.
 17 | 
 18 | 
 19 | How does it do it?
 20 | ------------------
 21 | 
 22 | The ddmin uses a divide and conquer approach by splitting input data into smaller chunks and checking if 
 23 | a smaller input reproduces the error the same way as the larger input does. 
 24 | Ultimately, the ddmin is supposed to find the minimal input to reproduce the error.
 25 | 
 26 | Let's look at an example:
 27 | 
 28 |     foo([7|_]) -> throw(expected_error);
 29 |     foo([_|T]) -> foo(T);
 30 |     foo([]) -> done.
 31 | 
 32 | This inherently useless function shall serve us for demonstration purposes. For the input `[1,2,3,4,5,6,7,8]` 
 33 | ddmin proceeds the following way by applying `foo` on different input configurations:
 34 | 
 35 |       step | delta | test case                | test
 36 |     ------------------------------------------------
 37 |        1   |   1   | [1, 2, 3, 4] .  .  .  .  | pass
 38 |        2   |   2   |  .  .  .  . [5, 6, 7, 8] | fail
 39 |     ------------------------------------------------
 40 |        3   |   1   |  .  .  .  . [5, 6] .  .  | pass
 41 |        4   |   2   |  .  .  .  .  .  . [7, 8] | fail
 42 |     ------------------------------------------------
 43 |        5   |   1   |  .  .  .  .  .  . [7] .  | fail  (minimal input)
 44 |     ------------------------------------------------
 45 |     result |                           [7] 
 46 | 
 47 | 
 48 | In order to run ddmin you only have to implement a test function. The test function for the `foo` case could look like:
 49 | 
 50 |     TestFun = fun(Circumstances) ->
 51 |                 try
 52 |                   foo(Circumstances),
 53 |                   pass
 54 |                 catch _:expected_error -> fail;
 55 |                       _:_ -> unresolved
 56 |                 end
 57 |               end.
 58 | 
 59 | During execution ddmin applies `TestFun` to each delta seeking the smallest failing input. 
 60 | Note that the test function must have the following type:
 61 | 
 62 |     -type circumstance() :: term().
 63 |     -type test() :: fun(([circumstance()] | []) -> pass | fail | unresolved).
 64 | 
 65 | The `unresolved` return value helps to determine cases where an unexpected error occurred.
 66 | 
 67 | Furthermore, ddmin resizes the chunks in case it cannot find a smaller failing test case.
 68 | In the worst case almost all combinations of chunks will be exercised, but in the best case the
 69 | overall complexity is that of a binary search.
 70 | 
 71 | Run ddmin like this:
 72 | 
 73 |     > ddmin:ddmin(TestFun, [1,2,3,4,5,6,7,8]).
 74 |     [7]
 75 | 
 76 | How is that different from what QuickCheck/PropEr does?
 77 | --------------------------------------------------------
 78 | 
 79 | It's not that different. The quickcheck approach lets you write generators for input data and 
 80 | automatically reduces this generated data to find minimal counterexamples that fail the properties
 81 | you have defined. For the quickcheck approach you need a clear understanding how to model 
 82 | the inner workings of what you want to test. 
 83 | 
 84 | Delta debugging allows a more exploratory approach. 
 85 | You can use delta debugging on any input data that you can chunk, including: 
 86 | 
 87 | * plain text (e.g. lines, words, characters),
 88 | * structured data like HTML/XML tags, YAML, binary formats,
 89 | * messages to a certain process collected from a trace to facilitate a record-replay approach. 
 90 |   This approach is show-cased with a simple test (see `test/record_replay_test.erl`).
 91 | 
 92 | It remains up to the user to find a reasonable chunking method for input data.
 93 | 
 94 | Furthermore, a minimal test case can help to improve the quickcheck generators and properties you have defined so far.
 95 | 
 96 | Feedback
 97 | --------
 98 | 
 99 | Let me know if you find this interesting. Feedback is more than welcome.
100 | 
101 | 


--------------------------------------------------------------------------------
/rebar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spawnfest/ddmin/235aca7929e2780d3cc45d4f23bcf5ef803ce525/rebar


--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
1 | %% Erlang options
2 | {erl_opts, [debug_info]}.
3 | 
4 | %% Test options
5 | {eunit_opts, [verbose, {report,{eunit_surefire,[{dir,"."}]}}]}.
6 | 


--------------------------------------------------------------------------------
/src/ddmin.app.src:
--------------------------------------------------------------------------------
 1 | {application, ddmin,
 2 |  [
 3 |   {description, "Minimizing Delta Debugging Algorithm"},
 4 |   {vsn, "0.0.1"},
 5 |   {applications, [
 6 |                   kernel,
 7 |                   stdlib
 8 |                  ]}
 9 |  ]}.
10 | 


--------------------------------------------------------------------------------
/src/ddmin.erl:
--------------------------------------------------------------------------------
  1 | %% @doc Minimizing Delta Debugging Algorithm
  2 | -module(ddmin).
  3 | 
  4 | -export([ddmin/2]).
  5 | 
  6 | %% @doc A circumstance can be any input data to a function or message to a process.
  7 | %%      It is up to the user how to cut circumstances, e.g. into text, tuples, numbers,
  8 | %%      HTML/XML tags, other structured input etc.
  9 | -type circumstance() :: term().
 10 | 
 11 | %% @doc A test function must accept a list of circumstances and return:
 12 | %%      fail - with the given (sub)set of circumstances the test reproduces the expected error
 13 | %%      pass - with the given (sub)set of circumstances the test does not reproduce the expected error,
 14 | %%             i.e. the test passes
 15 | %%      unresolved - with the given circumstances the test produced an unexpected error
 16 | -type test() :: fun(([circumstance()] | []) -> pass | fail | unresolved).
 17 | 
 18 | %% @doc The ddmin function. Takes a test function and 
 19 | %%      a list of possible inputs for the test function.
 20 | -spec ddmin(test(), [circumstance()]) -> [circumstance()].
 21 | ddmin(Test, Circumstances) when is_function(Test, 1), is_list(Circumstances) ->
 22 |   %% Test function must fulfill the following preconditions
 23 |   pass = Test([]),
 24 |   fail = Test(Circumstances),  
 25 |   ddmin(Test, Circumstances, 2).
 26 | 
 27 | ddmin(Test, Circumstances, N) when N =< length(Circumstances), length(Circumstances) >= 2 ->
 28 |   %% split given circumstances into subsets and check if maybe a smaller subset fails as well
 29 |   Subsets = split(Circumstances, length(Circumstances) div N),
 30 |   %% pick smallest subset
 31 |   lists:min([ddmin(Test, Subset, Circumstances, N) || Subset <- Subsets]);
 32 | ddmin(_, Circumstances, _) ->
 33 |   Circumstances.
 34 | 
 35 | ddmin(Test, Subset, Circumstances, N) ->
 36 |   Complement = lists:subtract(Circumstances, Subset),
 37 |   case {Test(Subset), Test(Complement)} of
 38 |     {fail, _} -> ddmin(Test, Subset, 2);
 39 |     {_, fail} -> ddmin(Test, Complement, max(N-1, 2));
 40 |     _ when N < length(Circumstances) -> ddmin(Test, Circumstances, min(length(Circumstances), 2*N));
 41 |     _ -> Circumstances
 42 |   end.
 43 | 
 44 | split([], _Len) ->
 45 |   [];
 46 | split(Circumstances, Len) when Len =< length(Circumstances) ->
 47 |   {Subset, Rest} = lists:split(Len, Circumstances),
 48 |   [Subset | split(Rest, Len)];
 49 | split(Rest, _Len) ->
 50 |   [Rest].
 51 | 
 52 | 
 53 | -ifdef(TEST).
 54 | -include_lib("eunit/include/eunit.hrl").
 55 | 
 56 | split_test() ->
 57 |   ?assertEqual([[1,2,3,4,5],[6,7,8,9,10]], split(lists:seq(1,10), 5)),
 58 |   ?assertEqual([[1,2,3],[4,5,6],[7,8,9],[10,11]], split(lists:seq(1,11), 3)).
 59 | 
 60 | 
 61 | foo([6,7|_]) -> throw(expected_error);
 62 | foo([_|T]) -> foo(T);
 63 | foo([]) -> done.
 64 | 
 65 | foo_test() ->
 66 |   Test = 
 67 |     fun(Circumstances) ->
 68 |       try 
 69 |         foo(Circumstances),
 70 |         pass
 71 |       catch _:expected_error -> fail;
 72 |             _:_ -> unresolved
 73 |       end
 74 |     end,
 75 |   % test checks if ddmin can find the minimum input for foo to reproduce the expected error
 76 |   ?assertEqual([6,7], ddmin(Test, [1,2,3,4,5,6,7,8,9,10])).
 77 | 
 78 | 
 79 | bar([2|_]) -> throw(expected_error);
 80 | bar([_|T]) -> bar(T);
 81 | bar([]) -> done.
 82 | 
 83 | bar_test() ->
 84 |   Test = 
 85 |     fun(Circumstances) ->
 86 |       try 
 87 |         bar(Circumstances),
 88 |         pass
 89 |       catch _:expected_error -> fail;
 90 |             _:_ -> unresolved
 91 |       end
 92 |     end,
 93 |   % variation of foo_test 
 94 |   ?assertEqual([2], ddmin(Test, [1,2,3,4,5,6,7,8,9,10,11])).
 95 | 
 96 | 
 97 | foo_loop(N) when N < 0 -> 
 98 |   error(expected_error);
 99 | foo_loop(N) ->
100 |   receive 
101 |     yay -> foo_loop(N+1);
102 |     nay -> foo_loop(N-1)
103 |   after 200 -> done
104 |   end.
105 | 
106 | foo_loop_test() ->
107 |   Test = 
108 |     fun(Circumstances) ->
109 |         %% setup process under test
110 |         Pid = spawn(fun() -> foo_loop(0) end),
111 |         erlang:monitor(process, Pid),
112 |         [Pid ! Circumstance || Circumstance <- Circumstances],
113 |         receive    %% wait for expected error to happen
114 |           {'DOWN', _, _, _, {expected_error, _}} -> fail;
115 |           {'DOWN', _, _, _, normal} -> pass;
116 |           _ -> unresolved
117 |         end
118 |     end,
119 |   % this test checks for the smallest input sequence of messages to produce the expected error
120 |   ?assertEqual([nay], ddmin(Test, [yay,yay,nay,yay,nay,nay,yay,nay,nay])).
121 | 
122 | 
123 | -endif.
124 | 
125 | 
126 | 
127 | 


--------------------------------------------------------------------------------
/src/naive_recorder.erl:
--------------------------------------------------------------------------------
 1 | %% @doc Naive recorder of messages to a process using dbg tracer.
 2 | %%      Records all received messages to a pid and 
 3 | %%      stores them ordered by arrival in an ETS table.
 4 | -module(naive_recorder).
 5 | 
 6 | -export([ record/1
 7 |         , pause/1
 8 |         , get_recorded_messages/1
 9 |         , stop/1
10 |         ]).
11 | 
12 | -record(recorder, {observe_pid, table_id}).
13 | 
%% Starts recording the messages received by `Pid'.
%% Stops any running dbg tracer first, creates a public ordered ETS table for
%% the recorded messages and starts a dbg process tracer that writes into it.
%% Crashes with `badmatch' if the tracer cannot be started or if the trace
%% flags were not set on exactly one process.
-spec record(pid()) -> #recorder{}.
record(Pid) when is_pid(Pid) ->
  dbg:stop_clear(),
  Tid = ets:new(?MODULE, [ordered_set, public]),
  {ok, _Tracer} = dbg:tracer(process, {fun store_received/2, Tid}),
  {ok, [{matched, _, 1}]} = dbg:p(Pid, [r, timestamp]),
  #recorder{observe_pid=Pid, table_id=Tid}.

%% Trace handler: stores every traced `receive' event and ignores all other
%% trace messages. The table is the handler state and is passed through.
%% The key starts with the number of messages stored so far, so two messages
%% carrying the same timestamp no longer overwrite each other and the table
%% order is the order in which the handler saw the messages.
store_received({trace_ts, _Pid, 'receive', ReceivedMsg, TS}, Tab) ->
  Seq = ets:info(Tab, size),
  ets:insert(Tab, {{Seq, TS}, ReceivedMsg}),
  Tab;
store_received(_Other, Tab) ->
  Tab.
27 | 
%% Pauses recording by stopping dbg. The recorded messages stay available
%% because the table of the recorder is left untouched.
-spec pause(#recorder{}) -> ok.
pause(Recorder) when is_record(Recorder, recorder) ->
  dbg:stop().
31 | 
%% Stops recording and discards all recorded messages.
%% Tracing is stopped before the table is deleted so that the trace handler
%% is not left inserting into a table that no longer exists.
%% Returns the result of `dbg:stop_clear/0'.
-spec stop(#recorder{}) -> ok.
stop(#recorder{table_id=Tid}) ->
  Result = dbg:stop_clear(),
  ets:delete(Tid),
  Result.
36 | 
%% Returns the recorded messages in the key order of the recorder's table,
%% i.e. the message stored under the smallest key comes first.
-spec get_recorded_messages(#recorder{}) -> [term()] | [].
get_recorded_messages(#recorder{table_id=Tid}) ->
  %% collect from first to last key, which yields the reversed order
  Collect = fun({_Key, Msg}, Collected) -> [Msg | Collected] end,
  lists:reverse(ets:foldl(Collect, [], Tid)).
40 | 
41 | -ifdef(TEST).
42 | -include_lib("eunit/include/eunit.hrl").
43 | 
%% Process under test: terminates normally on `bye', keeps running on
%% `{hello, _}' and throws on any other message.
foo_loop() ->
  receive
    bye -> ok;
    {hello,_} -> foo_loop();
    _ -> throw(exepected_error)
  end.
50 | 
%% Records the messages sent to a process and checks that they are returned
%% in the order in which they were sent.
simple_test() ->
  Sent = [{hello,2}, {hello,1}, {hello,3}, bye],
  Pid = spawn(fun foo_loop/0),
  Rec = naive_recorder:record(Pid),
  try
    lists:foreach(fun(Msg) -> Pid ! Msg end, Sent),
    naive_recorder:pause(Rec),
    ?assertEqual(Sent, naive_recorder:get_recorded_messages(Rec))
  after
    %% release the recorder even when the assertion above fails
    naive_recorder:stop(Rec)
  end,
  ok.
62 | 
63 | -endif.
64 | 


--------------------------------------------------------------------------------
/test/record_replay_test.erl:
--------------------------------------------------------------------------------
 1 | -module(record_replay_test).
 2 | 
 3 | -include_lib("eunit/include/eunit.hrl").
 4 | 
 5 | foo_loop() ->
 6 |   receive 
 7 |     {hello,_} -> foo_loop();
 8 |     _ -> error(expected_error)
 9 |   after 100 -> ok
10 |   end.
11 | 
12 | setup_fun() -> 
13 |   spawn(fun() -> foo_loop() end).
14 | 
%% Test function for ddmin: replays `Messages' to a fresh process under test
%% and reports how that process terminated:
%%   fail       - it died with `expected_error'
%%   pass       - it terminated normally
%%   unresolved - it terminated for any other reason
test_fun(Messages) when is_list(Messages) ->
  Pid = setup_fun(),
  Ref = erlang:monitor(process, Pid),
  lists:foreach(fun(Msg) -> Pid ! Msg end, Messages),
  %% Only the 'DOWN' message of this very monitor decides the outcome, so
  %% unrelated messages in the mailbox are neither consumed nor misread.
  receive    %% wait for the process under test to terminate
    {'DOWN', Ref, process, Pid, {expected_error, _}} -> fail;
    {'DOWN', Ref, process, Pid, normal} -> pass;
    {'DOWN', Ref, process, Pid, _} -> unresolved
  end.
24 | 
%% Records the messages that crash a process and lets ddmin reduce the
%% recording to the single message that reproduces the crash.
record_replay_test() ->
  Pid = setup_fun(),
  Recorder = naive_recorder:record(Pid),
  try
    Sent = [{hello, 1}, {hello, 2}, {hello, 3}, {hello, 4}, {hello, 5}, boom],
    lists:foreach(fun(Msg) -> Pid ! Msg end, Sent),
    naive_recorder:pause(Recorder),
    Recorded = naive_recorder:get_recorded_messages(Recorder),
    ?assertEqual([boom], ddmin:ddmin(fun test_fun/1, Recorded))
  after
    %% release the recorder even when the assertion above fails
    naive_recorder:stop(Recorder)
  end.
37 | 


--------------------------------------------------------------------------------