├── .github └── workflows │ └── check.yml ├── .gitignore ├── .ocamlformat ├── Makefile ├── README.md ├── build.sh ├── dune ├── dune-project ├── script └── check-format ├── src ├── bugDesc.ml ├── cmdline.ml ├── coverage.ml ├── dune ├── instrument.ml ├── localizer.ml ├── logging.ml ├── main.ml ├── scenario.ml ├── utils.ml └── visualizer.ml ├── test └── simple1 │ ├── Makefile │ ├── bug_desc.json │ ├── compile.sh │ ├── src │ └── bug.c │ └── test.sh └── unival-docker ├── Dockerfile └── src ├── ProcessDataProfile.java ├── RFC.R └── StructuredDataCollector.java /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: check 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | check: 7 | name: check-format 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout code 11 | uses: actions/checkout@v2 12 | 13 | - name: Set up OCaml 14 | uses: ocaml/setup-ocaml@v2 15 | with: 16 | ocaml-compiler: 4.13.0 17 | 18 | - name: Install dependencies 19 | run: opam install ocamlformat.0.20.1 20 | 21 | - name: Check format 22 | run: 23 | eval $(opam env) 24 | script/check-format 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | localizer 3 | localizer-out 4 | sparrow-out 5 | coverage.xml 6 | *.gc* 7 | bug 8 | *.i 9 | *.o 10 | *.s 11 | CausalMap.txt 12 | *.dot 13 | *.svg 14 | visualizer 15 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | profile=default 2 | quiet=false 3 | max-iters=10 4 | comment-check=true 5 | wrap-fun-args=true 6 | wrap-comments=false 7 | type-decl-indent=2 8 | type-decl=compact 9 | stritem-extension-indent=0 10 | space-around-variants=true 11 | space-around-records=true 12 | space-around-lists=true 13 | space-around-arrays=true 14 | single-case=compact 15 | sequence-style=terminator 16 | sequence-blank-line=preserve-one 17 | parse-docstrings=false 18 | parens-tuple-patterns=multi-line-only 19 | parens-tuple=always 20 | parens-ite=false 21 | ocp-indent-compat=false 22 | nested-match=wrap 23 | module-item-spacing=sparse 24 | max-indent=68 25 | match-indent-nested=never 26 | match-indent=0 27 | margin=80 28 | let-module=compact 29 | let-binding-spacing=compact 30 | let-binding-indent=2 31 | let-and=compact 32 | leading-nested-match-parens=false 33 | infix-precedence=indent 34 | indicate-nested-or-patterns=unsafe-no 35 | indicate-multiline-delimiters=no 36 | indent-after-in=0 37 | if-then-else=compact 38 | function-indent-nested=never 39 | function-indent=2 40 | field-space=loose 41 | extension-indent=2 42 | exp-grouping=parens 43 | dock-collection-brackets=true 44 | doc-comments-tag-only=default 45 | doc-comments-padding=2 46 | doc-comments=after-when-possible 47 | disambiguate-non-breaking-match=false 48 | disable=false 49 | cases-matching-exp-indent=normal 50 | cases-exp-indent=4 51 | break-struct=force 52 | break-string-literals=auto 53 | break-sequences=true 54 | break-separators=after 55 | break-infix-before-func=false 56 | break-infix=wrap 57 | break-fun-sig=wrap 58 | break-fun-decl=wrap 59 | break-collection-expressions=fit-or-vertical 60 | break-cases=fit 61 | break-before-in=fit-or-vertical 62 | assignment-operator=end-line 63 | align-variants-decl=false 64 | align-constructors-decl=false 65 | align-cases=false 66 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MAKE=@make 2 | DUNE=@dune 3 | LN=@ln -sf 4 | RM=@rm 5 | EXE=localizer 6 | 7 | all: 8 | $(DUNE) build src/main.exe 9 | $(DUNE) build src/visualizer.exe 10 | $(LN) _build/default/src/main.exe $(EXE) 11 | $(LN) _build/default/src/visualizer.exe visualizer 12 | 13 | test: all 14 | $(MAKE) -C test 15 | $(DUNE) test 16 | 17 | clean: 18 | $(MAKE) -C test clean 19 | $(DUNE) clean 20 | $(RM) -rf $(EXE) 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bug-localizer 2 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | export OPAMYES=1 6 | 7 | NCPU="$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)" 8 | OCAML_VERSION="4.13.0" 9 | OPAM_SWITCH=program-repair-project-$OCAML_VERSION 10 | 11 | opam init --compiler=$OCAML_VERSION -j $NCPU --no-setup 12 | 13 | switch_exists=no 14 | for installed_switch in $(opam switch list --short); do 15 | if [[ "$installed_switch" == "$OPAM_SWITCH" ]]; then 16 | switch_exists=yes 17 | break 18 | fi 19 | done 20 | 21 | if [ "$switch_exists" = "no" ]; then 22 | opam switch create $OPAM_SWITCH $OCAML_VERSION 23 | else 24 | opam switch $OPAM_SWITCH 25 | fi 26 | 27 | eval $(SHELL=bash opam config env --switch=$SPARROW_OPAM_SWITCH) 28 | 29 | opam pin add cil https://github.com/prosyslab/cil.git -n 30 | opam install -j $NCPU dune batteries cil ppx_compare ocamlformat merlin yojson xmlm 31 | 32 | make 33 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- 1 | (env 2 | (dev 3 | (flags 4 | (:standard -warn-error -A)))) 5 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.3) 2 | -------------------------------------------------------------------------------- /script/check-format: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | PROJECT_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../ && pwd)" 6 | 7 | for file in $(find $PROJECT_HOME/src -name "*.ml"); do 8 | echo "Checking $file" 9 | ocamlformat $file | diff $file - || 10 | if [ $? -ne 0 ]; then 11 | echo "Failed: Please check json format" 12 | echo "See error message after running ocamlformat $file" 13 | exit 1 14 | fi 15 | done 16 | -------------------------------------------------------------------------------- /src/bugDesc.ml: -------------------------------------------------------------------------------- 1 | module F = Format 2 | 3 | type t = { 4 | program : string; 5 | compiler_type : string; 6 | test_cases : string list; 7 | test_time_limit : int; 8 | } 9 | 10 | let find : string -> Yojson.Safe.t -> Yojson.Safe.t = 11 | fun name -> function 12 | | `Assoc l -> List.find (function n, _ -> n = name) l |> snd 13 | | _ -> raise Not_found 14 | 15 | let to_string = function `String s -> s | _ -> raise Not_found 16 | 17 | let to_int = function `Int i -> i | _ -> raise Not_found 18 | 19 | let compiler_type_of desc = find "compiler" desc |> find "type" |> to_string 20 | 21 | let program_of desc = find "program" desc |> to_string 22 | 23 | let test_cases_of desc = 24 | let test_info = find "test-harness" desc in 25 | let num_of_passing = find "passing" test_info |> to_int in 26 | let num_of_failing = find "failing" test_info |> to_int in 27 | List.init num_of_passing (fun n -> "p" ^ string_of_int (n + 1)) 28 | @ List.init num_of_failing (fun n -> "n" ^ string_of_int (n + 1)) 29 | 30 | let test_limit_of desc = find "test-harness" desc |> find "time-limit" |> to_int 31 | 32 | let read work_dir = 33 | let json = 34 | let fn = Filename.concat work_dir "bug_desc.json" in 35 | if Sys.file_exists fn then Yojson.Safe.from_file fn 36 | else 37 | let fn = Filename.concat "/bugfixer" "bug_desc.json" in 38 | if Sys.file_exists fn then Yojson.Safe.from_file fn 39 | else failwith "Bug description not found" 40 | in 41 | Logging.log "Bug desc: %a" Yojson.Safe.pp json; 42 | let program = program_of json in 43 | let compiler_type = compiler_type_of json in 44 | let test_cases = test_cases_of json in 45 | let test_time_limit = test_limit_of json in 46 | { program; compiler_type; test_cases; test_time_limit } 47 | 48 | let pp_test_cases fmt l = 49 | F.fprintf fmt "["; 50 | List.iter (fun x -> F.fprintf fmt "%s," x) l; 51 | F.fprintf fmt "]" 52 | 53 | let pp fmt desc = 54 | F.fprintf fmt 55 | "{program: %s, compiler_type: %s, test_cases: %a, test_time_limit: %d}" 56 | desc.program desc.compiler_type pp_test_cases desc.test_cases 57 | desc.test_time_limit 58 | -------------------------------------------------------------------------------- /src/cmdline.ml: -------------------------------------------------------------------------------- 1 | let work_dir : string option ref = ref None 2 | 3 | let out_dir = ref "localizer-out" 4 | 5 | let faulty_func = ref false 6 | 7 | type instrument = DfSan | GSA | Coverage | Nothing 8 | 9 | let instrument = ref Nothing 10 | 11 | let select_instrument s = 12 | match s with 13 | | "dfsan" -> instrument := DfSan 14 | | "gsa" -> instrument := GSA 15 | | "coverage" -> instrument := Coverage 16 | | _ -> failwith "Unknown instrument" 17 | 18 | let skip_compile = ref false 19 | 20 | type engine = 21 | | Tarantula 22 | | Prophet 23 | | Jaccard 24 | | Ochiai 25 | | Dummy 26 | | UniVal 27 | | Coverage 28 | | All 29 | 30 | let engine = ref Dummy 31 | 32 | let select_engine s = 33 | match s with 34 | | "tarantula" -> engine := Tarantula 35 | | "prophet" -> engine := Prophet 36 | | "jaccard" -> engine := Jaccard 37 | | "ochiai" -> engine := Ochiai 38 | | "dummy" -> engine := Dummy 39 | | "unival" -> 40 | engine := UniVal; 41 | instrument := GSA 42 | | "coverage" -> 43 | engine := Coverage; 44 | instrument := Coverage 45 | | "all" -> engine := All 46 | | _ -> failwith "Unknown engine" 47 | 48 | let jobs = ref 0 (* i.e., #cpus *) 49 | 50 | let blacklist = ref [] 51 | 52 | let gnu_source = ref false 53 | 54 | let bic = ref false 55 | 56 | let no_seg = ref false 57 | 58 | let gcov = ref false 59 | 60 | let options = 61 | [ 62 | ("-outdir", Arg.Set_string out_dir, "Output directory"); 63 | ( "-instrument", 64 | Arg.String select_instrument, 65 | "Specify instrument method (default: Nothing)" ); 66 | ("-faulty_func", Arg.Set faulty_func, "Set faulty functions"); 67 | ("-skip_compile", Arg.Set skip_compile, "Skip compilation"); 68 | ( "-engine", 69 | Arg.String select_engine, 70 | "Specify localization engine (default: Dummy)" ); 71 | ("-j", Arg.Set_int jobs, "Number of parallel jobs for make (default: -j)"); 72 | ( "-blacklist", 73 | Arg.String (fun x -> blacklist := x :: !blacklist), 74 | "Blacklist for instrumentation" ); 75 | ( "-gnu_source", 76 | Arg.Set gnu_source, 77 | "Add #define _GNU_SOURCE when instrumentation for some programs (e.g., \ 78 | gimp)" ); 79 | ("-bic", Arg.Set bic, "Select whether using bic or not"); 80 | ( "-no_seg", 81 | Arg.Set no_seg, 82 | "Do not instrument fflush after every line if there is no segfault" ); 83 | ("-gcov", Arg.Set gcov, "Use gcov when extracting coverage"); 84 | ] 85 | 86 | let parse_arg x = 87 | work_dir := Some x; 88 | () 89 | -------------------------------------------------------------------------------- /src/coverage.ml: -------------------------------------------------------------------------------- 1 | module F = Format 2 | module StrMap = Map.Make (String) 3 | 4 | (* Line-level coverage using gcov *) 5 | module LineCoverage = struct 6 | (* reference: https://github.com/squaresLab/BugZoo/blob/a87f03b2e33c2097c21c0175e613f4e95d9825eb/bugzoo/core/coverage.py#L106 *) 7 | type elem = { 8 | test : string; 9 | coverage : int list StrMap.t; 10 | linehistory : (int * int) list; 11 | } 12 | 13 | type t = elem list 14 | 15 | let empty = [] 16 | 17 | type tree = E of Xmlm.tag * tree list | D of string 18 | 19 | let elem_of test = { test; coverage = StrMap.empty; linehistory = [] } 20 | 21 | let read_xml file = 22 | let ic = open_in file in 23 | let i = Xmlm.make_input (`Channel ic) in 24 | let el tag childs = E (tag, childs) in 25 | let data d = D d in 26 | Xmlm.input_doc_tree ~el ~data i 27 | 28 | let find_filename al = 29 | match 30 | List.find_map 31 | (function (_, "filename"), data -> Some data | _ -> None) 32 | al 33 | with 34 | | Some data -> data 35 | | _ -> failwith "Unknown filename" 36 | 37 | let elem_of_attr filename elem al = 38 | if List.exists (function (_, "hits"), hits -> hits <> "0" | _ -> false) al 39 | then 40 | match 41 | List.find_map 42 | (function 43 | | (_, "number"), data -> Some (int_of_string data) | _ -> None) 44 | al 45 | with 46 | | Some line -> 47 | { 48 | elem with 49 | coverage = 50 | StrMap.update filename 51 | (function Some l -> Some (line :: l) | None -> Some [ line ]) 52 | elem.coverage; 53 | } 54 | | _ -> elem 55 | else elem 56 | 57 | let rec elem_of_xml ?(filename = "") elem xml = 58 | match xml with 59 | | E (((_, "coverage"), _), l) -> 60 | List.find 61 | (function E (((_, "packages"), _), _) -> true | _ -> false) 62 | l 63 | |> elem_of_xml elem 64 | | E (((_, "packages"), _), l) 65 | | E (((_, "package"), _), l) 66 | | E (((_, "classes"), _), l) -> 67 | List.fold_left (fun elem e -> elem_of_xml elem e) elem l 68 | | E (((_, "class"), al), cl) -> 69 | let filename = find_filename al in 70 | List.fold_left (fun elem e -> elem_of_xml ~filename elem e) elem cl 71 | | E (((_, "lines"), _), l) -> 72 | List.fold_left (fun elem e -> elem_of_xml ~filename elem e) elem l 73 | | E (((_, "line"), al), _) -> elem_of_attr filename elem al 74 | | _ -> elem 75 | 76 | let pp_lines fmt lines = List.iter (fun l -> F.fprintf fmt "%d, " l) lines 77 | 78 | let pp_coverage fmt cov = 79 | StrMap.iter 80 | (fun file lines -> F.fprintf fmt "%s: %a\n" file pp_lines lines) 81 | cov 82 | 83 | let pp_elem fmt { test; coverage; _ } = 84 | F.fprintf fmt "test: %s\ncoverage:\n%a\n" test pp_coverage coverage 85 | 86 | let pp fmt cov = List.iter (fun elem -> pp_elem fmt elem) cov 87 | 88 | let compute_coverage coverage_data = 89 | if Sys.file_exists coverage_data then Unix.unlink coverage_data; 90 | Unix.create_process "gcovr" 91 | [| "gcovr"; "-o"; coverage_data; "-x"; "-d"; "-r"; "." |] 92 | Unix.stdin Unix.stdout Unix.stderr 93 | |> ignore; 94 | match Unix.wait () |> snd with 95 | | Unix.WEXITED 0 -> () 96 | | Unix.WEXITED n -> 97 | failwith ("Error " ^ string_of_int n ^ ": coverage failed") 98 | | _ -> failwith "Coverage failed" 99 | 100 | let update_coverage coverage_data test coverage = 101 | let xml = read_xml coverage_data |> snd in 102 | let elem = elem_of_xml (elem_of test) xml in 103 | elem :: coverage 104 | 105 | let run work_dir bug_desc = 106 | let scenario = Scenario.init work_dir in 107 | Unix.chdir scenario.work_dir; 108 | if not !Cmdline.skip_compile then ( 109 | Logging.log "Start compile"; 110 | Scenario.compile scenario bug_desc.BugDesc.compiler_type; 111 | Unix.chdir scenario.work_dir); 112 | Logging.log "Start test"; 113 | List.fold_left 114 | (fun coverage test -> 115 | Scenario.run_test scenario.test_script test; 116 | compute_coverage scenario.coverage_data; 117 | update_coverage scenario.coverage_data test coverage) 118 | empty bug_desc.BugDesc.test_cases 119 | end 120 | 121 | (* Line-level coverage using our own implementation *) 122 | module LineCoverage2 = struct 123 | include LineCoverage 124 | 125 | module IntSet = Set.Make (struct 126 | type t = int 127 | 128 | let compare = compare 129 | end) 130 | 131 | (* reference: https://github.com/squaresLab/BugZoo/blob/a87f03b2e33c2097c21c0175e613f4e95d9825eb/bugzoo/core/coverage.py#L106 *) 132 | type elem_internal = { 133 | test : string; 134 | coverage_set : IntSet.t StrMap.t; 135 | linehistory : (int * int) list; 136 | } 137 | 138 | let elem_of test = { test; coverage_set = StrMap.empty; linehistory = [] } 139 | 140 | let elem_of_internal { test; coverage_set; linehistory } = 141 | { test; coverage = StrMap.map IntSet.elements coverage_set; linehistory } 142 | 143 | let compute_coverage coverage_data = 144 | if Sys.file_exists coverage_data then Unix.unlink coverage_data; 145 | Unix.create_process "gcovr" 146 | [| "gcovr"; "-o"; coverage_data; "-x"; "-d"; "-r"; "." |] 147 | Unix.stdin Unix.stdout Unix.stderr 148 | |> ignore; 149 | match Unix.wait () |> snd with 150 | | Unix.WEXITED 0 -> () 151 | | Unix.WEXITED n -> 152 | failwith ("Error " ^ string_of_int n ^ ": coverage failed") 153 | | _ -> failwith "Coverage failed" 154 | 155 | let read_whole_file filename = 156 | let ch = open_in filename in 157 | let s = really_input_string ch (in_channel_length ch) in 158 | close_in ch; 159 | s 160 | 161 | let update_coverage coverage_data test coverage = 162 | let data = 163 | try read_whole_file coverage_data |> String.split_on_char '\n' 164 | with Sys_error _ -> [] 165 | in 166 | let elem = 167 | List.fold_left 168 | (fun elem line -> 169 | if List.mem line [ ""; "__START_NEW_EXECUTION__" ] then elem 170 | else 171 | let lst = String.split_on_char ':' line in 172 | try 173 | let filename, lineno = 174 | (List.nth lst 0, List.nth lst 1 |> int_of_string) 175 | in 176 | { 177 | elem with 178 | coverage_set = 179 | StrMap.update filename 180 | (function 181 | | Some s -> Some (IntSet.add lineno s) 182 | | None -> Some (IntSet.singleton lineno)) 183 | elem.coverage_set; 184 | linehistory = 185 | [] 186 | (*( lineno, 187 | if elem.linehistory = [] then 0 188 | else snd (List.hd elem.linehistory) + 1 ) 189 | :: elem.linehistory;*); 190 | } 191 | with _ -> elem) 192 | (elem_of test) data 193 | in 194 | elem :: coverage 195 | 196 | let run work_dir bug_desc = 197 | let scenario = Scenario.init ~stdio_only:true work_dir in 198 | Unix.chdir scenario.work_dir; 199 | (* compile to extract *.i *) 200 | Scenario.compile scenario bug_desc.BugDesc.compiler_type; 201 | let src_dir = Filename.concat scenario.work_dir "src" in 202 | Instrument.Coverage.run scenario.work_dir src_dir; 203 | Unix.chdir scenario.Scenario.work_dir; 204 | (* compile instrumented files *) 205 | Scenario.compile scenario bug_desc.BugDesc.compiler_type; 206 | Unix.chdir scenario.Scenario.work_dir; 207 | Logging.log "Start test"; 208 | let _cov_path = Filename.concat scenario.work_dir "coverage.txt" in 209 | List.fold_left 210 | (fun coverage test -> 211 | (*let regexp_pos = Str.regexp "p.*" in 212 | if Str.string_match regexp_pos test 0 then coverage 213 | else*) 214 | Scenario.run_test scenario.test_script test; 215 | Unix.system 216 | "cat /experiment/coverage_data/tmp/*.txt > \ 217 | /experiment/coverage_data/coverage.txt" 218 | |> ignore; 219 | Unix.system "rm -f /experiment/coverage_data/tmp/*.txt" |> ignore; 220 | let cur_cov_path = 221 | (* Filename.concat "coverage_data" ("coverage." ^ test ^ ".txt") *) 222 | Filename.concat "coverage_data" "coverage.txt" 223 | in 224 | (*Unix.system ("mv " ^ cov_path ^ " " ^ cur_cov_path) |> ignore;*) 225 | update_coverage cur_cov_path test coverage) 226 | empty bug_desc.BugDesc.test_cases 227 | |> List.map elem_of_internal 228 | end 229 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name main) 3 | (link_flags -ccopt -static) 4 | (modules 5 | main 6 | cmdline 7 | logging 8 | localizer 9 | coverage 10 | bugDesc 11 | scenario 12 | instrument 13 | utils) 14 | (libraries str cil cil.all-features xmlm unix yojson)) 15 | 16 | (executable 17 | (name visualizer) 18 | (link_flags -ccopt -static) 19 | (modules visualizer) 20 | (libraries ocamlgraph)) 21 | -------------------------------------------------------------------------------- /src/instrument.ml: -------------------------------------------------------------------------------- 1 | module DfSan = struct 2 | module NodeInfo = struct 3 | type t = Yojson.Safe.t 4 | 5 | let cmd_of t = 6 | ((t |> function 7 | | `Assoc l -> List.assoc "cmd" l 8 | | _ -> failwith "Invalid format") 9 | |> function 10 | | `List l -> List.hd l 11 | | _ -> failwith "Invalid format") 12 | |> function 13 | | `String s -> s 14 | | _ -> failwith "Invalid format" 15 | 16 | let filename_of t = 17 | t 18 | |> (function 19 | | `Assoc l -> List.assoc "loc" l | _ -> failwith "Invalid format") 20 | |> (function `String s -> s | _ -> failwith "Invalid format") 21 | |> String.split_on_char ':' |> Fun.flip List.nth 0 22 | end 23 | 24 | module NodeInfoMap = struct 25 | module M = Map.Make (String) 26 | 27 | type t = NodeInfo.t M.t 28 | 29 | let empty = M.empty 30 | 31 | let add = M.add 32 | 33 | let find = M.find 34 | end 35 | 36 | module LineSet = Set.Make (String) 37 | module FileToEdges = Map.Make (String) 38 | 39 | let read_nodes file = 40 | let ic = open_in file in 41 | Yojson.Safe.from_channel ic 42 | |> (function 43 | | `Assoc l -> List.assoc "nodes" l | _ -> failwith "Invalid format") 44 | |> (function `Assoc l -> l | _ -> failwith "Invalid format") 45 | |> List.fold_left 46 | (fun map (name, info) -> NodeInfoMap.add name info map) 47 | NodeInfoMap.empty 48 | |> fun x -> 49 | close_in ic; 50 | x 51 | 52 | let read_covered_lines file = 53 | let ic = open_in file in 54 | let rec loop lst = 55 | match input_line ic with 56 | | line -> ( 57 | String.split_on_char '\t' line |> function 58 | | h :: _ -> loop (LineSet.add h lst) 59 | | _ -> failwith "Invalid format") 60 | | exception End_of_file -> lst 61 | in 62 | loop LineSet.empty |> fun x -> 63 | close_in ic; 64 | x 65 | 66 | let read_duedges nodes file = 67 | let ic = open_in file in 68 | let rec loop map = 69 | match input_line ic with 70 | | line -> ( 71 | String.split_on_char '\t' line |> function 72 | | src :: dst :: _ -> 73 | let file = NodeInfoMap.find src nodes |> NodeInfo.filename_of in 74 | FileToEdges.update file 75 | (function 76 | | None -> Some [ (src, dst) ] 77 | | Some l -> Some ((src, dst) :: l)) 78 | map 79 | |> loop 80 | | _ -> failwith "Invalid format") 81 | | exception End_of_file -> map 82 | in 83 | loop FileToEdges.empty |> fun x -> 84 | close_in ic; 85 | x 86 | 87 | type dfsan_funs = { 88 | create_label : Cil.varinfo; 89 | set_label : Cil.varinfo; 90 | get_label : Cil.varinfo; 91 | has_label : Cil.varinfo; 92 | } 93 | 94 | let initialize work_dir = 95 | let result_file = Filename.concat work_dir "localizer-out/result.txt" in 96 | let sparrow_out_dir = Filename.concat work_dir "sparrow-out" in 97 | let node_file = Filename.concat sparrow_out_dir "node.json" in 98 | let duedge_file = 99 | Filename.concat sparrow_out_dir "interval/datalog/DUEdge.facts" 100 | in 101 | let nodes = read_nodes node_file in 102 | let lines = read_covered_lines result_file in 103 | let duedges = read_duedges nodes duedge_file in 104 | (nodes, lines, duedges) 105 | 106 | let rec instrument_instr dfsan_funs edges instrs results = 107 | match instrs with 108 | | (Cil.Set ((Var vi, NoOffset), _, loc) as i) :: tl -> 109 | let name = Cil.mkString vi.vname in 110 | Cil.Call 111 | ( None, 112 | Cil.Lval (Cil.Var dfsan_funs.create_label, Cil.NoOffset), 113 | [ name; Cil.zero ], 114 | loc ) 115 | :: i :: results 116 | |> instrument_instr dfsan_funs edges tl 117 | | i :: tl -> i :: results |> instrument_instr dfsan_funs edges tl 118 | | [] -> List.rev results 119 | 120 | class assignVisitor dfsan_funs edges = 121 | object 122 | inherit Cil.nopCilVisitor 123 | 124 | method! vstmt s = 125 | match s.Cil.skind with 126 | | Cil.Instr i -> 127 | s.Cil.skind <- Cil.Instr (instrument_instr dfsan_funs edges i []); 128 | DoChildren 129 | | _ -> DoChildren 130 | end 131 | 132 | let instrument file pp_file _ edges = 133 | Logging.log "Instrument %s (%s)" file pp_file; 134 | let cil = Frontc.parse pp_file () in 135 | let dfsan_funs = 136 | { 137 | create_label = 138 | Cil.findOrCreateFunc cil "dfsan_create_label" 139 | (Cil.TFun (Cil.voidType, None, false, [])); 140 | set_label = 141 | Cil.findOrCreateFunc cil "dfsan_set_label" 142 | (Cil.TFun (Cil.voidType, None, false, [])); 143 | get_label = 144 | Cil.findOrCreateFunc cil "dfsan_get_label" 145 | (Cil.TFun (Cil.voidType, None, false, [])); 146 | has_label = 147 | Cil.findOrCreateFunc cil "dfsan_has_label" 148 | (Cil.TFun (Cil.voidType, None, false, [])); 149 | } 150 | in 151 | Cil.visitCilFile (new assignVisitor dfsan_funs edges) cil; 152 | let oc = open_out pp_file in 153 | Cil.dumpFile !Cil.printerForMaincil oc "" cil; 154 | close_out oc 155 | 156 | let run work_dir src_dir = 157 | let nodes, _, duedges = initialize work_dir in 158 | FileToEdges.iter 159 | (fun file edges -> 160 | if file = "" then () 161 | else 162 | let name = Filename.remove_extension file in 163 | let pp_file = Filename.concat src_dir (name ^ ".i") in 164 | if Sys.file_exists pp_file then instrument file pp_file nodes edges 165 | else Logging.log "%s not found" file) 166 | duedges 167 | end 168 | 169 | let preamble src_dir mode = 170 | String.concat "" 171 | ([ 172 | "/* COVERAGE :: INSTRUMENTATION :: START */\n"; 173 | "typedef struct _IO_FILE FILE;\n"; 174 | "struct _IO_FILE *__inst_stream ;\n"; 175 | "extern FILE *fopen(char const * __restrict __filename , char \ 176 | const * __restrict __modes ) ;\n"; 177 | "extern int fclose(FILE *__stream ) ;\n"; 178 | "static void coverage_ctor (void) __attribute__ ((constructor));\n"; 179 | "static void coverage_ctor (void) {\n"; 180 | ] 181 | @ (if mode = "output" then 182 | [ "__inst_stream = fopen(\"" ^ src_dir ^ "/output.txt\", \"a\");\n" ] 183 | else 184 | [ 185 | " int pid = getpid();\n"; 186 | " char filename[64];\n"; 187 | " sprintf(filename, \"" ^ src_dir ^ "/coverage_data" ^ "/tmp/" ^ mode 188 | ^ "-%d.txt\", pid);\n"; 189 | " __inst_stream = fopen(filename, \"a\");\n"; 190 | " fprintf(__inst_stream, \"__START_NEW_EXECUTION__\\n\");\n"; 191 | " fflush(__inst_stream);\n"; 192 | ]) 193 | @ [ 194 | "}\n"; 195 | "static void coverage_dtor (void) __attribute__ ((destructor));\n"; 196 | "static void coverage_dtor (void) {\n"; 197 | " fclose(__inst_stream);\n"; 198 | "}\n"; 199 | "/* COVERAGE :: INSTRUMENTATION :: END */\n"; 200 | ]) 201 | 202 | let found_type = ref None 203 | 204 | let found_gvar = ref None 205 | 206 | class findTypeVisitor name = 207 | object 208 | inherit Cil.nopCilVisitor 209 | 210 | method! vglob g = 211 | match g with 212 | | GCompTag (ci, _) -> 213 | if ci.Cil.cname = name then found_type := Some ci; 214 | SkipChildren 215 | | _ -> SkipChildren 216 | end 217 | 218 | class findGVarVisitor name = 219 | object 220 | inherit Cil.nopCilVisitor 221 | 222 | method! vglob g = 223 | match g with 224 | | GVarDecl (vi, _) -> 225 | if vi.Cil.vname = name then found_gvar := Some vi; 226 | SkipChildren 227 | | _ -> SkipChildren 228 | end 229 | 230 | let append_constructor work_dir filename mode = 231 | let read_whole_file filename = 232 | let ch = open_in filename in 233 | let s = really_input_string ch (in_channel_length ch) in 234 | close_in ch; 235 | s 236 | in 237 | let code = read_whole_file filename in 238 | if 239 | String.length code > 42 240 | && String.equal (String.sub code 0 42) 241 | "/* COVERAGE :: INSTRUMENTATION :: START */" 242 | then () 243 | else 244 | let instr_c_code = preamble work_dir mode ^ read_whole_file filename in 245 | let oc = open_out filename in 246 | Printf.fprintf oc "%s" instr_c_code; 247 | close_out oc 248 | 249 | module GSA = struct 250 | let pred_num = ref (-1) 251 | 252 | let new_pred () = 253 | pred_num := !pred_num + 1; 254 | "OOJAHOOO_PRED_" ^ string_of_int !pred_num 255 | 256 | class assignInitializer f = 257 | let add_predicate_var result stmt = 258 | match stmt.Cil.skind with 259 | | Cil.If (pred, then_branch, else_branch, loc) -> 260 | let pred_var = new_pred () in 261 | let vi = Cil.makeLocalVar f pred_var (Cil.TInt (Cil.IInt, [])) in 262 | stmt.Cil.skind <- 263 | Cil.If 264 | ( Cil.Lval (Cil.Var vi, Cil.NoOffset), 265 | then_branch, 266 | else_branch, 267 | loc ); 268 | let assign = 269 | Cil.mkStmtOneInstr (Cil.Set ((Cil.Var vi, Cil.NoOffset), pred, loc)) 270 | in 271 | let temp = assign.skind in 272 | assign.skind <- stmt.skind; 273 | stmt.skind <- temp; 274 | result @ [ stmt; assign ] 275 | | _ -> result @ [ stmt ] 276 | in 277 | object 278 | inherit Cil.nopCilVisitor 279 | 280 | method! vblock b = 281 | let new_stmts = List.fold_left add_predicate_var [] b.Cil.bstmts in 282 | b.bstmts <- new_stmts; 283 | DoChildren 284 | end 285 | 286 | class predicateVisitor faulty_func_list = 287 | object 288 | inherit Cil.nopCilVisitor 289 | 290 | method! vfunc f = 291 | if 292 | String.length f.svar.vname >= 6 293 | && (String.equal (String.sub f.svar.vname 0 6) "bugzoo" 294 | || String.equal (String.sub f.svar.vname 0 6) "unival") 295 | || List.length faulty_func_list > 0 296 | && not (List.mem f.svar.vname faulty_func_list) 297 | then SkipChildren 298 | else ChangeTo (Cil.visitCilFunction (new assignInitializer f) f) 299 | end 300 | 301 | let predicate_transform ?(faulty_func_list = []) pp_file = 302 | let origin_file = Filename.basename (Filename.basename pp_file) in 303 | Logging.log "Predicate transform %s (%s)" origin_file pp_file; 304 | let cil_opt = 305 | try Some (Frontc.parse pp_file ()) with Frontc.ParseError _ -> None 306 | in 307 | if Option.is_none cil_opt then pp_file 308 | else 309 | let cil = Option.get cil_opt in 310 | Cil.visitCilFile (new predicateVisitor faulty_func_list) cil; 311 | let oc = open_out pp_file in 312 | Cil.dumpFile !Cil.printerForMaincil oc "" cil; 313 | close_out oc; 314 | pp_file 315 | 316 | module CausalMap = Map.Make (String) 317 | module VarSet = Set.Make (String) 318 | module VarVerMap = Map.Make (String) 319 | module VarMap = Map.Make (String) 320 | 321 | let causal_map = ref CausalMap.empty 322 | 323 | let var_ver = ref VarVerMap.empty 324 | 325 | class assignVisitor (printf, flush, stream) f = 326 | let vname_of lv = 327 | match lv with Cil.Var vi, Cil.NoOffset -> vi.Cil.vname | _ -> "" 328 | in 329 | let varinfo_of lv = 330 | match lv with 331 | | Cil.Var vi, Cil.NoOffset -> vi 332 | | _ -> Cil.makeVarinfo false "" (Cil.TVoid []) 333 | in 334 | let rec var_names_of exp = 335 | let result = 336 | match exp with 337 | | Cil.Lval lv -> VarMap.singleton (vname_of lv) (varinfo_of lv) 338 | | Cil.SizeOfE e -> var_names_of e 339 | | Cil.AlignOfE e -> var_names_of e 340 | | Cil.UnOp (_, e, _) -> var_names_of e 341 | | Cil.BinOp (_, e1, e2, _) -> 342 | VarMap.union 343 | (fun _ va1 _ -> Some va1) 344 | (var_names_of e1) (var_names_of e2) 345 | | Cil.Question (e1, e2, e3, _) -> 346 | VarMap.union 347 | (fun _ va1 _ -> Some va1) 348 | (VarMap.union 349 | (fun _ va1 _ -> Some va1) 350 | (var_names_of e1) (var_names_of e2)) 351 | (var_names_of e3) 352 | | Cil.CastE (_, e) -> var_names_of e 353 | | _ -> VarMap.empty 354 | in 355 | VarMap.remove "" result 356 | in 357 | let is_pred vname = 358 | let pred_prefix = Str.regexp "OOJAHOOO_PRED_\\[0-9\\]\\+" in 359 | Str.string_match pred_prefix vname 0 360 | in 361 | let rec string_of_typ = function 362 | | Cil.TInt (Cil.IChar, _) -> "char" 363 | | Cil.TInt (Cil.ISChar, _) -> "signed char" 364 | | Cil.TInt (Cil.IUChar, _) -> "unsigned char" 365 | | Cil.TInt (Cil.IInt, _) -> "int" 366 | | Cil.TInt (Cil.IUInt, _) -> "unsigned int" 367 | | Cil.TInt (Cil.IShort, _) -> "short" 368 | | Cil.TInt (Cil.IUShort, _) -> "unsigned short" 369 | | Cil.TInt (Cil.ILong, _) -> "long" 370 | | Cil.TInt (Cil.IULong, _) -> "unsigned long" 371 | | Cil.TFloat (Cil.FFloat, _) -> "float" 372 | | Cil.TFloat (Cil.FDouble, _) -> "double" 373 | | Cil.TFloat (Cil.FLongDouble, _) -> "long double" 374 | | Cil.TPtr (Cil.TInt (Cil.IChar, _), _) -> "string" 375 | | Cil.TNamed (t, _) -> string_of_typ t.ttype 376 | | _ -> "NA" 377 | in 378 | let call_record var vname ver loc = 379 | let call_printf filename funcname line varname version typ var_exp = 380 | let fmt = 381 | match typ with 382 | | "char" | "signed char" | "unsigned char" -> "%c" 383 | | "unsigned int" -> "%u" 384 | | "int" | "short" -> "%d" 385 | | "unsigned short" -> "%hd" 386 | | "long" -> "%ld" 387 | | "unsigned long" -> "%lu" 388 | | "float" -> "%f" 389 | | "double" | "long double" -> "%lf" 390 | | "string" -> "%s" 391 | | "NA" -> "NA" 392 | | _ -> "" 393 | in 394 | Cil.Call 395 | ( None, 396 | Cil.Lval (Cil.Var printf, Cil.NoOffset), 397 | [ 398 | Cil.Lval (Cil.Var stream, Cil.NoOffset); 399 | Cil.Const 400 | (Cil.CStr 401 | (Printf.sprintf "%s,%s,%d,%s,%d" filename funcname line 402 | ("UNIVAL_" ^ funcname ^ "_" ^ varname) 403 | version 404 | ^ "," ^ fmt ^ "\n")); 405 | var_exp; 406 | ], 407 | loc ) 408 | in 409 | let call_flush loc = 410 | Cil.Call 411 | ( None, 412 | Cil.Lval (Cil.Var flush, Cil.NoOffset), 413 | [ Cil.Lval (Cil.Var stream, Cil.NoOffset) ], 414 | loc ) 415 | in 416 | let fun_name = f.Cil.svar.vname in 417 | let t = string_of_typ (Cil.typeOfLval var) in 418 | if 419 | String.length vname >= 13 420 | && String.equal (String.sub vname 0 13) "OOJAHOOO_PRED" 421 | then 422 | [ 423 | call_printf loc.Cil.file fun_name loc.Cil.line vname ver t 424 | (Cil.Question 425 | ( Cil.BinOp (Eq, Cil.Lval var, Cil.zero, Cil.intType), 426 | Cil.zero, 427 | Cil.one, 428 | Cil.intType )); 429 | call_flush loc; 430 | ] 431 | (* printf("%s,%s,%d,%s,%d,%d\n", filename, funcname, line, varname, version, i_val) *) 432 | else 433 | [ 434 | call_printf loc.Cil.file fun_name loc.Cil.line vname ver t (Lval var); 435 | call_flush loc; 436 | ] 437 | in 438 | let ass2gsa result instr = 439 | let gogo, lv, lval, exp_vars, loc = 440 | match instr with 441 | | Cil.Set (lv, exp, loc) -> 442 | let exp_vars = var_names_of exp in 443 | let lval = vname_of lv in 444 | (true, lv, lval, exp_vars, loc) 445 | | Call (lv_opt, _, params, loc) -> 446 | if Option.is_none lv_opt then 447 | ( false, 448 | (Var (Cil.makeVarinfo false "" (Cil.TVoid [])), Cil.NoOffset), 449 | "", 450 | VarMap.empty, 451 | loc ) 452 | else 453 | let lv = Option.get lv_opt in 454 | let exp_vars = 455 | List.fold_left 456 | (fun ev param -> 457 | VarMap.union 458 | (fun _ vi1 _ -> Some vi1) 459 | ev (var_names_of param)) 460 | VarMap.empty params 461 | in 462 | let lval = vname_of lv in 463 | (true, lv, lval, exp_vars, loc) 464 | | _ -> 465 | ( false, 466 | (Var (Cil.makeVarinfo false "" (Cil.TVoid [])), Cil.NoOffset), 467 | "", 468 | VarMap.empty, 469 | { line = -1; file = ""; byte = -1 } ) 470 | in 471 | if (not gogo) || lval = "" then result @ [ instr ] 472 | else if is_pred lval then ( 473 | let exp_vars_with_ver, exp_vars_with_new_ver = 474 | VarMap.fold 475 | (fun ev _ (vs, nvs) -> 476 | (* for debugging *) 477 | (* print_endline "a"; *) 478 | let unival_fn_ev = "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ ev in 479 | let unival_ev = "UNIVAL_" ^ ev in 480 | if VarVerMap.mem unival_fn_ev !var_ver then 481 | let ver = VarVerMap.find unival_fn_ev !var_ver in 482 | ( (unival_fn_ev ^ "_" ^ string_of_int ver) :: vs, 483 | (unival_fn_ev ^ "_" ^ string_of_int (ver + 1)) :: nvs ) 484 | else if VarVerMap.mem unival_ev !var_ver then 485 | let ver = VarVerMap.find ev !var_ver in 486 | ( (unival_ev ^ "_" ^ string_of_int ver) :: vs, 487 | (unival_ev ^ "_" ^ string_of_int (ver + 1)) :: nvs ) 488 | else ( 489 | print_endline unival_fn_ev; 490 | raise (Failure "Not_Found_Var"))) 491 | exp_vars ([], []) 492 | in 493 | let unival_fn_lval = "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ lval in 494 | causal_map := CausalMap.add unival_fn_lval exp_vars_with_ver !causal_map; 495 | let unival_fn_exp_vars = 496 | VarMap.fold 497 | (fun v _ fevs -> 498 | VarSet.add ("UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ v) fevs) 499 | exp_vars VarSet.empty 500 | in 501 | let unival_exp_vars = 502 | VarMap.fold 503 | (fun v _ fevs -> VarSet.add ("UNIVAL_" ^ v) fevs) 504 | exp_vars VarSet.empty 505 | in 506 | let new_var_ver = 507 | VarVerMap.mapi 508 | (fun v ver -> 509 | if VarSet.mem v unival_fn_exp_vars || VarSet.mem v unival_exp_vars 510 | then ver + 1 511 | else ver) 512 | !var_ver 513 | in 514 | List.iter2 515 | (fun old_ver new_ver -> 516 | causal_map := CausalMap.add new_ver [ old_ver ] !causal_map) 517 | exp_vars_with_ver exp_vars_with_new_ver; 518 | let pred_record = call_record lv lval 0 loc in 519 | let records = 520 | VarMap.fold 521 | (fun vname vi rs -> 522 | (* for debugging *) 523 | (* print_endline "b"; *) 524 | let unival_fn_vname = 525 | "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ vname 526 | in 527 | let unival_vname = "UNIVAL_" ^ vname in 528 | call_record (Cil.Var vi, Cil.NoOffset) vname 529 | (if VarMap.mem unival_fn_vname new_var_ver then 530 | VarMap.find unival_fn_vname new_var_ver 531 | else VarMap.find unival_vname new_var_ver) 532 | loc 533 | @ rs) 534 | exp_vars [] 535 | in 536 | var_ver := new_var_ver; 537 | result @ (instr :: (pred_record @ records))) 538 | else 539 | let unival_fn_lval = "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ lval in 540 | let unival_lval = "UNIVAL_" ^ lval in 541 | let new_var_ver = 542 | if VarVerMap.mem unival_fn_lval !var_ver then 543 | VarVerMap.update unival_fn_lval 544 | (fun ver -> Some (Option.get ver + 1)) 545 | !var_ver 546 | else if VarVerMap.mem unival_lval !var_ver then 547 | VarVerMap.update unival_lval 548 | (fun ver -> Some (Option.get ver + 1)) 549 | !var_ver 550 | else VarVerMap.add unival_fn_lval 0 !var_ver 551 | in 552 | let exp_vars_with_ver = 553 | VarMap.fold 554 | (fun ev _ vs -> 555 | (* for debugging *) 556 | (* print_endline ev; *) 557 | let unival_fn_ev = "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ ev in 558 | let unival_ev = "UNIVAL_" ^ ev in 559 | if VarVerMap.mem unival_fn_ev !var_ver then 560 | let ver = VarVerMap.find unival_fn_ev !var_ver in 561 | (unival_fn_ev ^ "_" ^ string_of_int ver) :: vs 562 | else if VarVerMap.mem unival_ev !var_ver then 563 | let ver = VarVerMap.find unival_ev !var_ver in 564 | (unival_ev ^ "_" ^ string_of_int ver) :: vs 565 | else ( 566 | print_endline unival_fn_ev; 567 | raise (Failure "Not_Found_Var"))) 568 | exp_vars [] 569 | in 570 | (* for debugging *) 571 | (* print_endline "d"; *) 572 | let final_lval, ver_of_lval = 573 | if VarVerMap.mem unival_fn_lval new_var_ver then 574 | (unival_fn_lval, VarVerMap.find unival_fn_lval new_var_ver) 575 | else (unival_lval, VarVerMap.find unival_lval new_var_ver) 576 | in 577 | let lval_with_ver = final_lval ^ "_" ^ string_of_int ver_of_lval in 578 | causal_map := CausalMap.add lval_with_ver exp_vars_with_ver !causal_map; 579 | let lv_record = call_record lv lval ver_of_lval loc in 580 | var_ver := new_var_ver; 581 | result @ (instr :: lv_record) 582 | in 583 | object 584 | inherit Cil.nopCilVisitor 585 | 586 | method! vstmt s = 587 | match s.Cil.skind with 588 | | Instr is -> 589 | s.Cil.skind <- Instr (List.fold_left ass2gsa [] is); 590 | DoChildren 591 | | _ -> DoChildren 592 | end 593 | 594 | class funAssignVisitor (printf, flush, stream) faulty_func_list = 595 | object 596 | inherit Cil.nopCilVisitor 597 | 598 | method! vglob g = 599 | let loc = Cil.get_globalLoc g in 600 | if String.starts_with ~prefix:"/usr" loc.file then SkipChildren 601 | else DoChildren 602 | 603 | method! vfunc f = 604 | if 605 | String.length f.svar.vname >= 6 606 | && String.equal (String.sub f.svar.vname 0 6) "unival" 607 | || List.length faulty_func_list > 0 608 | && not (List.mem f.svar.vname faulty_func_list) 609 | then Cil.SkipChildren 610 | else ( 611 | List.iter 612 | (fun form -> 613 | var_ver := 614 | VarVerMap.add 615 | ("UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ form.Cil.vname) 616 | 0 !var_ver) 617 | f.Cil.sformals; 618 | List.iter 619 | (fun local -> 620 | var_ver := 621 | VarVerMap.add 622 | ("UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ local.Cil.vname) 623 | 0 !var_ver) 624 | f.Cil.slocals; 625 | ChangeTo 626 | (Cil.visitCilFunction 627 | (new assignVisitor (printf, flush, stream) f) 628 | f)) 629 | end 630 | 631 | let extract_gvar globals = 632 | List.filter_map 633 | (fun g -> 634 | match g with 635 | | Cil.GVarDecl (vi, _) | Cil.GVar (vi, _, _) -> Some vi.Cil.vname 636 | | _ -> None) 637 | globals 638 | 639 | let gsa_gen ?(faulty_func_list = []) work_dir origin_file_opt pt_file = 640 | Cil.resetCIL (); 641 | Cil.insertImplicitCasts := false; 642 | let cil_opt = 643 | try Some (Frontc.parse pt_file ()) with Frontc.ParseError _ -> None 644 | in 645 | if Option.is_none cil_opt then () 646 | else 647 | let cil = Option.get cil_opt in 648 | let origin_file_cand = Filename.remove_extension pt_file ^ ".c" in 649 | let origin_file = 650 | if Sys.file_exists origin_file_cand then origin_file_cand 651 | else if Option.is_some origin_file_opt then Option.get origin_file_opt 652 | else ( 653 | prerr_endline origin_file_cand; 654 | Utils.find_file (Filename.basename origin_file_cand) work_dir 655 | |> List.hd) 656 | in 657 | Logging.log "GSA_Gen %s (%s)" origin_file pt_file; 658 | Cil.visitCilFile (new findTypeVisitor "_IO_FILE") cil; 659 | Cil.visitCilFile (new findGVarVisitor "stderr") cil; 660 | if Option.is_none !found_type || Option.is_none !found_gvar then () 661 | else 662 | let fileptr = Cil.TPtr (Cil.TComp (Option.get !found_type, []), []) in 663 | let printf = 664 | Cil.findOrCreateFunc cil "fprintf" 665 | (Cil.TFun 666 | ( Cil.voidType, 667 | Some 668 | [ ("stream", fileptr, []); ("format", Cil.charPtrType, []) ], 669 | true, 670 | [] )) 671 | in 672 | let flush = 673 | Cil.findOrCreateFunc cil "fflush" 674 | (Cil.TFun (Cil.voidType, Some [ ("stream", fileptr, []) ], false, [])) 675 | in 676 | let stream = Cil.makeGlobalVar "__inst_stream" fileptr in 677 | cil.Cil.globals <- Cil.GVarDecl (stream, Cil.locUnknown) :: cil.globals; 678 | let global_vars = extract_gvar cil.Cil.globals in 679 | var_ver := 680 | List.fold_left 681 | (fun vv gv -> VarVerMap.add ("UNIVAL_" ^ gv) 0 vv) 682 | VarVerMap.empty global_vars; 683 | Cil.visitCilFile 684 | (new funAssignVisitor (printf, flush, stream) faulty_func_list) 685 | cil; 686 | Unix.system 687 | ("cp " ^ origin_file ^ " " 688 | ^ Filename.remove_extension origin_file 689 | ^ ".origin.c") 690 | |> ignore; 691 | (if List.mem (Filename.basename origin_file) [ "proc_open.c"; "cast.c" ] 692 | then () 693 | else 694 | let oc = open_out (Filename.remove_extension origin_file ^ ".c") in 695 | Cil.dumpFile !Cil.printerForMaincil oc "" cil; 696 | close_out oc); 697 | if 698 | List.mem 699 | (Filename.basename origin_file) 700 | [ 701 | "gzip.c"; 702 | "tif_unix.c"; 703 | "http_auth.c"; 704 | "main.c"; 705 | "version.c"; 706 | "grep.c"; 707 | "readelf.c"; 708 | "core_shntool.c"; 709 | "sed.c"; 710 | "tar.c"; 711 | ] 712 | then append_constructor work_dir origin_file "output" 713 | 714 | let print_cm work_dir causal_map = 715 | let output_file = Filename.concat work_dir "CausalMap.txt" in 716 | let oc = open_out output_file in 717 | let cm_str = 718 | Utils.join 719 | (CausalMap.fold 720 | (fun var parents res -> Utils.join (var :: parents) "," :: res) 721 | causal_map []) 722 | "\n" 723 | in 724 | Printf.fprintf oc "%s" cm_str; 725 | close_out oc 726 | 727 | let print_fc work_dir causal_map = 728 | let output_file = Filename.concat work_dir "FaultCandidates.txt" in 729 | let oc = open_out output_file in 730 | let fc_str = 731 | Utils.join 732 | (CausalMap.fold 733 | (fun var _ res -> 734 | let var_without_ver = 735 | Utils.join 736 | (List.rev (List.tl (List.rev (String.split_on_char '_' var)))) 737 | "_" 738 | in 739 | if List.mem var_without_ver res then res 740 | else var_without_ver :: res) 741 | causal_map []) 742 | "_1\n" 743 | in 744 | Printf.fprintf oc "%s" fc_str; 745 | close_out oc 746 | 747 | let run work_dir src_dir = 748 | let faulty_func_list = 749 | if !Cmdline.faulty_func then 750 | let ff_path = Filename.concat work_dir "faulty_func.txt" in 751 | let ic = open_in ff_path in 752 | let rec read_lines ic ffs = 753 | try 754 | let line = input_line ic in 755 | read_lines ic (line :: ffs) 756 | with End_of_file -> ffs 757 | in 758 | read_lines ic [] 759 | else [] 760 | in 761 | Utils.traverse_pp_file 762 | (fun pp_file -> 763 | let origin_file_opt = Utils.find_origin_file_opt pp_file in 764 | pp_file 765 | |> predicate_transform ~faulty_func_list 766 | |> gsa_gen ~faulty_func_list work_dir origin_file_opt) 767 | src_dir; 768 | Utils.remove_temp_files src_dir; 769 | print_cm work_dir !causal_map; 770 | print_fc work_dir !causal_map 771 | end 772 | 773 | module Coverage = struct 774 | let location_of_instr = function 775 | | Cil.Set (_, _, l) | Cil.Call (_, _, _, l) | Cil.Asm (_, _, _, _, _, l) -> 776 | l 777 | 778 | let printf_of printf stream loc = 779 | Cil.Call 780 | ( None, 781 | Cil.Lval (Cil.Var printf, Cil.NoOffset), 782 | [ 783 | Cil.Lval (Cil.Var stream, Cil.NoOffset); 784 | Cil.Const (Cil.CStr "%s:%d\n"); 785 | Cil.Const (Cil.CStr loc.Cil.file); 786 | Cil.integer loc.Cil.line; 787 | ], 788 | loc ) 789 | 790 | let flush_of flush stream loc = 791 | Cil.Call 792 | ( None, 793 | Cil.Lval (Cil.Var flush, Cil.NoOffset), 794 | [ Cil.Lval (Cil.Var stream, Cil.NoOffset) ], 795 | loc ) 796 | 797 | class instrumentVisitor printf flush stream = 798 | object 799 | inherit Cil.nopCilVisitor 800 | 801 | method! vglob g = 802 | let loc = Cil.get_globalLoc g in 803 | if String.starts_with ~prefix:"/usr" loc.file then SkipChildren 804 | else DoChildren 805 | 806 | method! vfunc fd = 807 | if fd.Cil.svar.vname = "bugzoo_ctor" then SkipChildren else DoChildren 808 | 809 | method! vblock blk = 810 | let bstmts = 811 | List.fold_left 812 | (fun bstmts s -> 813 | match s.Cil.skind with 814 | | Cil.Instr insts -> 815 | let new_insts = 816 | List.fold_left 817 | (fun is i -> 818 | let loc = Cil.get_instrLoc i in 819 | let call = printf_of printf stream loc in 820 | if not !Cmdline.no_seg then 821 | let flush = flush_of flush stream loc in 822 | i :: flush :: call :: is 823 | else i :: call :: is) 824 | [] insts 825 | |> List.rev 826 | in 827 | s.skind <- Cil.Instr new_insts; 828 | s :: bstmts 829 | | _ -> 830 | let loc = Cil.get_stmtLoc s.Cil.skind in 831 | let call = 832 | printf_of printf stream loc |> Cil.mkStmtOneInstr 833 | in 834 | if not !Cmdline.no_seg then 835 | let flush = 836 | flush_of flush stream loc |> Cil.mkStmtOneInstr 837 | in 838 | s :: flush :: call :: bstmts 839 | else s :: call :: bstmts) 840 | [] blk.Cil.bstmts 841 | |> List.rev 842 | in 843 | blk.bstmts <- bstmts; 844 | Cil.DoChildren 845 | end 846 | 847 | let instrument work_dir origin_file_opt pt_file = 848 | Cil.resetCIL (); 849 | Cil.insertImplicitCasts := false; 850 | let cil_opt = 851 | try Some (Frontc.parse pt_file ()) with 852 | | Frontc.ParseError _ -> None 853 | | Stack_overflow -> 854 | Logging.log "%s" "Stack overflow"; 855 | None 856 | | e -> 857 | Logging.log "%s" (Printexc.to_string e); 858 | None 859 | in 860 | if Option.is_none cil_opt then () 861 | else 862 | let cil = Option.get cil_opt in 863 | let origin_file_cand = Filename.remove_extension pt_file ^ ".c" in 864 | let origin_file = 865 | if Sys.file_exists origin_file_cand then origin_file_cand 866 | else Option.get origin_file_opt 867 | in 868 | Logging.log "Instrument Coverage %s (%s)" origin_file pt_file; 869 | (* TODO: clean up *) 870 | Cil.visitCilFile (new findTypeVisitor "_IO_FILE") cil; 871 | Cil.visitCilFile (new findGVarVisitor "stderr") cil; 872 | if Option.is_none !found_type || Option.is_none !found_gvar then () 873 | else 874 | let fileptr = Cil.TPtr (Cil.TComp (Option.get !found_type, []), []) in 875 | let printf = 876 | Cil.findOrCreateFunc cil "fprintf" 877 | (Cil.TFun 878 | ( Cil.voidType, 879 | Some 880 | [ ("stream", fileptr, []); ("format", Cil.charPtrType, []) ], 881 | true, 882 | [] )) 883 | in 884 | let flush = 885 | Cil.findOrCreateFunc cil "fflush" 886 | (Cil.TFun (Cil.voidType, Some [ ("stream", fileptr, []) ], false, [])) 887 | in 888 | let stream = Cil.makeGlobalVar "__inst_stream" fileptr in 889 | cil.globals <- Cil.GVarDecl (stream, Cil.locUnknown) :: cil.globals; 890 | Cil.visitCilFile (new instrumentVisitor printf flush stream) cil; 891 | Unix.system 892 | ("cp " ^ origin_file ^ " " 893 | ^ Filename.remove_extension origin_file 894 | ^ ".origin.c") 895 | |> ignore; 896 | (if List.mem (Filename.basename origin_file) [ "proc_open.c"; "cast.c" ] 897 | then () 898 | else 899 | let oc = open_out origin_file in 900 | Cil.dumpFile !Cil.printerForMaincil oc "" cil; 901 | close_out oc); 902 | if 903 | List.mem 904 | (Unix.realpath origin_file) 905 | [ 906 | "/experiment/src/gzip.c"; 907 | "/experiment/src/libtiff/tif_unix.c"; 908 | "/experiment/src/src/http_auth.c"; 909 | "/experiment/src/main/main.c"; 910 | "/experiment/src/version.c"; 911 | ] 912 | then append_constructor work_dir origin_file "coverage" 913 | 914 | let run work_dir src_dir = 915 | Utils.traverse_pp_file 916 | (fun pp_file -> 917 | let origin_file_opt = Utils.find_origin_file_opt pp_file in 918 | instrument work_dir origin_file_opt pp_file) 919 | src_dir 920 | end 921 | 922 | let run work_dir = 923 | Cil.initCIL (); 924 | Cil.insertImplicitCasts := false; 925 | let src_dir = Filename.concat work_dir "src" in 926 | match !Cmdline.instrument with 927 | | Cmdline.DfSan -> DfSan.run work_dir src_dir 928 | | Cmdline.GSA -> GSA.run work_dir src_dir 929 | | Cmdline.Coverage -> Coverage.run work_dir src_dir 930 | | Cmdline.Nothing -> () 931 | -------------------------------------------------------------------------------- /src/localizer.ml: -------------------------------------------------------------------------------- 1 | module F = Format 2 | module LineCoverage = Coverage.LineCoverage 3 | module LineCoverageInst = Coverage.LineCoverage2 4 | 5 | module BugLocation = struct 6 | type t = Cil.location * float * float * float * int 7 | 8 | let pp fmt (l, score_neg, score_pos, score, score_time) = 9 | F.fprintf fmt "%s:%d\t%f %f %f %d" l.Cil.file l.Cil.line score_neg score_pos 10 | score score_time 11 | 12 | let pp_cov fmt (l, score_neg, score_pos, score, _score_time) = 13 | F.fprintf fmt "%s:%d,%d,%d,%f" 14 | (l.Cil.file |> Filename.basename) 15 | l.Cil.line (int_of_float score_pos) (int_of_float score_neg) score 16 | 17 | let pp_file fmt file = F.fprintf fmt "%s" file 18 | end 19 | 20 | let print_file bic_locations parent_locations resultname = 21 | let locations = 22 | List.fold_left 23 | (fun acc (l, s1, _, _, _) -> 24 | if List.mem (l.Cil.file |> Filename.basename) acc || s1 = 0. then acc 25 | else (l.Cil.file |> Filename.basename) :: acc) 26 | [] bic_locations 27 | in 28 | let locations = 29 | List.fold_left 30 | (fun acc (l, s1, _, _, _) -> 31 | if List.mem (l.Cil.file |> Filename.basename) acc || s1 = 0. then acc 32 | else (l.Cil.file |> Filename.basename) :: acc) 33 | locations parent_locations 34 | in 35 | let oc3 = Filename.concat !Cmdline.out_dir resultname |> open_out in 36 | let fmt3 = F.formatter_of_out_channel oc3 in 37 | List.iter (fun l -> F.fprintf fmt3 "%a\n" BugLocation.pp_file l) locations; 38 | close_out oc3 39 | 40 | let print_coverage locations resultname = 41 | let oc2 = Filename.concat !Cmdline.out_dir resultname |> open_out in 42 | let fmt2 = F.formatter_of_out_channel oc2 in 43 | List.iter (fun l -> F.fprintf fmt2 "%a\n" BugLocation.pp_cov l) locations; 44 | close_out oc2; 45 | locations 46 | 47 | let print locations resultname = 48 | let oc = Filename.concat !Cmdline.out_dir resultname |> open_out in 49 | let fmt = F.formatter_of_out_channel oc in 50 | List.iter (fun l -> F.fprintf fmt "%a\n" BugLocation.pp l) locations; 51 | close_out oc 52 | 53 | let copy_src () = 54 | Unix.create_process "cp" 55 | [| "cp"; "-rf"; "src"; !Cmdline.out_dir |] 56 | Unix.stdin Unix.stdout Unix.stderr 57 | |> ignore; 58 | 59 | match Unix.wait () |> snd with 60 | | Unix.WEXITED 0 -> () 61 | | Unix.WEXITED n -> 62 | () (*failwith ("Error " ^ string_of_int n ^ ": copy failed")*) 63 | | _ -> () 64 | (*failwith "copy failed"*) 65 | 66 | let dummy_localizer work_dir bug_desc = 67 | let coverage = LineCoverage.run work_dir bug_desc in 68 | Logging.log "Coverage: %a" LineCoverage.pp coverage; 69 | copy_src (); 70 | List.fold_left 71 | (fun locs elem -> 72 | Coverage.StrMap.fold 73 | (fun file lines locs -> 74 | let new_locs = 75 | List.map 76 | (fun line -> ({ Cil.file; line; byte = 0 }, 0.0, 0.0, 0.0, 0)) 77 | lines 78 | in 79 | locs @ new_locs) 80 | elem.LineCoverage.coverage locs) 81 | [] coverage 82 | 83 | let spec_localizer work_dir bug_desc localizer_list = 84 | let coverage = 85 | if !Cmdline.gcov then LineCoverage.run work_dir bug_desc 86 | else LineCoverageInst.run work_dir bug_desc 87 | in 88 | Logging.log "Coverage: %a" LineCoverage.pp coverage; 89 | copy_src (); 90 | let table = Hashtbl.create 99999 in 91 | List.fold_left 92 | (fun locs (elem : LineCoverage.elem) -> 93 | let regexp_pos = Str.regexp "p.*" in 94 | Coverage.StrMap.fold 95 | (fun file lines locs -> 96 | let new_locs = 97 | if Str.string_match regexp_pos elem.LineCoverage.test 0 then 98 | List.rev_map 99 | (fun line -> ({ Cil.file; line; byte = 0 }, 0.0, 1.0, 0.0, 0)) 100 | lines 101 | else 102 | List.rev_map 103 | (fun line -> 104 | ( { Cil.file; line; byte = 0 }, 105 | 1.0, 106 | 0.0, 107 | 0.0, 108 | (*List.find 109 | (fun (x, y) -> x = line) 110 | elem.LineCoverage.linehistory 111 | |> snd *) 112 | 0 )) 113 | lines 114 | in 115 | List.rev_append new_locs locs) 116 | elem.LineCoverage.coverage locs) 117 | [] coverage 118 | |> List.iter (fun (l, s1, s2, s3, s4) -> 119 | match Hashtbl.find_opt table l with 120 | | Some (new_s1, new_s2, new_s3, new_s4) -> 121 | Hashtbl.replace table l 122 | (s1 +. new_s1, s2 +. new_s2, s3 +. new_s3, s4 + new_s4) 123 | | _ -> Hashtbl.add table l (s1, s2, s3, s4)); 124 | if bug_desc.BugDesc.program = "php" then ( 125 | Unix.create_process "sudo" 126 | [| "sudo"; "rm"; "-rf"; "/experiment/src/test/bad" |] 127 | Unix.stdin Unix.stdout Unix.stderr 128 | |> ignore; 129 | match Unix.wait () |> snd with 130 | | Unix.WEXITED 0 -> () 131 | | Unix.WEXITED n -> failwith ("Error " ^ string_of_int n ^ ": rm bad failed") 132 | | _ -> failwith "rm bad failed"); 133 | 134 | let spec_coverage = 135 | List.map 136 | (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4)) 137 | (List.of_seq (Hashtbl.to_seq table)) 138 | in 139 | match localizer_list with 140 | | (v, _) :: _ -> v work_dir bug_desc spec_coverage 141 | | _ -> spec_coverage 142 | 143 | let prophet_localizer _work_dir _bug_desc locations = 144 | List.stable_sort 145 | (fun (_, s11, s12, _, s14) (_, s21, s22, _, s24) -> 146 | if s21 -. s11 <> 0. then int_of_float (s21 -. s11) 147 | else if s12 -. s22 <> 0. then int_of_float (s12 -. s22) 148 | else s24 - s14) 149 | locations 150 | 151 | let tarantula_localizer _work_dir bug_desc locations = 152 | let test_cases = bug_desc.BugDesc.test_cases in 153 | let pos_num = 154 | List.fold_left 155 | (fun acc t -> 156 | let regexp_pos = Str.regexp "p.*" in 157 | if Str.string_match regexp_pos t 0 then acc + 1 else acc) 158 | 0 test_cases 159 | in 160 | let neg_num = 161 | List.fold_left 162 | (fun acc t -> 163 | let regexp_neg = Str.regexp "n.*" in 164 | if Str.string_match regexp_neg t 0 then acc + 1 else acc) 165 | 0 test_cases 166 | in 167 | let taran_loc = 168 | List.map 169 | (fun (l, s1, s2, _, _) -> 170 | let nep = s2 in 171 | let nnp = float_of_int pos_num -. s2 in 172 | let nef = s1 in 173 | let nnf = float_of_int neg_num -. s1 in 174 | let numer = nef /. (nef +. nnf) in 175 | let denom1 = nef /. (nef +. nnf) in 176 | let denom2 = nep /. (nep +. nnp) in 177 | let score = numer /. (denom1 +. denom2) in 178 | (l, s1, s2, score, 0)) 179 | locations 180 | in 181 | List.stable_sort 182 | (fun (_, _, _, s13, _) (_, _, _, s23, _) -> 183 | if s23 > s13 then 1 else if s23 = s13 then 0 else -1) 184 | taran_loc 185 | 186 | let ochiai_localizer _work_dir bug_desc locations = 187 | let test_cases = bug_desc.BugDesc.test_cases in 188 | let pos_num = 189 | List.fold_left 190 | (fun acc t -> 191 | let regexp_pos = Str.regexp "p.*" in 192 | if Str.string_match regexp_pos t 0 then acc + 1 else acc) 193 | 0 test_cases 194 | in 195 | let neg_num = 196 | List.fold_left 197 | (fun acc t -> 198 | let regexp_neg = Str.regexp "n.*" in 199 | if Str.string_match regexp_neg t 0 then acc + 1 else acc) 200 | 0 test_cases 201 | in 202 | let ochiai_loc = 203 | List.map 204 | (fun (l, s1, s2, _, _) -> 205 | let nep = s2 in 206 | let _nnp = float_of_int pos_num -. s2 in 207 | let nef = s1 in 208 | let nnf = float_of_int neg_num -. s1 in 209 | let sub_denom1 = nef +. nnf in 210 | let sub_denom2 = nef +. nep in 211 | let denom = sqrt (sub_denom1 *. sub_denom2) in 212 | let score = nef /. denom in 213 | (l, s1, s2, score, 0)) 214 | locations 215 | in 216 | List.stable_sort 217 | (fun (_, _, _, s13, _) (_, _, _, s23, _) -> 218 | if s23 > s13 then 1 else if s23 = s13 then 0 else -1) 219 | ochiai_loc 220 | 221 | let jaccard_localizer _work_dir bug_desc locations = 222 | let test_cases = bug_desc.BugDesc.test_cases in 223 | let pos_num = 224 | List.fold_left 225 | (fun acc t -> 226 | let regexp_pos = Str.regexp "p.*" in 227 | if Str.string_match regexp_pos t 0 then acc + 1 else acc) 228 | 0 test_cases 229 | in 230 | let neg_num = 231 | List.fold_left 232 | (fun acc t -> 233 | let regexp_neg = Str.regexp "n.*" in 234 | if Str.string_match regexp_neg t 0 then acc + 1 else acc) 235 | 0 test_cases 236 | in 237 | let jaccard_loc = 238 | List.map 239 | (fun (l, s1, s2, _, _) -> 240 | let nep = s2 in 241 | let _nnp = float_of_int pos_num -. s2 in 242 | let nef = s1 in 243 | let nnf = float_of_int neg_num -. s1 in 244 | let denom = nef +. nnf +. nep in 245 | let score = nef /. denom in 246 | (l, s1, s2, score, 0)) 247 | locations 248 | in 249 | List.stable_sort 250 | (fun (_, _, _, s13, _) (_, _, _, s23, _) -> 251 | if s23 > s13 then 1 else if s23 = s13 then 0 else -1) 252 | jaccard_loc 253 | 254 | let diff_localizer work_dir bug_desc localizer_list = 255 | Unix.chdir "/experiment/src"; 256 | Unix.create_process "make" [| "make"; "clean" |] Unix.stdin Unix.stdout 257 | Unix.stderr 258 | |> ignore; 259 | (match Unix.wait () |> snd with 260 | | Unix.WEXITED 0 -> () 261 | | Unix.WEXITED n -> 262 | failwith ("Error " ^ string_of_int n ^ ": make clean failed test") 263 | | _ -> failwith "make clean failed"); 264 | Unix.create_process "make" [| "make"; "distclean" |] Unix.stdin Unix.stdout 265 | Unix.stderr 266 | |> ignore; 267 | (match Unix.wait () |> snd with 268 | | Unix.WEXITED 0 -> () 269 | | Unix.WEXITED n -> 270 | failwith ("Error " ^ string_of_int n ^ ": make distclean failed") 271 | | _ -> failwith "make distclean failed"); 272 | 273 | Unix.chdir "/experiment"; 274 | Unix.create_process "cp" 275 | [| "cp"; "-rf"; "src"; "bic" |] 276 | Unix.stdin Unix.stdout Unix.stderr 277 | |> ignore; 278 | (match Unix.wait () |> snd with 279 | | Unix.WEXITED 0 -> () 280 | | Unix.WEXITED n -> failwith ("Error " ^ string_of_int n ^ ": cp failed") 281 | | _ -> failwith "cp failed"); 282 | 283 | Unix.chdir "/experiment/src"; 284 | Unix.create_process "./configure" [| "./configure" |] Unix.stdin Unix.stdout 285 | Unix.stderr 286 | |> ignore; 287 | (match Unix.wait () |> snd with 288 | | Unix.WEXITED 0 -> () 289 | | Unix.WEXITED n -> 290 | failwith ("Error " ^ string_of_int n ^ ": configure failed") 291 | | _ -> failwith "configure failed"); 292 | 293 | Unix.chdir "/experiment"; 294 | 295 | (*let bic_locations = spec_localizer work_dir bug_desc () in*) 296 | let table = Hashtbl.create 99999 in 297 | let table_parent = Hashtbl.create 99999 in 298 | 299 | spec_localizer work_dir bug_desc [] 300 | |> List.iter (fun (l, s1, s2, s3, s4) -> 301 | match Hashtbl.find_opt table l with 302 | | Some (new_s1, new_s2, new_s3, new_s4) -> 303 | Hashtbl.replace table l 304 | (s1 +. new_s1, s2 +. new_s2, s3 +. new_s3, s4 + new_s4) 305 | | _ -> Hashtbl.add table l (s1, s2, s3, s4)); 306 | 307 | Unix.chdir "/experiment"; 308 | Unix.create_process "./parent_checkout.sh" 309 | [| "./parent_checkout.sh" |] 310 | Unix.stdin Unix.stdout Unix.stderr 311 | |> ignore; 312 | (match Unix.wait () |> snd with 313 | | Unix.WEXITED 0 -> () 314 | | Unix.WEXITED n -> 315 | failwith ("Error " ^ string_of_int n ^ ": parent script failed test2") 316 | | _ -> failwith "parent script failed"); 317 | 318 | Unix.chdir "/experiment/src"; 319 | Unix.create_process "./configure" [| "./configure" |] Unix.stdin Unix.stdout 320 | Unix.stderr 321 | |> ignore; 322 | (match Unix.wait () |> snd with 323 | | Unix.WEXITED 0 -> () 324 | | Unix.WEXITED n -> 325 | failwith ("Error " ^ string_of_int n ^ ": configure failed") 326 | | _ -> failwith "configure failed"); 327 | 328 | Unix.chdir "/experiment"; 329 | spec_localizer work_dir bug_desc [] 330 | |> List.iter (fun (l, s1, s2, s3, s4) -> 331 | match Hashtbl.find_opt table_parent l with 332 | | Some (new_s1, new_s2, new_s3, new_s4) -> 333 | Hashtbl.replace table_parent l 334 | (s1 +. new_s1, s2 +. new_s2, s3 +. new_s3, s4 + new_s4) 335 | | _ -> Hashtbl.add table_parent l (s1, s2, s3, s4)); 336 | 337 | Unix.chdir "/experiment"; 338 | (* 339 | let open Yojson.Basic.Util in 340 | let json = Yojson.Basic.from_file "line_matching.json" in 341 | let changed_file = json |> member "changed_files" |> to_assoc in 342 | let unchanged_file = 343 | json |> member "unchanged_files" |> to_list 344 | |> List.map (fun a -> a |> to_string) 345 | in 346 | *) 347 | let bic_result = 348 | List.map 349 | (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4)) 350 | (List.of_seq (Hashtbl.to_seq table)) 351 | in 352 | 353 | List.iter 354 | (fun (localizer, engine_name) -> 355 | "coverage_" ^ engine_name ^ "_bic.txt" 356 | |> (bic_result |> localizer work_dir bug_desc |> print_coverage) 357 | |> ignore) 358 | localizer_list; 359 | 360 | (* 361 | let parent_result = 362 | List.map 363 | (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4)) 364 | (List.of_seq (Hashtbl.to_seq table_parent)) 365 | in 366 | *) 367 | "coverage_file.txt" 368 | |> ("coverage_parent.txt" 369 | |> (List.map 370 | (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4)) 371 | (List.of_seq (Hashtbl.to_seq table_parent)) 372 | |> print_coverage) 373 | |> print_file bic_result); 374 | [] 375 | (* 376 | List.iter 377 | (fun (l, s1, s2, s3, s4) -> 378 | let new_l = 379 | if List.mem l.Cil.file unchanged_file then Some l 380 | else 381 | match List.assoc_opt l.Cil.file changed_file with 382 | | Some v 383 | when l.Cil.line - 1 < List.length (v |> to_list) 384 | && List.nth (v |> to_list) (l.Cil.line - 1) |> to_int <> 0 -> 385 | Some 386 | { 387 | Cil.file = l.Cil.file; 388 | line = List.nth (v |> to_list) (l.Cil.line - 1) |> to_int; 389 | byte = 0; 390 | } 391 | | _ -> None 392 | in 393 | if new_l <> None then 394 | let l = Option.get new_l in 395 | match Hashtbl.find_opt table l with 396 | | Some (new_s1, new_s2, new_s3, new_s4) -> 397 | Hashtbl.replace table l 398 | (new_s1, s1 +. s2 +. new_s2, s3 +. new_s3, s4 + new_s4) 399 | | _ -> Hashtbl.add table l (0., s1 +. s2, s3, s4)) 400 | parent_result; 401 | 402 | "coverage_diff.txt" 403 | |> (List.map 404 | (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4)) 405 | (List.of_seq (Hashtbl.to_seq table)) 406 | |> print_coverage) 407 | *) 408 | 409 | let unival_compile scenario bug_desc = 410 | Unix.chdir scenario.Scenario.work_dir; 411 | if not !Cmdline.skip_compile then Logging.log "Start compile"; 412 | Scenario.compile scenario bug_desc.BugDesc.compiler_type; 413 | Unix.chdir scenario.Scenario.work_dir 414 | 415 | let unival_run_test scenario bug_desc = 416 | Logging.log "Start test"; 417 | let text_file_name = 418 | Filename.concat scenario.Scenario.work_dir "output.txt" 419 | in 420 | List.iter 421 | (fun test -> 422 | let oc = open_out_gen [ Open_append; Open_creat ] 0o775 text_file_name in 423 | Printf.fprintf oc "*** new execution ***,%s,%d\n" test 424 | (if String.get test 0 = 'p' then 0 else 1); 425 | close_out oc; 426 | Unix.create_process scenario.Scenario.test_script 427 | [| scenario.Scenario.test_script; test |] 428 | Unix.stdin Unix.stdout 429 | (* (Unix.openfile 430 | (Filename.concat work_dir "output.txt") 431 | [ Unix.O_CREAT; Unix.O_WRONLY; Unix.O_CREAT ] 432 | 0o775) *) 433 | Unix.stderr 434 | |> ignore; 435 | Unix.wait () |> ignore; 436 | Logging.log "End test %s" test) 437 | bug_desc.BugDesc.test_cases 438 | 439 | let unival_localizer work_dir bug_desc = 440 | let scenario = Scenario.init work_dir in 441 | unival_compile scenario bug_desc; 442 | Instrument.run scenario.work_dir; 443 | unival_compile scenario bug_desc; 444 | unival_run_test scenario bug_desc; 445 | List.iter 446 | (fun filename -> 447 | Unix.create_process "cp" 448 | [| 449 | "cp"; 450 | "-rf"; 451 | Filename.concat scenario.work_dir filename; 452 | Filename.concat scenario.work_dir "localizer-out/"; 453 | |] 454 | Unix.stdin Unix.stdout Unix.stderr 455 | |> ignore; 456 | Unix.wait () |> ignore) 457 | [ "output.txt"; "CausalMap.txt"; "FaultCandidates.txt"; "src/" ] 458 | 459 | let coverage work_dir bug_desc = 460 | let scenario = Scenario.init ~stdio_only:true work_dir in 461 | Unix.chdir scenario.Scenario.work_dir; 462 | Scenario.compile scenario bug_desc.BugDesc.compiler_type; 463 | Instrument.run scenario.work_dir; 464 | Unix.chdir scenario.Scenario.work_dir; 465 | Scenario.compile scenario bug_desc.BugDesc.compiler_type 466 | 467 | let run work_dir = 468 | Logging.log "Start localization"; 469 | let bug_desc = BugDesc.read work_dir in 470 | Logging.log "Bug desc: %a" BugDesc.pp bug_desc; 471 | let localizer = if !Cmdline.bic then diff_localizer else spec_localizer in 472 | match !Cmdline.engine with 473 | | Cmdline.Dummy -> 474 | "result_dummy.txt" |> (dummy_localizer work_dir bug_desc |> print) 475 | | Cmdline.Tarantula -> 476 | localizer work_dir bug_desc [ (tarantula_localizer, "tarantula") ] 477 | |> Fun.flip print "result_tarantula.txt" 478 | | Cmdline.Prophet -> 479 | localizer work_dir bug_desc [ (prophet_localizer, "prophet") ] 480 | |> Fun.flip print "result_prophet.txt" 481 | | Cmdline.Jaccard -> 482 | localizer work_dir bug_desc [ (jaccard_localizer, "jaccard") ] 483 | |> Fun.flip print "result_jaccard.txt" 484 | | Cmdline.Ochiai -> 485 | localizer work_dir bug_desc [ (ochiai_localizer, "ochiai") ] 486 | |> Fun.flip print "result_ochiai.txt" 487 | | Cmdline.UniVal -> unival_localizer work_dir bug_desc 488 | | Cmdline.All -> 489 | localizer work_dir bug_desc 490 | [ 491 | (prophet_localizer, "prophet"); 492 | (tarantula_localizer, "tarantula"); 493 | (jaccard_localizer, "jaccard"); 494 | (ochiai_localizer, "ochiai"); 495 | ] 496 | |> ignore 497 | | Cmdline.Coverage -> coverage work_dir bug_desc 498 | -------------------------------------------------------------------------------- /src/logging.ml: -------------------------------------------------------------------------------- 1 | module F = Format 2 | module P = Printf 3 | 4 | let log_file : out_channel option ref = ref None 5 | 6 | let log_formatter = ref None 7 | 8 | let string_of_current_time () = 9 | Unix.time () |> Unix.localtime |> fun tm -> 10 | P.sprintf "%d%02d%02d-%02d:%02d:%02d" (1900 + tm.tm_year) (tm.tm_mon + 1) 11 | tm.tm_mday tm.tm_hour tm.tm_min tm.tm_sec 12 | 13 | let log fmt = 14 | match !log_formatter with 15 | | Some log_formatter -> 16 | F.fprintf log_formatter "[%s] " (string_of_current_time ()); 17 | F.kfprintf 18 | (fun log_formatter -> 19 | F.fprintf log_formatter "\n"; 20 | F.pp_print_flush log_formatter ()) 21 | log_formatter fmt 22 | | None -> failwith "Cannot open logfile" 23 | -------------------------------------------------------------------------------- /src/main.ml: -------------------------------------------------------------------------------- 1 | module F = Format 2 | 3 | let initialize work_dir = 4 | let out_dir = Filename.concat work_dir !Cmdline.out_dir in 5 | (try Unix.mkdir out_dir 0o775 with Unix.Unix_error (Unix.EEXIST, _, _) -> ()); 6 | let cov_dir = Filename.concat work_dir "coverage_data" in 7 | (try Unix.mkdir cov_dir 0o775 with Unix.Unix_error (Unix.EEXIST, _, _) -> ()); 8 | let cov_tmp_dir = Filename.concat cov_dir "tmp" in 9 | (try Unix.mkdir cov_tmp_dir 0o775 10 | with Unix.Unix_error (Unix.EEXIST, _, _) -> ()); 11 | print_endline ("Logging to " ^ out_dir); 12 | Logging.log_file := 13 | Filename.concat out_dir "log.txt" |> open_out |> Option.some; 14 | Logging.log_formatter := 15 | Option.map F.formatter_of_out_channel !Logging.log_file 16 | 17 | let main () = 18 | let usageMsg = "Usage: localizer [options] [work dir]" in 19 | Arg.parse Cmdline.options Cmdline.parse_arg usageMsg; 20 | match !Cmdline.work_dir with 21 | | None -> 22 | prerr_endline "Error: No work directory is given"; 23 | exit 1 24 | | Some work_dir -> 25 | initialize work_dir; 26 | Localizer.run work_dir 27 | 28 | let _ = main () 29 | -------------------------------------------------------------------------------- /src/scenario.ml: -------------------------------------------------------------------------------- 1 | type t = { 2 | work_dir : string; 3 | compile_script : string; 4 | test_script : string; 5 | coverage_data : string; 6 | } 7 | 8 | let file_instrument filename preamble = 9 | let read_whole_file filename = 10 | let ch = open_in filename in 11 | let s = really_input_string ch (in_channel_length ch) in 12 | close_in ch; 13 | s 14 | in 15 | let c_code = read_whole_file filename in 16 | let instr_c_code = preamble ^ c_code in 17 | let oc = open_out filename in 18 | Printf.fprintf oc "%s" instr_c_code; 19 | close_out oc 20 | 21 | let file_instrument_all work_dir preamble = 22 | let rec traverse_file f root_dir = 23 | let files = Sys.readdir root_dir in 24 | Array.iter 25 | (fun file -> 26 | let file_path = Filename.concat root_dir file in 27 | if (Unix.lstat file_path).st_kind = Unix.S_LNK then () 28 | else if List.mem file !Cmdline.blacklist then () 29 | else if Sys.is_directory file_path then traverse_file f file_path 30 | else if Filename.extension file = ".c" then f file_path preamble 31 | else ()) 32 | files 33 | in 34 | traverse_file file_instrument work_dir 35 | 36 | let init ?(stdio_only = false) work_dir = 37 | let work_dir = 38 | if Filename.is_relative work_dir then 39 | Filename.concat (Unix.getcwd ()) work_dir 40 | else work_dir 41 | in 42 | { 43 | work_dir; 44 | compile_script = Filename.concat work_dir "compile.sh"; 45 | test_script = Filename.concat work_dir "test.sh"; 46 | coverage_data = Filename.concat work_dir "coverage.xml"; 47 | } 48 | 49 | let simple_compiler compile_script = 50 | Unix.create_process compile_script [| compile_script |] Unix.stdin Unix.stdout 51 | Unix.stderr 52 | |> ignore; 53 | match Unix.wait () |> snd with 54 | | Unix.WEXITED 0 -> () 55 | | Unix.WEXITED n -> 56 | failwith ("Error " ^ string_of_int n ^ ": " ^ compile_script ^ " failed") 57 | | _ -> failwith (compile_script ^ " failed") 58 | 59 | let make () = 60 | let jobs = 61 | if !Cmdline.jobs = 0 then "-j" else "-j" ^ string_of_int !Cmdline.jobs 62 | in 63 | Unix.create_process "make" [| "make"; jobs |] Unix.stdin Unix.stdout 64 | Unix.stderr 65 | |> ignore; 66 | match Unix.wait () |> snd with 67 | | Unix.WEXITED 0 -> () 68 | | Unix.WEXITED n -> failwith ("Error " ^ string_of_int n ^ ": make failed") 69 | | _ -> failwith "make failed" 70 | 71 | let configure () = 72 | Unix.create_process "./configure" 73 | [| 74 | "./configure"; 75 | "CFLAGS=--coverage -save-temps=obj -Wno-error"; 76 | "CXXFLAGS=--coverage -save-temps=obj"; 77 | "LDFLAGS=-lgcov --coverage"; 78 | |] 79 | Unix.stdin Unix.stdout Unix.stderr 80 | |> ignore; 81 | match Unix.wait () |> snd with 82 | | Unix.WEXITED 0 -> () 83 | | Unix.WEXITED n -> 84 | failwith ("Error " ^ string_of_int n ^ ": configure failed") 85 | | _ -> failwith "configure failed" 86 | 87 | let make_clean () = 88 | Unix.create_process "make" [| "make"; "clean" |] Unix.stdin Unix.stdout 89 | Unix.stderr 90 | |> ignore; 91 | match Unix.wait () |> snd with 92 | | Unix.WEXITED 0 -> () 93 | | Unix.WEXITED n -> 94 | failwith ("Error " ^ string_of_int n ^ ": make clean failed") 95 | | _ -> failwith "make clean failed" 96 | 97 | let make_distclean () = 98 | Unix.create_process "make" [| "make"; "distclean" |] Unix.stdin Unix.stdout 99 | Unix.stderr 100 | |> ignore; 101 | match Unix.wait () |> snd with 102 | | Unix.WEXITED 0 -> () 103 | | Unix.WEXITED n -> 104 | failwith ("Error " ^ string_of_int n ^ ": make distclean failed") 105 | | _ -> failwith "make distclean failed" 106 | 107 | let configure_and_make () = 108 | Unix.chdir "src"; 109 | make_clean (); 110 | make_distclean (); 111 | configure (); 112 | make () 113 | 114 | let compile scenario compiler_type = 115 | match compiler_type with 116 | | "compile" -> simple_compiler scenario.compile_script 117 | | "configure-and-make" -> configure_and_make () 118 | | _ -> failwith "Unknown compiler" 119 | 120 | let run_test test_script name = 121 | Unix.create_process test_script [| test_script; name |] Unix.stdin Unix.stdout 122 | Unix.stderr 123 | |> ignore; 124 | Unix.wait () |> ignore 125 | -------------------------------------------------------------------------------- /src/utils.ml: -------------------------------------------------------------------------------- 1 | let rec join strlist delimiter = 2 | match strlist with 3 | | [ hd ] -> hd 4 | | hd :: tl -> hd ^ delimiter ^ join tl delimiter 5 | | [] -> "" 6 | 7 | let remove_unnec_file filename = 8 | if not (Sys.file_exists (Filename.remove_extension filename ^ ".c")) then () 9 | else ( 10 | print_endline ("Remove " ^ filename); 11 | Unix.create_process "rm" [| "rm"; "-f"; filename |] Unix.stdin Unix.stdout 12 | Unix.stderr 13 | |> ignore; 14 | Unix.wait () |> ignore) 15 | 16 | let rec remove_temp_files root_dir = 17 | let files = Sys.readdir root_dir in 18 | Array.iter 19 | (fun file -> 20 | let file_path = Filename.concat root_dir file in 21 | print_endline file_path; 22 | if (Unix.lstat file_path).st_kind = Unix.S_LNK then () 23 | else if Sys.is_directory file_path then 24 | if Filename.check_suffix file_path ".hg" then () 25 | else remove_temp_files file_path 26 | else if 27 | List.mem (Filename.extension file) 28 | [ ".i"; ".lo"; ".s"; ".gcno"; ".o"; ".asm" ] 29 | then remove_unnec_file file_path 30 | else ()) 31 | files 32 | 33 | let dash2under_bar s = String.map (fun c -> if c = '-' then '_' else c) s 34 | 35 | let rec traverse_pp_file f root_dir = 36 | let files = Sys.readdir root_dir in 37 | Array.iter 38 | (fun file -> 39 | let file_path = Filename.concat root_dir file in 40 | if (Unix.lstat file_path).st_kind = Unix.S_LNK then () 41 | else if Sys.is_directory file_path then 42 | if 43 | Filename.check_suffix file_path ".libs" 44 | || Filename.check_suffix file_path ".hg" 45 | then () 46 | else traverse_pp_file f file_path 47 | else if 48 | List.mem (Filename.basename file) 49 | [ 50 | "libldtestplug_la-testplug.i"; 51 | "sysinfo.i"; 52 | "sed_sed-compile.i"; 53 | "sed_sed-regexp.i"; 54 | "sed_sed-execute.i"; 55 | "sed_sed-mbcs.i"; 56 | "sed_sed-sed.i"; 57 | "sed_sed-utils.i"; 58 | "dummy-1522.i"; 59 | ] 60 | then () 61 | else if Filename.extension file = ".i" then f file_path 62 | else ()) 63 | files 64 | 65 | let rec find_file filename root_dir = 66 | let files = Sys.readdir root_dir in 67 | Array.fold_left 68 | (fun paths file -> 69 | let file_path = Filename.concat root_dir file in 70 | if (Unix.lstat file_path).st_kind = Unix.S_LNK then paths 71 | else if Sys.is_directory file_path then 72 | if 73 | Filename.check_suffix file_path "mytest" 74 | || Filename.check_suffix file_path ".libs" 75 | then paths 76 | else paths @ find_file filename file_path 77 | else if Filename.basename file_path = Filename.basename filename then 78 | file_path :: paths 79 | else paths) 80 | [] files 81 | 82 | let find_origin_file_opt pp_file = 83 | let ic = open_in pp_file in 84 | let line = input_line ic in 85 | assert (String.starts_with ~prefix:"# 1" line); 86 | let filename = String.split_on_char '"' line |> Fun.flip List.nth 1 in 87 | if Filename.is_relative filename then 88 | try Some (Unix.realpath filename) with _ -> None 89 | else Some filename 90 | -------------------------------------------------------------------------------- /src/visualizer.ml: -------------------------------------------------------------------------------- 1 | module Node = struct 2 | include String 3 | 4 | let hash = Hashtbl.hash 5 | end 6 | 7 | module Edge = struct 8 | type t = string 9 | 10 | let compare = compare 11 | 12 | let default = "" 13 | end 14 | 15 | module G = struct 16 | include Graph.Persistent.Digraph.ConcreteBidirectionalLabeled (Node) (Edge) 17 | 18 | let default_vertex_attributes _ = [] 19 | 20 | let default_edge_attributes _ = [] 21 | 22 | let edge_attributes (_, edge, _) = 23 | if edge = "n1" then [ `Color 0xff0000 ] else [ `Color 0x000000 ] 24 | 25 | let get_subgraph _ = None 26 | 27 | let vertex_attributes _ = [ `Shape `Box ] 28 | 29 | let vertex_name v = "\"" ^ v ^ "\"" 30 | 31 | let graph_attributes _ = [] 32 | end 33 | 34 | module Graphviz = Graph.Graphviz.Dot (G) 35 | 36 | let rec draw ic name pred graph = 37 | match input_line ic with 38 | | s -> G.add_edge_e graph (pred, name, s) |> draw ic name s 39 | | exception _ -> graph 40 | 41 | let main () = 42 | Array.to_list Sys.argv |> List.tl 43 | |> List.fold_left 44 | (fun graph file -> 45 | let ic = open_in file in 46 | let graph = draw ic file "__START__" graph in 47 | close_in ic; 48 | graph) 49 | G.empty 50 | |> Graphviz.output_graph stdout 51 | 52 | let _ = main () 53 | -------------------------------------------------------------------------------- /test/simple1/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | ./compile.sh 3 | 4 | clean: 5 | rm -rf bug *.gc* *.xml 6 | -------------------------------------------------------------------------------- /test/simple1/bug_desc.json: -------------------------------------------------------------------------------- 1 | { 2 | "compiler": { 3 | "time-limit": 300, 4 | "type": "compile" 5 | }, 6 | "test-harness": { 7 | "failing": 1, 8 | "passing": 5, 9 | "time-limit": 300 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /test/simple1/compile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd src 3 | gcc -o bug --coverage --save-temps -lgcov -g bug.c 4 | cd .. 5 | -------------------------------------------------------------------------------- /test/simple1/src/bug.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int calc(int arg) { 6 | if (arg != 487) { 7 | return arg * arg; 8 | } else { 9 | return arg + 1; 10 | } 11 | } 12 | 13 | int main(int argc, char *argv[]) { 14 | if (argc != 2) { 15 | printf("Please, provide a positive float number."); 16 | return 1; 17 | } 18 | 19 | int arg = atoi(argv[1]); 20 | int res = calc(arg); 21 | 22 | printf("%d\n", res); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /test/simple1/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | TEST_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 4 | BIN=$TEST_HOME/bug 5 | 6 | function run_test() { 7 | diff -q <($BIN $1) <(expr $1 \* $1) >/dev/null 8 | return $? 9 | } 10 | 11 | case $1 in 12 | p1) run_test 1 && exit 0 ;; 13 | p2) run_test 12 && exit 0 ;; 14 | p3) run_test 123 && exit 0 ;; 15 | p4) run_test 1234 && exit 0 ;; 16 | p5) run_test 12345 && exit 0 ;; 17 | n1) run_test 487 && exit 0 ;; 18 | esac 19 | exit 1 20 | -------------------------------------------------------------------------------- /unival-docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8-jdk 2 | MAINTAINER oojahooo 3 | 4 | WORKDIR /unival/ 5 | 6 | ARG DEBIAN_FRONTEND=noninteractive 7 | RUN apt-get update && apt-get install -y r-base 8 | 9 | RUN Rscript -e "install.packages(c(\"glmnet\", \"ranger\", \"stringdist\", \"dbscan\", \"dplyr\", \"tidyr\"))" 10 | 11 | ADD src/* /unival/ 12 | -------------------------------------------------------------------------------- /unival-docker/src/ProcessDataProfile.java: -------------------------------------------------------------------------------- 1 | import java.io.BufferedReader; 2 | import java.io.FileReader; 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.HashSet; 6 | import java.util.Set; 7 | 8 | public class ProcessDataProfile { 9 | 10 | public static void main(String[] args) throws IOException { 11 | 12 | 13 | // Change file name to the same as the file being produced by the instrumented program 14 | String dataFileName = "output.txt"; 15 | 16 | 17 | HashMap> causalMap = createCausalMap(); 18 | Set faultCandidates = getFaultCandidates(); 19 | StructuredDataCollector.structureData(dataFileName, causalMap, faultCandidates); 20 | } 21 | 22 | public static HashMap> createCausalMap() { 23 | HashMap> causalMap = new HashMap<>(); 24 | 25 | BufferedReader reader; 26 | try { 27 | // Change name of data file accordingly 28 | reader = new BufferedReader(new FileReader("CausalMap.txt")); 29 | String line = reader.readLine(); 30 | while (line != null) { 31 | String[] row = line.split(","); 32 | String 33 | var = row[0]; 34 | causalMap.put(var, new HashSet<>()); 35 | 36 | if (row.length > 1) { 37 | for (int i = 1; i < row.length; i++) { 38 | causalMap.get(var).add(row[i]); 39 | } 40 | } 41 | 42 | line = reader.readLine(); 43 | } 44 | reader.close(); 45 | return causalMap; 46 | } catch (IOException e) { 47 | return null; 48 | } 49 | } 50 | 51 | public static Set getFaultCandidates() { 52 | Set faultCandidates = new HashSet(); 53 | 54 | BufferedReader reader; 55 | try { 56 | // Change name of data file accordingly 57 | reader = new BufferedReader(new FileReader("FaultCandidates.txt")); 58 | String line = reader.readLine(); 59 | while (line != null) { 60 | 61 | faultCandidates.add(line); 62 | 63 | line = reader.readLine(); 64 | } 65 | reader.close(); 66 | return faultCandidates; 67 | } catch (IOException e) { 68 | return null; 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /unival-docker/src/RFC.R: -------------------------------------------------------------------------------- 1 | source("RforCFmeansRF.R") 2 | 3 | # 3 RF 4 | predCFoutRF <- 5 | function(dataFrame, 6 | outVarName, 7 | treatVarName, 8 | treatVal) { 9 | library("ranger") 10 | 11 | forest <- 12 | ranger(paste(outVarName, " ~ .", sep = ""), data = dataFrame) 13 | 14 | CFdata <- data.frame(dataFrame) 15 | 16 | CFdata[[treatVarName]] <- 17 | rep(treatVal, length(CFdata[[treatVarName]])) 18 | 19 | CFout <- predictions(predict(forest, CFdata)) 20 | 21 | return(CFout) 22 | 23 | } 24 | 25 | # 3 LM 26 | predCFoutLM <- 27 | function(dataFrame, 28 | outVarName, 29 | treatVarName, 30 | treatVal) { 31 | M <- lm(paste(outVarName, " ~ .", sep = ""), data = dataFrame) 32 | 33 | CFdata <- data.frame(dataFrame) 34 | 35 | CFdata[[treatVarName]] <- 36 | rep(treatVal, length(CFdata[[treatVarName]])) 37 | 38 | CFout <- predict(M, CFdata) 39 | 40 | return(CFout) 41 | 42 | } 43 | 44 | # 3 predict with lasso 45 | predCFoutLM <- 46 | function(dataFrame, 47 | outVarName, 48 | treatVarName, 49 | treatVal) { 50 | library(glmnet) 51 | # newoutput <- apply(newoutput, 2, as.numeric) 52 | # Y <- apply(Y, 2, as.numeric) 53 | fit.glmnet <- 54 | cv.glmnet(x = data.matrix(dataFrame[,-1]), 55 | data.matrix(dataFrame[, 1]), 56 | family = "gaussian") 57 | 58 | CFdata <- data.frame(dataFrame) 59 | 60 | CFdata[[treatVarName]] <- 61 | rep(treatVal, length(CFdata[[treatVarName]])) 62 | 63 | 64 | # CFdata <- apply(CFdata, 2, as.numeric) 65 | CFout <- 66 | predict(fit.glmnet, 67 | newx = data.matrix(CFdata[,-1]), 68 | s = "lambda.min", 69 | type = 'response') 70 | 71 | # return(CFout); 72 | } 73 | #prediction happens here 74 | predCFprobRF <- 75 | function(dataFrame, 76 | outVarName, 77 | treatVarName, 78 | treatVal) { 79 | library("ranger") 80 | 81 | forest <- 82 | ranger(paste(outVarName, " ~ .", sep = ""), 83 | data = dataFrame, 84 | probability = TRUE) 85 | 86 | CFdata <- data.frame(dataFrame) 87 | 88 | CFdata[[treatVarName]] <- 89 | rep(treatVal, length(CFdata[[treatVarName]])) 90 | 91 | CFout <- predictions(predict(forest, data = CFdata)) 92 | return(CFout[, 2]) 93 | } 94 | 95 | # 0 96 | trainCFoutPredRF <- function(dataFrame, outVarName) { 97 | library("ranger") 98 | 99 | forest <- 100 | ranger(paste(outVarName, " ~ .", sep = ""), data = dataFrame) 101 | 102 | return(forest) 103 | 104 | } 105 | 106 | trainCFprobPredRF <- function(dataFrame, outVarName) { 107 | library("ranger") 108 | 109 | forest <- 110 | ranger(paste(outVarName, " ~ .", sep = ""), 111 | data = dataFrame, 112 | probability = TRUE) 113 | 114 | return(forest) 115 | 116 | } 117 | 118 | CFmeansForTreatRangeRF <- 119 | function(dataFrame, 120 | outVarName, 121 | treatVarName, 122 | treatVec, 123 | minTreat, 124 | maxTreat) { 125 | CFmeans <- rep(0, maxTreat - minTreat + 1) 126 | 127 | 128 | for (i in minTreat:maxTreat) { 129 | CFmeans[i] <- 130 | mean(predCFoutRF(dataFrame, outVarName, treatVarName, treatVec[i])) 131 | 132 | } 133 | 134 | return(CFmeans) 135 | 136 | 137 | } 138 | 139 | # 2 RF 140 | CFmeansForTreatVecRF <- 141 | function(dataFrame, 142 | outVarName, 143 | treatVarName, 144 | treatVec) { 145 | CFmeans <- rep(0, length(treatVec)) 146 | 147 | 148 | 149 | for (i in 1:length(treatVec)) { 150 | CFmeans[i] <- 151 | mean(predCFoutRF(dataFrame, outVarName, treatVarName, treatVec[i])) 152 | 153 | } 154 | 155 | return(CFmeans) 156 | 157 | 158 | } 159 | 160 | # 2 LM 161 | CFmeansForTreatVecLM <- 162 | function(dataFrame, 163 | outVarName, 164 | treatVarName, 165 | treatVec) { 166 | CFmeans <- rep(0, length(treatVec)) 167 | 168 | 169 | for (i in 1:length(treatVec)) { 170 | CFmeans[i] <- 171 | mean(predCFoutLM(dataFrame, outVarName, treatVarName, treatVec[i])) 172 | 173 | } 174 | 175 | return(CFmeans) 176 | 177 | 178 | } 179 | 180 | 181 | CFprobsForTreatVecRF <- 182 | function(dataFrame, 183 | outVarName, 184 | treatVarName, 185 | treatVec) { 186 | CFprobs <- rep(0, length(treatVec)) 187 | 188 | 189 | for (i in 1:length(treatVec)) { 190 | CFprobs[i] <- 191 | mean(predCFprobRF(dataFrame, outVarName, treatVarName, treatVec[i])) 192 | 193 | } 194 | 195 | return(CFprobs) 196 | 197 | 198 | } 199 | 200 | 201 | 202 | # 1 LM for lasso 203 | # CFmeansForDecileBinsLM <- function(dataFrame, outVarName, treatVarName) { 204 | # fivePercentQuantiles <- quantile(dataFrame[[treatVarName]], prob = seq(0, 1, length = 21), type = 5, na.rm = TRUE) 205 | # evenQuantiles <- fivePercentQuantiles[seq(2, 20, by=2)] 206 | # 207 | # vec <- dataFrame[c(treatVarName)] 208 | # average <- mean(vec[,1]) 209 | # s = 0 210 | # for (i in 1:nrow(dataFrame[c(treatVarName)])){ 211 | # temp <- vec[i,] 212 | # s = s + abs(temp - average); 213 | # } 214 | # if(s > 0){ 215 | # return(CFmeansForTreatVecLM(dataFrame, outVarName, treatVarName, evenQuantiles)) 216 | # }else{ 217 | # return(-1) 218 | # } 219 | # } 220 | 221 | #1 LM for Lasso 222 | CFmeansForDecileBinsLM <- 223 | function(dataFrame, outVarName, treatVarName) { 224 | fivePercentQuantiles <- 225 | quantile( 226 | dataFrame[[treatVarName]], 227 | prob = seq(0, 1, length = 21), 228 | type = 5, 229 | na.rm = TRUE 230 | ) 231 | evenQuantiles <- fivePercentQuantiles[seq(2, 20, by = 2)] 232 | 233 | # replace Inf with NA 234 | dataFrame <- 235 | do.call(data.frame, lapply(dataFrame, function(x) 236 | replace(x, is.infinite(x), NA))) 237 | # remove NaN and NA 238 | dataFrame <- dataFrame[complete.cases(dataFrame),] 239 | if ((nrow(dataFrame) == 0) || (mean(dataFrame$Y) == 0)) { 240 | return (-1) 241 | } 242 | else{ 243 | vec <- dataFrame[c(treatVarName)] 244 | medianValue <- median(vec[, 1]) 245 | count <- 0 246 | for (i in 1:nrow(dataFrame[c(treatVarName)])) { 247 | temp <- vec[i,] 248 | if (temp == medianValue) { 249 | count <- count + 1 250 | } 251 | } 252 | if (count < nrow(dataFrame[c(treatVarName)]) - 3) { 253 | return(CFmeansForTreatVecLM(dataFrame, outVarName, treatVarName, evenQuantiles)) 254 | } else{ 255 | return(-1) 256 | } 257 | } 258 | } 259 | 260 | # 1 LM 261 | CFmeansForDecileBinsLM <- 262 | function(dataFrame, outVarName, treatVarName) { 263 | fivePercentQuantiles <- 264 | quantile( 265 | dataFrame[[treatVarName]], 266 | prob = seq(0, 1, length = 21), 267 | type = 5, 268 | na.rm = TRUE 269 | ) 270 | evenQuantiles <- fivePercentQuantiles[seq(2, 20, by = 2)] 271 | return(CFmeansForTreatVecLM(dataFrame, outVarName, treatVarName, evenQuantiles)) 272 | } 273 | 274 | #Where I quantile the treatment values 275 | CFprobsForDecileBinsRF <- 276 | function(dataFrame, outVarName, treatVarName) { 277 | fivePercentQuantiles <- 278 | quantile(dataFrame[[treatVarName]], 279 | prob = seq(0, 1, length = 21), 280 | type = 5) 281 | evenQuantiles <- fivePercentQuantiles[seq(2, 20, by = 2)] 282 | return(CFprobsForTreatVecRF(dataFrame, outVarName, treatVarName, evenQuantiles)) 283 | } 284 | #Find the largest and smallest numbers and return the difference 285 | maxContrast <- function(CFMeanVec) { 286 | maxCon <- 0 287 | index1 <- -1 288 | index2 <- -1 289 | 290 | for (i in 1:(length(CFMeanVec) - 1)) { 291 | for (j in (i + 1):length(CFMeanVec)) { 292 | if ((CFMeanVec[i] - CFMeanVec[j]) > maxCon) { 293 | maxCon <- CFMeanVec[i] - CFMeanVec[j] 294 | index1 <- i 295 | index2 <- j 296 | } 297 | else if ((CFMeanVec[j] - CFMeanVec[i]) > maxCon) { 298 | maxCon <- CFMeanVec[j] - CFMeanVec[i] 299 | index1 <- j 300 | index2 <- i 301 | } 302 | } 303 | } 304 | 305 | return(c(maxCon, index1, index2)) 306 | 307 | } 308 | 309 | normalize <- function(x) { 310 | # From https://stats.stackexchange.com/questions/70801/how-to-normalize-data-to-0-1-range 311 | x <- as.matrix(x) 312 | minAttr = apply(x, 2, min) 313 | maxAttr = apply(x, 2, max) 314 | x <- sweep(x, 2, minAttr, FUN = "-") 315 | x = sweep(x, 2, maxAttr - minAttr, "/") 316 | attr(x, 'normalized:min') = minAttr 317 | attr(x, 'normalized:max') = maxAttr 318 | return (x) 319 | } 320 | #Take the min and maximum of each outcome prediction (averaged) assign a suspiciousness for the variable 321 | computeSuspiciousness <- function(dataframe) { 322 | headers <- names(dataframe) 323 | result <- c(1, 1, 1) 324 | for (i in 1:length(dataframe)) { 325 | currentVec <- unlist(dataframe[[i]], use.names = FALSE) 326 | if (length(currentVec) >= 2) { 327 | vec <- maxContrast(currentVec) 328 | } else{ 329 | vec <- c(0, 0, 0) 330 | } 331 | result <- data.frame(result, vec) 332 | } 333 | result[, 1] <- NULL 334 | names(result) <- headers 335 | return(result) 336 | } 337 | 338 | getTheBiggest <- function(dataframe) { 339 | # return (names(dataframe)[order(-dataframe[1,])]) 340 | return (dataframe[order(-dataframe[1,])]) 341 | } 342 | #cleaning NA's 343 | ditch <- function(x) { 344 | temp <- as.matrix(x) 345 | for (i in temp) { 346 | if (i == "NaN") { 347 | print(i) 348 | i <- 0 349 | } 350 | if (i == "Inf") { 351 | print(i) 352 | } 353 | } 354 | y <- as.matrix(temp) 355 | print(y) 356 | # ifelse(is.infinite(x), 2147483647, x) 357 | } 358 | # 1 RF 359 | CFmeansForDecileBinsRF <- 360 | function(dataFrame, outVarName, treatVarName) { 361 | # replace Inf with NA 362 | dataFrame <- 363 | do.call(data.frame, lapply(dataFrame, function(x) 364 | replace(x, is.infinite(x), NA))) 365 | # remove NaN and NA 366 | dataFrame <- dataFrame[complete.cases(dataFrame), ] 367 | 368 | if ((nrow(dataFrame) < 1)) { 369 | return(0) 370 | } 371 | #print(treatVarName) 372 | if (grepl("P[0-9]", treatVarName, perl = TRUE)) { 373 | #if it is a predicate only make 2 bins for 1 and 0 cases 374 | 375 | quantiles <- 376 | quantile( 377 | dataFrame[[treatVarName]], 378 | prob = seq(0, 1, length = 2), 379 | type = 5, 380 | na.rm = TRUE 381 | ) 382 | return(CFmeansForTreatVecRF(dataFrame, outVarName, treatVarName, quantiles)) 383 | } else { 384 | if (is.finite(dataFrame[[treatVarName]]) && 385 | is.numeric(dataFrame[[treatVarName]])) { 386 | fivePercentQuantiles <- 387 | quantile( 388 | dataFrame[[treatVarName]], 389 | prob = seq(0, 1, length = 21), 390 | type = 5, 391 | na.rm = TRUE 392 | ) 393 | 394 | # Define the bins 395 | evenQuantiles <- 396 | fivePercentQuantiles[seq(2, 20, by = 2)] # 10 bins 397 | 398 | 399 | # evenQuantiles <- fivePercentQuantiles[seq(2, 20, by = 1)] # 19 bins 400 | return(CFmeansForTreatVecRF(dataFrame, outVarName, treatVarName, evenQuantiles)) 401 | } else{ 402 | if (includeStrings == 1) { 403 | suppressPackageStartupMessages({ 404 | library(stringdist) 405 | library(dbscan) 406 | }) 407 | dftemp <- dataFrame 408 | distmatrix <- 409 | stringdistmatrix(as.character(dftemp[[treatVarName]]), 410 | as.character(dftemp[[treatVarName]]), 411 | method = "dl") 412 | clustering.dbscan <- 413 | dbscan::dbscan(distmatrix, eps = 0.30, minPts = 10) 414 | 415 | dftemp$clusters <- clustering.dbscan$cluster 416 | 417 | dftemp <- 418 | aggregate(clusters ~ ., data = dftemp, FUN = median) 419 | 420 | return(CFmeansForTreatVecRF(dataFrame, outVarName, treatVarName, dftemp[[treatVarName]])) 421 | } 422 | } 423 | } 424 | } 425 | 426 | computeESP <- function(S_p_obs, F_p_obs, NumF, dataFrame) { 427 | S_p <- nrow(subset(dataFrame, Y == 0)) 428 | 429 | F_p <- nrow(subset(dataFrame, Y == 1)) 430 | 431 | sensitivity <- log(F_p) / log(NumF) 432 | 433 | increase_p <- F_p / (S_p + F_p) - F_p_obs / (S_p_obs + F_p_obs) 434 | 435 | importance_p <- 436 | 2 / ((1 / increase_p) + 1 / (log(F_p) / log(NumF))) 437 | return(importance_p) 438 | } 439 | 440 | CFmeansForESP <- function(dataFrame, outVarName, treatVarName) { 441 | NumF <- nrow(subset(dataFrame, Y == 1)) 442 | # print("NumF") 443 | 444 | # replace Inf with NA 445 | dataFrame <- 446 | do.call(data.frame, lapply(dataFrame, function(x) 447 | replace(x, is.infinite(x), NA))) 448 | # remove NaN and NA 449 | dataFrame <- dataFrame[complete.cases(dataFrame), ] 450 | 451 | if (nrow(dataFrame) < 1) { 452 | return(-1) 453 | } else{ 454 | if (is.finite(dataFrame[[treatVarName]]) && 455 | is.numeric(dataFrame[[treatVarName]])) { 456 | elastic <- data.frame(importance = c(0, 0, 0, 0, 0, 0, 0, 0, 0)) 457 | 458 | vec <- dataFrame[c(treatVarName)] 459 | mu <- mean(vec[, 1]) 460 | tau <- sd(vec[, 1]) 461 | 462 | S_p_obs <- nrow(subset(dataFrame, Y == 0)) 463 | # print("S_p_obs") 464 | # print(S_p_obs) 465 | F_p_obs <- nrow(subset(dataFrame, Y == 1)) 466 | # print("F_p_obs") 467 | # print(F_p_obs) 468 | 469 | elastic[1, 1] <- 470 | computeESP(S_p_obs, 471 | F_p_obs, 472 | NumF, 473 | subset(dataFrame, eval(as.name(treatVarName)) < mu - 3 * tau)) 474 | elastic[2, 1] <- 475 | computeESP(S_p_obs, 476 | F_p_obs, 477 | NumF, 478 | subset( 479 | dataFrame, 480 | eval(as.name(treatVarName)) >= mu - 3 * tau & 481 | eval(as.name(treatVarName)) < mu - 2 * tau 482 | )) 483 | elastic[3, 1] <- 484 | computeESP(S_p_obs, 485 | F_p_obs, 486 | NumF, 487 | subset( 488 | dataFrame, 489 | eval(as.name(treatVarName)) >= mu - 2 * tau & 490 | eval(as.name(treatVarName)) < mu - tau 491 | )) 492 | elastic[4, 1] <- 493 | computeESP(S_p_obs, 494 | F_p_obs, 495 | NumF, 496 | subset(dataFrame, eval(as.name(treatVarName)) >= mu - tau & 497 | eval(as.name(treatVarName)) < mu)) 498 | elastic[5, 1] <- 499 | computeESP(S_p_obs, F_p_obs, NumF, subset(dataFrame, eval(as.name(treatVarName)) == mu)) 500 | elastic[6, 1] <- 501 | computeESP(S_p_obs, 502 | F_p_obs, 503 | NumF, 504 | subset(dataFrame, eval(as.name(treatVarName)) > mu & 505 | eval(as.name(treatVarName)) <= mu + tau)) 506 | elastic[7, 1] <- 507 | computeESP(S_p_obs, 508 | F_p_obs, 509 | NumF, 510 | subset( 511 | dataFrame, 512 | eval(as.name(treatVarName)) > mu + tau & 513 | eval(as.name(treatVarName)) <= mu + 2 * tau 514 | )) 515 | elastic[8, 1] <- 516 | computeESP(S_p_obs, 517 | F_p_obs, 518 | NumF, 519 | subset( 520 | dataFrame, 521 | eval(as.name(treatVarName)) > mu + 2 * tau & 522 | eval(as.name(treatVarName)) <= mu + 3 * tau 523 | )) 524 | elastic[9, 1] <- 525 | computeESP(S_p_obs, 526 | F_p_obs, 527 | NumF, 528 | subset(dataFrame, eval(as.name(treatVarName)) > mu + 3 * tau)) 529 | 530 | elastic <- data.frame(elastic[complete.cases(elastic),]) 531 | 532 | if (nrow(elastic) == 0) { 533 | maxValue = -Inf 534 | 535 | 536 | } else{ 537 | maxValue <- abs(sort(elastic[, 1])[length(elastic[, 1])]) 538 | } 539 | 540 | return (maxValue) 541 | } else{ 542 | if (includeStrings == 1) { 543 | return (-1) 544 | } 545 | } 546 | } 547 | } 548 | computeBaah <- function(dataFrame, outVarName, treatVarName) { 549 | if (includeStrings == 1) { 550 | suppressPackageStartupMessages({ 551 | library(dplyr) 552 | library(tidyr) 553 | }) 554 | #library(tidyverse) 555 | dataFrame %>% mutate_if(is.numeric, replace_na, 0) %>% 556 | mutate_if(is.character, replace_na, "0") 557 | #print(dataFrame) 558 | dataFrame[-1][!is.na(dataFrame[-1])] <- 1 559 | dataFrame[-1][is.na(dataFrame[-1])] <- 0 560 | dataFrame[] <- 561 | lapply(dataFrame, function(x) 562 | as.numeric(as.character(x))) 563 | model <- 564 | lm(paste(outVarName, " ~ .", sep = ""), data = dataFrame) 565 | 566 | return(model$coefficients[2]) 567 | } else{ 568 | if (is.numeric(dataFrame[[treatVarName]])) { 569 | suppressPackageStartupMessages({ 570 | library(dplyr) 571 | library(tidyr) 572 | }) 573 | #library(tidyverse) 574 | dataFrame %>% mutate_if(is.numeric, replace_na, 0) %>% 575 | mutate_if(is.character, replace_na, "0") 576 | #print(dataFrame) 577 | dataFrame[-1][!is.na(dataFrame[-1])] <- 1 578 | dataFrame[-1][is.na(dataFrame[-1])] <- 0 579 | dataFrame[] <- 580 | lapply(dataFrame, function(x) 581 | as.numeric(as.character(x))) 582 | model <- 583 | lm(paste(outVarName, " ~ .", sep = ""), data = dataFrame) 584 | 585 | return(model$coefficients[2]) 586 | } 587 | } 588 | } 589 | 590 | is.nan.data.frame <- function(x) { 591 | do.call(cbind, lapply(x, is.nan)) 592 | } 593 | 594 | is.infinite.data.frame <- function(x) { 595 | do.call(cbind, lapply(x, is.infinite)) 596 | } 597 | 598 | #newoutput2 <- data.frame(t(newoutput)) 599 | #colnames(newoutput2) <- newoutput2[1,] 600 | # ============================= 601 | # Start HERE 602 | # input: newoutput, outY 603 | args <- commandArgs(trailingOnly = TRUE) 604 | includeStrings <- as.numeric(args[1]) 605 | 606 | newoutput <- 607 | read.table( 608 | "/unival/newoutput.txt", 609 | sep = "\t", 610 | quote = "", 611 | comment.char = "", 612 | stringsAsFactors = FALSE, 613 | fill = TRUE 614 | ) 615 | 616 | 617 | outY <- 618 | read.table( 619 | "/unival/outY.txt", 620 | quote = "\"", 621 | comment.char = "" 622 | ) 623 | # ============================= 624 | newoutput <- data.frame(newoutput) 625 | rownames(newoutput) <- newoutput[, 1] 626 | newoutput <- newoutput[,-1] 627 | newoutput <- as.data.frame(t(newoutput), stringsAsFactors = FALSE) 628 | newoutput[] <- lapply(newoutput, type.convert, as.is = TRUE) 629 | # Y <- data.frame(t(outY)) 630 | # names(Y) <- c("Y") 631 | outY <- data.frame(outY[, 2]) 632 | names(outY) <- c("Y") 633 | numfldata <- newoutput 634 | #newoutput <-do.call(data.frame, lapply(newoutput, function(x)replace(x,!is.finite(x), NA))) 635 | fault_binerrs_all <- data.frame(outY, newoutput) 636 | 637 | # trainCFoutPredRF(TestShimple_fault_binerrs_all, "Y") 638 | CFmeanResult <- genCFmeansRF_fault_binerrs() 639 | # for RF 640 | maxContrastDF <- computeSuspiciousness(CFmeanResult) 641 | result <- getTheBiggest(maxContrastDF) 642 | # for ESP 643 | resultESP <- getTheBiggest(genCFmeansESP_fault_binerrs()) 644 | #maxContrastESP<- compute(resultESP) 645 | #resultESP<-getTheBiggest(maxContrastESP) 646 | 647 | #=========For boxplot=============== 648 | # cbind the fault rate 649 | faultRate <- sum(outY) / nrow(outY) 650 | tempResultCF <- result 651 | tempResultESP <- resultESP 652 | 653 | write.csv( 654 | tempResultCF, 655 | file = "/unival/resultUniVal.csv", 656 | ) 657 | write.csv( 658 | tempResultESP, 659 | file = "/unival/resultESP.csv" 660 | ) 661 | # only the first time 662 | #meanResult <- tempResult 663 | 664 | # other runs (2nd, 3rd, 4th, 5th time) 665 | #meanResult <- rbind(meanResult, tempResult) 666 | 667 | # after 5 runs 668 | # meanResult <- rbind(meanResult, colMeans(meanResult)) 669 | # meanResult <- meanResult[order(meanResult[6,], decreasing = T)] 670 | 671 | # output to a excel 672 | #write.csv(Result, file = "/result.csv") 673 | 674 | #========================= 675 | 676 | # rbind with each result, run FOR 10 TIMES 677 | #meanResult <- rbind (meanResult, result[1,]) 678 | # remove wrong lines 679 | # meanResult<- meanResult[-c(2),] 680 | # make a copy of meanResult 681 | # meanResultCopy <- meanResult 682 | 683 | # sort by the mean Y 684 | #meanResult <- rbind(meanResult, colMeans(meanResult)) 685 | #meanResult <- meanResult[order(meanResult[6,], decreasing = T)] 686 | # resultForPlot <- resultForPlot[-nrow(resultForPlot),] 687 | # only cares about the top 20 variable in the rank 688 | # resultForPlot <- resultForPlot[,1:20] 689 | # boxplot(resultForPlot, las = 2) 690 | -------------------------------------------------------------------------------- /unival-docker/src/StructuredDataCollector.java: -------------------------------------------------------------------------------- 1 | import java.io.*; 2 | import java.util.*; 3 | import java.util.regex.Pattern; 4 | 5 | public class StructuredDataCollector { 6 | public static void structureData(String filePath, HashMap> causalMap, 7 | Set usedVariables) { 8 | // 7 Columns of data processed from output file - output.txt is the default 9 | // Class, method, line, scope, variable, version, value 10 | BufferedReader reader; 11 | HashMap> variableVersionValueArrayMap = new HashMap<>(); 12 | try { 13 | BufferedWriter testWrt = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("outY.txt"))); 14 | BufferedWriter wrtTruth = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("truth.txt"))); 15 | BufferedWriter wrtDiff = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("diff.txt"))); 16 | // Change name of data file accordingly 17 | reader = new BufferedReader(new FileReader(filePath)); 18 | String line = reader.readLine(); 19 | HashMap variableVersionValueMap = new HashMap<>(); 20 | 21 | int numExecutions = 0; 22 | int index = 1; 23 | String failIndicator = ""; 24 | while ((line = reader.readLine()) != null) { 25 | 26 | if (line.contains("*** new execution ***")) { 27 | 28 | failIndicator = line.split(",")[2]; 29 | testWrt.append(index + "\t" + failIndicator); 30 | wrtTruth.write("0"); 31 | wrtTruth.write('\n'); 32 | if (failIndicator.equals("1")) { 33 | wrtDiff.write("1"); 34 | } else { 35 | wrtDiff.write("0"); 36 | } 37 | wrtDiff.write('\n'); 38 | testWrt.append('\n'); 39 | index++; 40 | testWrt.flush(); 41 | HashMap variableVersionValueArrayChecklist = new HashMap<>(); 42 | 43 | for (String variable : variableVersionValueArrayMap.keySet()) { 44 | variableVersionValueArrayChecklist.put(variable, false); 45 | } 46 | for (String variable : variableVersionValueMap.keySet()) { 47 | if (!variableVersionValueArrayMap.containsKey(variable)) { 48 | variableVersionValueArrayMap.put(variable, new ArrayList()); 49 | for (int i = 0; i < numExecutions; i++) { 50 | 51 | variableVersionValueArrayMap.get(variable).add("NA"); 52 | 53 | } 54 | variableVersionValueArrayMap.get(variable).add(variableVersionValueMap.get(variable)); 55 | 56 | variableVersionValueArrayChecklist.put(variable, true); 57 | } else { 58 | 59 | variableVersionValueArrayMap.get(variable).add(variableVersionValueMap.get(variable)); 60 | variableVersionValueArrayChecklist.put(variable, true); 61 | } 62 | } 63 | 64 | for (String variable : variableVersionValueArrayChecklist.keySet()) { 65 | if (!variableVersionValueArrayChecklist.get(variable)) { 66 | 67 | variableVersionValueArrayMap.get(variable).add("NA"); 68 | } 69 | } 70 | variableVersionValueMap.clear(); 71 | line = reader.readLine(); 72 | numExecutions++; 73 | continue; 74 | } 75 | String[] row = line.split(","); 76 | int len = row.length; 77 | // Where the magic happens with each row containing information is decomposed 78 | if (len == 6) { 79 | // String className = row[0]; 80 | // String methodName = row[1]; 81 | // String lineNumber = row[2]; 82 | // String scope = row[3]; 83 | String variable = row[3]; 84 | String version = row[4]; 85 | Double value = 0.0; 86 | String strValue = ""; 87 | boolean isnum = false; 88 | 89 | if (row[len - 1] != null) { 90 | row[len - 1] = row[len - 1].trim(); 91 | // if (row[len - 1].contains("@") || row[len - 1].contains("java") || row[len - 92 | // 1].contains("org") || row[len - 1].contains(":") || row[len - 93 | // 1].contains("[")|| row[len - 1].contains("\n")||row[len - 1].contains("\t")){ 94 | if (row[len - 1].contains("\n") || row[len - 1].contains("\t")) { 95 | row[len - 1] = row[len - 1].replace("\n", ""); 96 | row[len - 1] = row[len - 1].replace("\t", ""); 97 | } else if (row[len - 1].equals("true")) { 98 | value = 1.0; 99 | isnum = true; 100 | } else if (row[len - 1].equals("false")) { 101 | value = 0.0; 102 | isnum = true; 103 | } else if (row[len - 1].equalsIgnoreCase("null")) { 104 | value = Double.POSITIVE_INFINITY; 105 | isnum = true; 106 | } else if (row[len - 1].contains("/")) { 107 | String[] fract = row[len - 1].split("/"); 108 | if (fract.length == 2 && isNumeric(fract[0]) && isNumeric(fract[1])) { 109 | 110 | double num = Double.valueOf(fract[0]); 111 | double den = Double.valueOf(fract[1]); 112 | value = num / den; 113 | isnum = true; 114 | } else { 115 | strValue = row[len - 1]; 116 | } 117 | } else { 118 | if (isNumeric(row[len - 1])) { 119 | value = Double.valueOf(row[len - 1]); 120 | isnum = true; 121 | } else { 122 | strValue = row[len - 1]; 123 | } 124 | } 125 | if (variable.startsWith("_")) 126 | variable = "UNDERSCORE" + variable; 127 | 128 | if (isnum) { 129 | variableVersionValueMap.put(variable + "_" + version, Double.toString(value)); 130 | } else { 131 | variableVersionValueMap.put(variable + "_" + version, strValue); 132 | } 133 | 134 | } 135 | } else { 136 | continue; 137 | } 138 | } 139 | reader.close(); 140 | 141 | BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("newoutput.txt"))); 142 | BufferedWriter writer2 = new BufferedWriter( 143 | new OutputStreamWriter(new FileOutputStream("newoutput.txt.full"))); 144 | 145 | for (String s : variableVersionValueArrayMap.keySet()) { 146 | ArrayList list = variableVersionValueArrayMap.get(s); 147 | 148 | if (Pattern.matches("^[a-zA-Z_]\\w*$", s)) { 149 | writer.write(s); 150 | for (int i = 0; i < list.size(); i++) { 151 | try { 152 | if (isNumeric(list.get(i))) { 153 | writer.write("\t" + Double.parseDouble(list.get(i))); 154 | } else { 155 | writer.write("\t" + list.get(i)); 156 | } 157 | } catch (NumberFormatException e) { 158 | writer.write("\tNA"); 159 | } 160 | } 161 | writer.write("\n"); 162 | writer.flush(); 163 | } 164 | writer2.write(s); 165 | for (int i = 0; i < list.size(); i++) { 166 | try { 167 | if (isNumeric(list.get(i))) { 168 | writer2.write("\t" + Double.parseDouble(list.get(i))); 169 | } else { 170 | writer2.write("\t" + list.get(i)); 171 | } 172 | } catch (NumberFormatException e) { 173 | writer2.write("\tNA"); 174 | } 175 | } 176 | writer2.write("\n"); 177 | writer2.flush(); 178 | 179 | } 180 | 181 | writer.close(); 182 | writer2.close(); 183 | testWrt.close(); 184 | wrtTruth.close(); 185 | wrtDiff.close(); 186 | } catch (IOException e) { 187 | e.printStackTrace(); 188 | } 189 | try { 190 | genRForCFmeansRF("RforCFmeansRF.R", "fault_binerrs_all", "fault_binerrs", "Y", causalMap, 191 | variableVersionValueArrayMap.keySet(), usedVariables); 192 | } catch (IOException e) { 193 | e.printStackTrace(); 194 | } 195 | } 196 | 197 | public static boolean isDigit(String str) { 198 | return str.matches("-?\\d+(\\.\\d+)?"); // match a number with optional '-' and decimal. 199 | } 200 | 201 | public static boolean isNumeric(String strNum) { 202 | if (strNum == null) { 203 | return false; 204 | } 205 | try { 206 | double d = Double.parseDouble(strNum); 207 | } catch (NumberFormatException nfe) { 208 | return false; 209 | } 210 | return true; 211 | } 212 | 213 | // Method that generates the R function that will be used by the Random forest 214 | // (RFC.R) script 215 | private static void genRForCFmeansRF(String RFileName, String varFrameName, String prefix, String outName, 216 | HashMap> covariant, Set usedVariables, Set faultCandidates) 217 | throws IOException { 218 | 219 | OutputStream out = new FileOutputStream(RFileName); 220 | BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); 221 | BufferedWriter testInf = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("info.txt"))); 222 | int variableCounter = 1; 223 | 224 | writer.write("genCFmeansRF_" + prefix + " <- function() {\n\n"); 225 | // for RF 226 | writer.write("results <- list()\n\n"); 227 | writer.write("varnames <- list()\n\n"); 228 | // for esp 229 | // writer.write("results <- data.frame(row.names = \"mean\")\n\n"); 230 | 231 | for (String t : covariant.keySet()) { 232 | System.out.println(t); 233 | if (!usedVariables.contains(t))// ||!faultCandidates.contains(t)) //Change this accordingly if restrictions 234 | // on which variables are considered exist 235 | continue; 236 | if (t.startsWith("_")) 237 | t = "UNDERSCORE" + t; 238 | // for (Value t : treatNames){ 239 | String vfn = varFrameName; 240 | // for confounder 241 | String tfn = prefix + "_" + t + "_treat_df"; 242 | // for no confounder 243 | String tfn_nocnfd = prefix + "_" + t + "_treat_nocnfd_df"; 244 | 245 | // // for tfn 246 | writer.write( 247 | tfn + " <- data.frame(" + outName + "=" + vfn + "$" + outName + ", " + t + "=" + vfn + "$" + t); 248 | HashSet set = covariant.get(t); 249 | for (String c : set) { 250 | if (!usedVariables.contains(c)) 251 | continue; 252 | if (c.startsWith("_")) 253 | c = "UNDERSCORE" + c; 254 | writer.write(", " + c + "=" + vfn + "$" + c); 255 | } 256 | 257 | // for tfn_nocnfd 258 | // writer.write(tfn_nocnfd + " <- data.frame(" + outName + "=" + vfn + "$" + 259 | // outName + ", " + t + "=" + vfn + "$" + t); 260 | 261 | writer.write(", stringsAsFactors = FALSE)\n"); 262 | 263 | // to remove NA 264 | // writer.write(tfn + " <- " + tfn + "[complete.cases(" + tfn + "),]" + '\n'); 265 | 266 | // Only treatement, no confounder (ESP) 267 | // writer.write("results[[\"" + t + "\"]] <- CFmeansForESP(" + tfn_nocnfd + ", 268 | // \"" + outName + "\", \"" + t + "\""); 269 | writer.write("numFLOut <- data.frame(" + t + "=" + "numfldata" + "$" + t); 270 | for (String c : set) { 271 | if (!usedVariables.contains(c)) 272 | continue; 273 | if (c.startsWith("_")) 274 | c = "UNDERSCORE" + c; 275 | writer.write(", " + c + "=" + "numfldata" + "$" + c); 276 | } 277 | if (set.isEmpty()) { 278 | writer.write(", " + t + "=" + "numfldata" + "$" + t); 279 | } 280 | writer.write(")\n"); 281 | // For random forest 282 | writer.write("id <- rownames(numFLOut)\n"); 283 | writer.write("numFLOut<-cbind(id,numFLOut)\n"); 284 | writer.write("write.table(numFLOut,file=\"./NUMFL/" + variableCounter + ".txt\"" 285 | + ",quote = F,row.names = F, col.names=T)\n"); 286 | 287 | writer 288 | .write("results[[\"" + t + "\"]] <- CFmeansForDecileBinsRF(" + tfn + ", \"" + outName + "\", \"" + t 289 | + "\""); 290 | 291 | writer.write(")\n"); 292 | writer 293 | .write("varnames[[" + variableCounter + "]] <- \"" + t + "\""); 294 | writer.write("\n\n"); 295 | 296 | variableCounter++; 297 | } 298 | // write info for numfl and coverage based methods 299 | testInf.write(variableCounter + "\n"); 300 | Random rand = new Random(); 301 | int dummy_version = rand.nextInt(variableCounter); 302 | testInf.write(dummy_version + "\n"); 303 | testInf.close(); 304 | writer.write( 305 | "varframe <- data.frame(matrix(unlist(varnames), nrow = length(varnames), byrow=T),stringsAsFactors=FALSE)\n"); 306 | writer.write("names(varframe)<- \" Variables \"\n"); 307 | writer.write("ID<-rownames(varframe)\n"); 308 | writer.write("varframe<-cbind(ID,varframe)\n"); 309 | writer.write("write.csv(varframe, file=\"./NUMFL/numflvariables.csv\",row.names = F)\n\n"); 310 | writer.write("return(results)\n\n"); 311 | writer.write("}\n"); 312 | writer.flush(); 313 | 314 | writer.write("genCFmeansESP_" + prefix + " <- function() {\n\n"); 315 | // for RF 316 | writer.write("results <- data.frame(row.names=seq(1, 10))\n\n"); 317 | writer.write("Baah2010 <- data.frame(row.names=\"Baah2010\")\n\n"); 318 | // for esp 319 | // writer.write("results <- data.frame(row.names = \"mean\")\n\n"); 320 | 321 | for (String t : covariant.keySet()) { 322 | System.out.println(t); 323 | if (!usedVariables.contains(t)) 324 | continue; 325 | if (t.startsWith("_")) 326 | t = "UNDERSCORE" + t; 327 | // for (Value t : treatNames){ 328 | String vfn = varFrameName; 329 | // for confounder 330 | String tfn = prefix + "_" + t + "_treat_df"; 331 | // for no confounder 332 | String tfn_nocnfd = prefix + "_" + t + "_treat_nocnfd_df"; 333 | 334 | // // for tfn 335 | writer.write( 336 | tfn + " <- data.frame(" + outName + "=" + vfn + "$" + outName + ", " + t + "=" + vfn + "$" + t); 337 | HashSet set = covariant.get(t); 338 | for (String c : set) { 339 | if (!usedVariables.contains(c)) 340 | continue; 341 | if (c.startsWith("_")) 342 | c = "UNDERSCORE" + c; 343 | writer.write(", " + c + "=" + vfn + "$" + c); 344 | } 345 | 346 | // for tfn_nocnfd 347 | // writer.write(tfn_nocnfd + " <- data.frame(" + outName + "=" + vfn + "$" + 348 | // outName + ", " + t + "=" + vfn + "$" + t); 349 | 350 | writer.write(", stringsAsFactors = FALSE)\n"); 351 | 352 | // to remove NA 353 | // writer.write(tfn + " <- " + tfn + "[complete.cases(" + tfn + "),]" + '\n'); 354 | 355 | // Only treatement, no confounder (ESP) 356 | // writer.write("results[[\"" + t + "\"]] <- CFmeansForESP(" + tfn_nocnfd + ", 357 | // \"" + outName + "\", \"" + t + "\""); 358 | 359 | // For random forest 360 | writer 361 | .write("results[[\"" + t + "\"]] <- CFmeansForESP(" + tfn + ", \"" + outName + "\", \"" + t + "\""); 362 | writer.write(")\n"); 363 | 364 | writer 365 | .write("Baah2010[[\"" + t + "\"]] <- computeBaah(" + tfn + ", \"" + outName + "\", \"" + t + "\""); 366 | writer.write(")\n\n"); 367 | // For LM and LASSO 368 | // writer.write("results[[\"" + t + "\"]] <- CFmeansForDecileBinsLM(" + tfn + ", 369 | // \"" + outName + "\", \"" + t + "\""); 370 | 371 | } 372 | writer.write( 373 | "write.csv(getTheBiggest(Baah2010),file = \"/unival/resultBaah2010.csv\")\n\n"); 374 | 375 | writer.write("return(results)\n\n"); 376 | writer.write("}\n"); 377 | writer.flush(); 378 | writer.close(); 379 | } 380 | } 381 | --------------------------------------------------------------------------------