├── .github └── workflows │ └── main.yml ├── .gitignore ├── .ocp-indent ├── CHANGES.md ├── Dockerfile.esy ├── LICENSE ├── Makefile ├── README.md ├── dune ├── dune-project ├── package.json ├── src ├── lib │ ├── compat.ml │ ├── dune │ ├── tsort.ml │ └── tsort.mli └── test │ ├── dune │ └── test_tsort.ml └── tsort.opam /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the action will run. Triggers the workflow on push or pull request 6 | # events but only for the master branch 7 | on: 8 | push: 9 | branches: [ main ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 14 | jobs: 15 | # This workflow contains a single job called "build" 16 | build: 17 | # The type of runner that the job will run on 18 | runs-on: macos-latest 19 | 20 | # Steps represent a sequence of tasks that will be executed as part of the job 21 | steps: 22 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 23 | - uses: actions/checkout@v4 24 | 25 | - name: Install build dependencies via homebrew 26 | run: | 27 | echo Installing the OCaml toolchain 28 | brew install gpatch 29 | brew install ocaml opam 30 | opam init -y 31 | 32 | - name: Install OCaml dependencies from opam 33 | run: | 34 | echo Installing OCaml dependencies 35 | opam install -y --with-test . 36 | 37 | - name: Run tests 38 | run: | 39 | eval $(opam env) 40 | dune runtest 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.annot 2 | *.cmo 3 | *.cma 4 | *.cmi 5 | *.a 6 | *.o 7 | *.cmx 8 | *.cmxs 9 | *.cmxa 10 | 11 | # ocamlbuild working directory 12 | _build/ 13 | 14 | # ocamlbuild targets 15 | *.byte 16 | *.native 17 | 18 | # oasis generated files 19 | setup.data 20 | setup.log 21 | 22 | # Merlin configuring file for Vim and Emacs 23 | .merlin 24 | 25 | # Ignoring Esy 26 | _esy 27 | *esy.lock* 28 | node_modules 29 | *.install -------------------------------------------------------------------------------- /.ocp-indent: -------------------------------------------------------------------------------- 1 | # See https://github.com/OCamlPro/ocp-indent/blob/master/.ocp-indent for more 2 | 3 | # Indent for clauses inside a pattern-match (after the arrow): 4 | # match foo with 5 | # | _ -> 6 | # ^^^^bar 7 | # the default is 2, which aligns the pattern and the expression 8 | match_clause = 4 9 | 10 | # When nesting expressions on the same line, their indentation are in 11 | # some cases stacked, so that it remains correct if you close them one 12 | # at a line. This may lead to large indents in complex code though, so 13 | # this parameter can be used to set a maximum value. Note that it only 14 | # affects indentation after function arrows and opening parens at end 15 | # of line. 16 | # 17 | # for example (left: `none`; right: `4`) 18 | # let f = g (h (i (fun x -> # let f = g (h (i (fun x -> 19 | # x) # x) 20 | # ) # ) 21 | # ) # ) 22 | max_indent = 2 23 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2.1.0 4 | 5 | Allow the same dependency to be specified more than once. 6 | This allows correct handling of inputs like this: 7 | 8 | ``` 9 | Tsort.sort_strongly_connected_components @@ 10 | [(1, [2]); (2, [1]); (1, [3])] 11 | ``` 12 | 13 | Tsort no longer depends on the containers library. 14 | 15 | # 2.0.0 16 | 17 | API overhaul, support for cyclic graphs (thanks to Martin Jambon). 18 | -------------------------------------------------------------------------------- /Dockerfile.esy: -------------------------------------------------------------------------------- 1 | FROM superherointj/archlinux-esy 2 | 3 | COPY src ./src/ 4 | COPY *.opam *.json dune dune-project Makefile ./ 5 | 6 | RUN npm run install 7 | RUN npm run build 8 | RUN npm run test 9 | RUN npm run clean 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Daniil Baturin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build 2 | build: 3 | dune build 4 | 5 | .PHONY: test-esy 6 | test-esy: 7 | docker build -f Dockerfile.esy -t tsort-esy . 8 | 9 | .PHONY: test 10 | test: 11 | dune exec src/test/test_tsort.exe 12 | 13 | .PHONY: test-complete 14 | test-complete: test test-esy 15 | 16 | .PHONY: install 17 | install: 18 | dune install 19 | 20 | .PHONY: clean 21 | clean: 22 | dune clean 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ocaml-tsort 2 | =========== 3 | 4 | ![maintenance-status](https://img.shields.io/badge/maintenance-passively--maintained-yellowgreen.svg) 5 | ![Build](https://github.com/dmbaturin/ocaml-tsort/actions/workflows/main.yml/badge.svg) 6 | 7 | ocaml-tsort is a library for sorting graphs in topological order. Its UI/UX is inspired by the classic UNIX `tsort(1)`. 8 | 9 | * Uses Kahn's algorithm. 10 | * Easy to use, but not very fast. 11 | * Provides friendly error reporting (e.g., if there's a cycle, tells you what the offending nodes are). 12 | 13 | The input type is (`('a * 'a list) list`). Essentially, a list of "tasks" mapped to lists of their dependencies. 14 | 15 | # Sorting DAGs 16 | 17 | Most of the time cyclic dependencies are bad. The main function, `Tsort.sort` returns value of this type: 18 | 19 | ``` 20 | type 'a sort_result = 21 | Sorted of 'a list 22 | | ErrorCycle of 'a list 23 | ``` 24 | 25 | The function is: 26 | 27 | ``` 28 | val sort : ('a * 'a list) list -> 'a sort_result 29 | ``` 30 | 31 | Examples: 32 | 33 | ``` 34 | # Tsort.sort [ 35 | ("foundation", []); 36 | ("walls", ["foundation"]); 37 | ("roof", ["walls"]) 38 | ] ;; 39 | - : string Tsort.sort_result = Tsort.Sorted ["foundation"; "walls"; "roof"] 40 | 41 | # Tsort.sort [ 42 | ("foundation", ["building permit"]); 43 | ("walls", ["foundation"]); 44 | ("roof", ["walls"]) 45 | ] ;; 46 | - : string Tsort.sort_result = 47 | Tsort.Sorted ["building permit"; "foundation"; "walls"; "roof"] 48 | 49 | # Tsort.sort [ 50 | ("foundation", ["roof"]); 51 | ("walls", ["foundation"]); 52 | ("roof", ["walls"]) 53 | ] ;; 54 | - : string Tsort.sort_result = Tsort.ErrorCycle ["roof"; "foundation"; "walls"] 55 | ``` 56 | 57 | As you can see from the second example, if there's a dependency on a node that doesn't exist in the input, 58 | it's automatically inserted, and assumed to have no dependencies. 59 | 60 | # Detecting non-existent dependencies 61 | 62 | If your graph comes directly from user input, there's a good chance that dependency on a non-existent node 63 | is a user error. 64 | 65 | To prevent it, use `Tsort.find_nonexistent_nodes`. Example: 66 | 67 | ``` 68 | # Tsort.find_nonexistent_nodes [ 69 | ("foundation", ["building permit"]); 70 | ("walls", ["foundation"]); 71 | ("roof", ["walls"])] ;; 72 | - : (string * string list) list = [("foundation", ["building permit"])] 73 | ``` 74 | 75 | # Sorting graphs with cycles 76 | 77 | Sometimes cycles are fine. In this case you can use `Tsort.sort_strongly_connected_components` to split 78 | your graph into strongly connected components and sort its condensation. 79 | 80 | Contrived example: suppose you want to line up the [Addams family](https://en.wikipedia.org/wiki/The_Addams_Family) 81 | so that children come after parents, but spouse and sibling pairs are not separated. 82 | 83 | ``` 84 | Tsort.sort_strongly_connected_components [ 85 | "Morticia", ["Gomez"; "Grandmama"]; 86 | "Gomez", ["Morticia"; "Grandmama"]; 87 | "Wednesday", ["Morticia"; "Gomez"; "Pugsley"]; 88 | "Pugsley", ["Morticia"; "Gomez"; "Wednesday"]; 89 | "Grandmama", []; 90 | "Fester", [] 91 | ] 92 | ;; 93 | 94 | - : string list list = 95 | [["Fester"]; ["Grandmama"]; ["Morticia"; "Gomez"]; ["Wednesday"; "Pugsley"]] 96 | 97 | ``` 98 | 99 | There's also `Tsort.find_strongly_connected_components` if you just want to find what them. 100 | For the data above, it would return `[["Morticia"; "Gomez"]; ["Wednesday"; "Pugsley"]; ["Grandmama"]; ["Fester"]]`. 101 | 102 | # Contributing 103 | 104 | To run our complete test suite, run `make test-complete` (requires docker). 105 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmbaturin/ocaml-tsort/0a4e3fff34eb4066ebdb1ced0632e5c6ed9fcf7b/dune -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.9) 2 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tsort", 3 | "scripts": { 4 | "install": "esy", 5 | "build": "esy build", 6 | "test": "esy make test", 7 | "clean": "esy make clean; rm -rf _esy _build node_modules esy.lock *.install" 8 | }, 9 | "esy": { 10 | "build": "dune build -p #{self.name}", 11 | "buildDev": "refmterr dune build -p #{self.name}" 12 | }, 13 | "dependencies": { 14 | "ocaml": "*", 15 | "@opam/dune": "*", 16 | "@opam/containers": "*" 17 | }, 18 | "devDependencies": { 19 | "refmterr": "*", 20 | "@opam/merlin": "*", 21 | "@opam/alcotest": "*" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/lib/compat.ml: -------------------------------------------------------------------------------- 1 | (* 2 | Basic functions not available in older versions of OCaml's standard 3 | library. 4 | *) 5 | 6 | module Hashtbl = struct 7 | let find_opt tbl key = 8 | try Some (Hashtbl.find tbl key) 9 | with Not_found -> None 10 | 11 | let list_keys tbl = 12 | Hashtbl.fold (fun k _ acc -> k :: acc) tbl [] 13 | end 14 | 15 | module List = struct 16 | let filter_map f l = 17 | List.fold_left (fun acc x -> 18 | match f x with 19 | | None -> acc 20 | | Some y -> y :: acc 21 | ) [] l 22 | |> List.rev 23 | 24 | let find_opt f l = 25 | try Some (List.find f l) 26 | with Not_found -> None 27 | 28 | let rec remove ?(eq=(=)) ~key xs = 29 | match xs with 30 | | [] -> [] 31 | | x :: xs -> 32 | if eq x key then xs 33 | else x :: (remove ~eq:eq ~key:key xs) 34 | end 35 | -------------------------------------------------------------------------------- /src/lib/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name tsort) 3 | (public_name tsort)) 4 | -------------------------------------------------------------------------------- /src/lib/tsort.ml: -------------------------------------------------------------------------------- 1 | (* User-friendly topological sort based on Kahn's algorithm. 2 | 3 | Usage example: sort [("foundation", []); ("basement", ["foundation"]);] 4 | 5 | Authors: Daniil Baturin (2019), Martin Jambon (2020). 6 | *) 7 | 8 | type 'a sort_result = 9 | | Sorted of 'a list 10 | | ErrorCycle of 'a list 11 | 12 | (* Deduplicate list items. 13 | 14 | Differences with CCList.uniq: 15 | - when an item is duplicated, keep the first item encountered rather than 16 | the last: [1;2;3;1] gives [1;2;3] (not [2;3;1]). 17 | - complexity is O(n), not O(n^2). 18 | *) 19 | let deduplicate l = 20 | let tbl = Hashtbl.create (List.length l) in 21 | List.fold_left (fun acc x -> 22 | if Hashtbl.mem tbl x then 23 | acc 24 | else ( 25 | Hashtbl.add tbl x (); 26 | x :: acc 27 | ) 28 | ) [] l 29 | |> List.rev 30 | 31 | 32 | let graph_hash_of_list l = 33 | let update h k v = 34 | let orig_v = Compat.Hashtbl.find_opt h k in 35 | match orig_v with 36 | | None -> Hashtbl.add h k v 37 | | Some orig_v -> 38 | (* Allow "partial" dependency lists like [(1, [2]); (1, [3]); (2, [1])]. 39 | Sometimes it's a more natural way to write cyclic graphs. 40 | *) 41 | Hashtbl.replace h k (List.append orig_v v) 42 | in 43 | let tbl = Hashtbl.create 100 in 44 | let () =List.iter (fun (k, v) -> update tbl k v) l in 45 | let () = Hashtbl.filter_map_inplace (fun _ xs -> Some (deduplicate xs)) tbl in 46 | tbl 47 | 48 | (* Finds "isolated" nodes, 49 | that is, nodes that have no dependencies *) 50 | let find_isolated_nodes hash = 51 | let aux id deps acc = 52 | match deps with 53 | | [] -> id :: acc 54 | | _ -> acc 55 | in Hashtbl.fold aux hash [] 56 | 57 | (* Takes a node name list and removes all those nodes from a hash *) 58 | let remove_nodes nodes hash = 59 | List.iter (Hashtbl.remove hash) nodes 60 | 61 | (* Walks through a node:dependencies hash and removes a dependency 62 | from all nodes that have it in their dependency lists *) 63 | let remove_dependency hash dep = 64 | let aux dep hash id = 65 | let deps = Hashtbl.find hash id in 66 | let deps = 67 | if List.exists ((=) dep) deps then 68 | Compat.List.remove ~eq:(=) ~key:dep deps 69 | else deps 70 | in 71 | begin 72 | Hashtbl.remove hash id; 73 | Hashtbl.add hash id deps 74 | end 75 | in 76 | let ids = Compat.Hashtbl.list_keys hash in 77 | List.iter (aux dep hash) ids 78 | 79 | (* Finds non-existent nodes, 80 | that is, nodes that are mentioned in the value part of the assoc list, 81 | but don't exist among the assoc list keys. 82 | 83 | Complexity is O(n), where n = |V| + |E|. 84 | *) 85 | let find_nonexistent_nodes nodes = 86 | let graph = Hashtbl.create (List.length nodes) in 87 | List.iter (fun (u, vl) -> Hashtbl.add graph u vl) nodes; 88 | Compat.List.filter_map (fun (u, vl) -> 89 | let missing = 90 | List.filter (fun v -> not (Hashtbl.mem graph v)) vl 91 | |> deduplicate 92 | in 93 | match missing with 94 | | [] -> None 95 | | missing -> Some (u, missing) 96 | ) nodes 97 | 98 | (* 99 | Append missing nodes to the graph, in the order in which they were 100 | encountered. This particular order doesn't have to be guaranteed by the 101 | API but seems nice to have. 102 | *) 103 | let _add_missing_nodes graph_l graph = 104 | let missing = 105 | List.fold_left (fun acc (_, vl) -> 106 | List.fold_left (fun acc v -> 107 | if not (Hashtbl.mem graph v) then 108 | (v, []) :: acc 109 | else 110 | acc 111 | ) acc vl 112 | ) [] graph_l 113 | |> List.rev 114 | in 115 | List.iter (fun (v, vl) -> Hashtbl.replace graph v vl) missing; 116 | graph_l @ missing 117 | 118 | (* The public version of [_add_missing_nodes] 119 | that doesn't require a graph hash argument. 120 | *) 121 | let add_missing_nodes graph = 122 | let graph_hash = graph_hash_of_list graph in 123 | _add_missing_nodes graph graph_hash 124 | 125 | (* The Kahn's algorithm: 126 | 1. Find nodes that have no dependencies ("isolated") and remove them from 127 | the graph hash. 128 | Add them to the initial sorted nodes list and the list of isolated 129 | nodes for the first sorting pass. 130 | 2. For every isolated node, walk through the remaining nodes and 131 | remove it from their dependency list. 132 | Nodes that only depended on it now have empty dependency lists. 133 | 3. Find all nodes with empty dependency lists and append them to the sorted 134 | nodes list _and_ the list of isolated nodes to use for the next step 135 | 4. Repeat until the list of isolated nodes is empty 136 | 5. If the graph hash is still not empty, it means there is a cycle. 137 | *) 138 | let sort nodes = 139 | let rec sorting_loop deps hash acc = 140 | match deps with 141 | | [] -> acc 142 | | dep :: deps -> 143 | let () = remove_dependency hash dep in 144 | let isolated_nodes = find_isolated_nodes hash in 145 | let () = remove_nodes isolated_nodes hash in 146 | sorting_loop 147 | (List.append deps isolated_nodes) hash (List.append acc isolated_nodes) 148 | in 149 | let nodes_hash = graph_hash_of_list nodes in 150 | let _nodes = _add_missing_nodes nodes nodes_hash in 151 | let base_nodes = find_isolated_nodes nodes_hash in 152 | let () = remove_nodes base_nodes nodes_hash in 153 | let sorted_node_ids = sorting_loop base_nodes nodes_hash [] in 154 | let sorted_node_ids = List.append base_nodes sorted_node_ids in 155 | let remaining_ids = Compat.Hashtbl.list_keys nodes_hash in 156 | match remaining_ids with 157 | | [] -> Sorted sorted_node_ids 158 | | _ -> ErrorCycle remaining_ids 159 | 160 | (* Functions for dealing with cyclic graphs. *) 161 | 162 | let transpose tbl = 163 | let tbl2 = Hashtbl.create 100 in 164 | let init v = 165 | if not (Hashtbl.mem tbl2 v) then 166 | Hashtbl.add tbl2 v [] 167 | in 168 | Hashtbl.iter (fun u vl -> 169 | init u; 170 | List.iter (fun v -> 171 | let ul = 172 | try Hashtbl.find tbl2 v 173 | with Not_found -> [] 174 | in 175 | Hashtbl.replace tbl2 v (u :: ul) 176 | ) vl 177 | ) tbl; 178 | tbl2 179 | 180 | let _to_list tbl = 181 | Hashtbl.fold (fun u vl acc -> (u, vl) :: acc) tbl [] 182 | 183 | (* 184 | Sort the results of 'partition' so as to follow the original node 185 | ordering. If not for the user, it's useful for us for testing. 186 | *) 187 | let sort_partition graph_l clusters = 188 | let priority = Hashtbl.create 100 in 189 | let () = List.iteri (fun i (v, _) -> Hashtbl.replace priority v i) graph_l in 190 | let prio v = 191 | try Hashtbl.find priority v 192 | with Not_found -> assert false 193 | in 194 | let list_prio vl = 195 | match vl with 196 | | [] -> assert false 197 | | x :: _ -> prio x 198 | in 199 | let cmp u v = compare (prio u) (prio v) in 200 | let cmp_list ul vl = compare (list_prio ul) (list_prio vl) in 201 | List.map (fun l -> List.sort cmp l) clusters |> List.sort cmp_list 202 | 203 | (* 204 | Implementation of Kosaraju's algorithm for partitioning a graph into its 205 | strongly connected components. 206 | *) 207 | let partition graph_l = 208 | let graph = graph_hash_of_list graph_l in 209 | let graph_l = _add_missing_nodes graph_l graph in 210 | let tr_graph = transpose graph in 211 | let visits = Hashtbl.create 100 in 212 | let is_visited v = Hashtbl.mem visits v in 213 | let mark_visited v = Hashtbl.replace visits v () in 214 | let get_out_neighbors v = 215 | try Hashtbl.find graph v 216 | with Not_found -> assert false 217 | in 218 | let get_in_neighbors v = 219 | try Hashtbl.find tr_graph v 220 | with Not_found -> assert false 221 | in 222 | let rec visit acc v = 223 | if not (is_visited v) then 224 | begin 225 | mark_visited v; 226 | let out_neighbors = get_out_neighbors v in 227 | let acc = List.fold_left (fun acc u -> visit acc u) acc out_neighbors in 228 | v :: acc 229 | end 230 | else acc 231 | in 232 | let l = 233 | List.fold_left (fun acc (v, _vl) -> visit acc v) [] graph_l 234 | in 235 | let assignments = Hashtbl.create 100 in 236 | let is_assigned v = Hashtbl.mem assignments v in 237 | let rec assign v root = 238 | if not (is_assigned v) then begin 239 | Hashtbl.replace assignments v root; 240 | let in_neighbors = get_in_neighbors v in 241 | List.iter (fun u -> assign u root) in_neighbors 242 | end 243 | in 244 | let () = List.iter (fun v -> assign v v) l 245 | (* end Kosaraju's algorithm *) 246 | in 247 | let clusters = Hashtbl.create 100 in 248 | let () = Hashtbl.iter (fun v id -> 249 | let members = 250 | try Hashtbl.find clusters id 251 | with Not_found -> [] 252 | in 253 | Hashtbl.replace clusters id (v :: members) 254 | ) assignments 255 | in 256 | let partition = Hashtbl.fold (fun _id members acc -> members :: acc) clusters [] in 257 | graph_l, sort_partition graph_l partition 258 | 259 | let find_strongly_connected_components graph_l = 260 | let _completed_graph_l, components = partition graph_l in 261 | components 262 | 263 | (* 264 | Algorithm: 265 | 1. Identify the strongly-connected components of the input graph. 266 | 2. Derive a DAG from merging the nodes within each component 267 | (condensation). 268 | 3. Topologically-sort that DAG. 269 | 4. Re-expand the nodes representing components into the original nodes. 270 | *) 271 | let sort_strongly_connected_components graph_l = 272 | let graph_l, components = partition graph_l in 273 | let index = Hashtbl.create 100 in 274 | let rev_index = Hashtbl.create 100 in 275 | List.iteri (fun id comp -> 276 | List.iter (fun v -> 277 | Hashtbl.add index v id; 278 | Hashtbl.add rev_index id comp 279 | ) comp 280 | ) components; 281 | 282 | let get_comp_id v = 283 | try Hashtbl.find index v 284 | with Not_found -> assert false 285 | in 286 | let get_comp_members id = 287 | try Hashtbl.find rev_index id 288 | with Not_found -> assert false 289 | in 290 | let condensation = 291 | let tbl = Hashtbl.create 100 in 292 | List.iter (fun (u, vl) -> 293 | let id = get_comp_id u in 294 | let idl0 = 295 | try Hashtbl.find tbl id 296 | with Not_found -> [] 297 | in 298 | let idl = List.map get_comp_id vl @ idl0 in 299 | Hashtbl.replace tbl id idl 300 | ) graph_l; 301 | Hashtbl.fold (fun id idl acc -> 302 | (* Remove v->v edges because they are not supported by tsort. 303 | Duplicates seem ok. *) 304 | let filtered = List.filter ((<>) id) idl in 305 | (id, filtered) :: acc 306 | ) tbl [] 307 | in 308 | let sorted_components = 309 | match sort condensation with 310 | | Sorted comp_ids -> List.map get_comp_members comp_ids 311 | | ErrorCycle _ -> 312 | (* Shouldn't happen if graph partioning etc. work correctly. *) 313 | failwith "ocaml-tsort internal error: sorting strongly connected components failed. Please report a bug." 314 | in 315 | sorted_components 316 | 317 | let find_dependencies graph node = 318 | let rec aux graph node = 319 | let deps = List.assoc node graph in 320 | List.concat (List.map (aux graph) deps) |> List.append deps 321 | in 322 | let graph_hash = graph_hash_of_list graph in 323 | let graph = _add_missing_nodes graph graph_hash in 324 | aux graph node |> deduplicate 325 | -------------------------------------------------------------------------------- /src/lib/tsort.mli: -------------------------------------------------------------------------------- 1 | (** 2 | Topological sort 3 | *) 4 | 5 | type 'a sort_result = 6 | | Sorted of 'a list 7 | | ErrorCycle of 'a list 8 | 9 | (** Perform a normal topological sort on a directed acyclic graph (DAG). 10 | 11 | The result is in "dependency order", i.e. if there's an edge from 12 | A to B, then B comes first. For example, 13 | [sort [1, [2]; 2, []]] returns [[2; 1]]. 14 | 15 | If your graph may contain legitimate cycles, consider using 16 | [sort_strongly_connected_components] instead. 17 | 18 | Missing nodes such as node 2 in graph [[1, [2]]] are automatically added, 19 | resulting in the graph [[1, [2]; 2, []]]. If this is undesirable, 20 | consider running [find_nonexistent_nodes] on the input graph. 21 | *) 22 | val sort : ('a * 'a list) list -> 'a sort_result 23 | 24 | (** Perform a topological sort on a directed graph that may have cycles. 25 | Uses [find_strongly_connected_components] and [sort]. 26 | 27 | Like with [sort], missing nodes are silently added to the graph. 28 | 29 | For example, [find_strongly_connected_components 30 | ["A", ["B"]; "B", ["C"]; "C", ["B"; "D"]]] returns 31 | [["D"]; ["B"; "C"]; ["A"]]. 32 | *) 33 | val sort_strongly_connected_components : ('a * 'a list) list -> 'a list list 34 | 35 | (** Report nodes that have non-existent dependencies. 36 | This is useful for detecting user-entry errors, 37 | since the other functions of the module silently add missing nodes to 38 | the graph. 39 | 40 | The result is an assoc list where the keys are nodes with bad dependencies, 41 | and values are lists of nodes not found among the original assoc list keys. 42 | 43 | For example, [find_nonexistent_nodes ["test", ["biuld"]; "build", []]] 44 | returns ["test", ["biuld"]]. 45 | *) 46 | val find_nonexistent_nodes : ('a * 'a list) list -> ('a * 'a list) list 47 | 48 | (** Add nodes that are specified as a dependencies but not present in the graph. 49 | 50 | The assumption is that in many cases those nodes are 51 | simply nodes with no dependencies that are not mentioned explicitly. 52 | 53 | For example: [add_missing_nodes ["test", ["build"]] 54 | returns [["test", ["build"], "build", []]]. 55 | *) 56 | val add_missing_nodes : ('a * 'a list) list -> ('a * 'a list) list 57 | 58 | (** 59 | Partition a graph into its strongly-connected components: 60 | Two vertices u, v belong to the same component iff there's a path from 61 | u to v and there's a path from v to u. 62 | 63 | See https://en.wikipedia.org/wiki/Strongly_connected_component 64 | 65 | The current implementation uses the Kosaraju-Sharir algorithm, 66 | which is described at 67 | https://en.wikipedia.org/wiki/Kosaraju%27s_algorithm 68 | 69 | The theoretical complexity of the Kosaraju-Sharir algorithm is 70 | O(n) = O(|V|+|E|) but due to the use of resizable hash tables and a final 71 | sorting pass, the complexity of this implementation is O(n log n). 72 | *) 73 | val find_strongly_connected_components : ('a * 'a list) list -> 'a list list 74 | 75 | (** Find dependencies of a given node (direct and transitive). 76 | 77 | Missing nodes are silently added to the graph. 78 | 79 | This function is safe to use on graph with dependency cycles: 80 | it just lists everything that is specified as a dependency. 81 | *) 82 | val find_dependencies : ('a * 'a list) list -> 'a -> 'a list 83 | -------------------------------------------------------------------------------- /src/test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test_tsort) 3 | (libraries alcotest tsort)) 4 | -------------------------------------------------------------------------------- /src/test/test_tsort.ml: -------------------------------------------------------------------------------- 1 | (* 2 | Test suite and entry point of the test executable. 3 | *) 4 | 5 | open Printf 6 | 7 | let fmt_list to_string l = 8 | sprintf "[%s]" (l |> List.map to_string |> String.concat " ") 9 | 10 | let fmt_graph l = 11 | sprintf "[%s]" 12 | (l 13 | |> List.map (fun (v, vl) -> sprintf "%i->%s" 14 | v (fmt_list string_of_int vl)) 15 | |> String.concat " ") 16 | 17 | let fmt_partition l = 18 | fmt_list (fmt_list string_of_int) l 19 | 20 | let fmt_tsort_result res = 21 | match res with 22 | | Tsort.Sorted l -> 23 | sprintf "Sorted %s" (fmt_list string_of_int l) 24 | | Tsort.ErrorCycle l -> 25 | sprintf "ErrorCycle %s" (fmt_list string_of_int l) 26 | 27 | let test_find_nonexistent_nodes () = 28 | (* basic functionality *) 29 | assert ( 30 | (Tsort.find_nonexistent_nodes 31 | [1, []; 2, [3]] 32 | ) 33 | = [2, [3]] 34 | ); 35 | (* check edge filtering *) 36 | assert ( 37 | (Tsort.find_nonexistent_nodes 38 | [1, []; 2, [3]; 4, [5; 6; 7; 5]; 6, [2]] 39 | ) 40 | = [2, [3]; 4, [5; 7]] 41 | ) 42 | 43 | let test_tsort () = 44 | let sort graph = 45 | printf "input: %s\n%!" (fmt_graph graph); 46 | let res = Tsort.sort graph in 47 | printf "output: %s\n%!" (fmt_tsort_result res); 48 | res 49 | in 50 | assert (sort [] = Sorted []); 51 | assert ( 52 | sort [ 53 | 1, [2]; 54 | 2, [3; 4]; 55 | 3, [4; 5]; 56 | 4, [6]; 57 | 5, [6]; 58 | 6, [7]; 59 | 7, []; 60 | ] 61 | = 62 | (* Multiple solutions are valid. This is the one returned by the current 63 | implementation. *) 64 | Sorted [7; 6; 4; 5; 3; 2; 1] 65 | ); 66 | (* Tolerate missing nodes. *) 67 | assert ( 68 | sort [1, [2]; 2, [3] (* 3 is missing *)] = Sorted [3; 2; 1] 69 | ); 70 | (* Tolerate partial dependency lists. *) 71 | assert ( 72 | sort [1, []; 2, []; 3, [2]; 3, [1]] = Sorted [1; 2; 3] 73 | ) 74 | 75 | let test_find_sc_components () = 76 | let p graph = 77 | printf "input: %s\n%!" (fmt_graph graph); 78 | let partition = Tsort.find_strongly_connected_components graph in 79 | printf "output: %s\n%!" (fmt_partition partition); 80 | partition 81 | in 82 | assert (p [] = []); 83 | assert (p [0, []] = [[0]]); 84 | 85 | (* tolerate duplicate node entries *) 86 | assert (p [0, [1]; 0, [2]; 1, []; 2, []] = [[0]; [1]; [2]]); 87 | 88 | (* tolerate missing node entries *) 89 | assert (p [0, [1; 2]] = [[0]; [1]; [2]]); 90 | 91 | (* sort result according to original order *) 92 | assert ( 93 | p [ 94 | 2, []; 95 | 0, [2]; 96 | 1, [0] 97 | ] 98 | = [[2]; [0]; [1]] 99 | ); 100 | 101 | assert ( 102 | p [ 103 | 1, [2]; 104 | 2, [3; 4]; 105 | 3, [4]; 106 | 4, [2; 5] 107 | ] 108 | = [[1]; [2; 3; 4]; [5]] 109 | ) 110 | 111 | let test_sort_sc_components () = 112 | let sort graph = 113 | printf "input: %s\n%!" (fmt_graph graph); 114 | let components = Tsort.sort_strongly_connected_components graph in 115 | printf "output: %s\n%!" (fmt_partition components); 116 | components 117 | in 118 | assert (sort [] = []); 119 | assert (sort [0,[]] = [[0]]); 120 | assert ( 121 | sort [ 122 | 1, [2]; 123 | 2, [3; 4]; 124 | 3, [4]; 125 | 4, [2; 5] 126 | ] 127 | = [[5]; [2; 3; 4]; [1]] 128 | ) 129 | 130 | let test_find_dependencies () = 131 | let graph = [1, [2; 3]; 2, [3; 5]; 3, [5; 8]] in 132 | assert( 133 | (Tsort.find_dependencies graph 1 |> List.sort compare) = 134 | [2; 3; 5; 8] 135 | ); 136 | assert( 137 | (Tsort.find_dependencies graph 2 |> List.sort compare) = 138 | [3; 5; 8] 139 | ); 140 | assert((Tsort.find_dependencies graph 8) = []) 141 | 142 | let test_add_missing_nodes () = 143 | let graph = [1, [2]] in 144 | assert((Tsort.add_missing_nodes graph) = [1, [2]; 2, []]) 145 | 146 | let main () = 147 | Alcotest.run "Tsort" [ 148 | "Tsort", [ 149 | "find nonexistent nodes", `Quick, test_find_nonexistent_nodes; 150 | "sort", `Quick, test_tsort; 151 | "find s.c. components", `Quick, test_find_sc_components; 152 | "sort s.c. components", `Quick, test_sort_sc_components; 153 | "find dependencies", `Quick, test_find_dependencies; 154 | "add_missing_nodes", `Quick, test_add_missing_nodes; 155 | ]; 156 | ] 157 | 158 | let () = main () 159 | -------------------------------------------------------------------------------- /tsort.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | version: "2.2.0" 3 | maintainer: "daniil@baturin.org" 4 | authors: ["Daniil Baturin "] 5 | homepage: "https://github.com/dmbaturin/ocaml-tsort" 6 | bug-reports: "https://github.com/dmbaturin/ocaml-tsort/issues" 7 | dev-repo: "git+https://github.com/dmbaturin/ocaml-tsort.git" 8 | license: "MIT" 9 | x-maintenance-intent: ["(latest)"] 10 | build: [ 11 | ["dune" "build" 12 | "-p" 13 | name 14 | "-j" jobs 15 | "@install" 16 | "@runtest" {with-test}] 17 | ] 18 | depends: [ 19 | "ocaml" {>= "4.03.0"} 20 | "dune" {>= "1.9"} 21 | "alcotest" {with-test} 22 | ] 23 | synopsis: "Easy to use and user-friendly topological sort" 24 | description: """ 25 | Easy to use and user-friendly topological sort. 26 | 27 | Example: 28 | ``` 29 | Tsort.sort [("foundation", []); ("walls", ["foundation"]); ("roof", ["walls"])] 30 | ``` 31 | """ 32 | --------------------------------------------------------------------------------