├── src ├── .gitignore ├── dune └── dutop.ml ├── test ├── .gitignore ├── dune ├── test.expected.out └── test ├── dune-project ├── .gitignore ├── Makefile ├── dutop.opam ├── .ocp-indent ├── LICENSE └── README.md /src/.gitignore: -------------------------------------------------------------------------------- 1 | .merlin 2 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | root 2 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.0) 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | *~ 3 | *.install 4 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (executables 2 | (names dutop) 3 | (public_names dutop) 4 | (libraries unix) 5 | (package dutop)) 6 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (rule 2 | (targets test.out) 3 | (action (with-stdout-to test.out (run ./test %{bin:dutop})))) 4 | 5 | (alias 6 | (name runtest) 7 | (package dutop) 8 | (action (diff test.expected.out test.out))) 9 | -------------------------------------------------------------------------------- /test/test.expected.out: -------------------------------------------------------------------------------- 1 | 33.3% 1 root/b 2 | 33.3% 1 root/sub/c 3 | d 33.3% 1 root/sub 4 | 33.3% 1 root/a 5 | 33.3% 1 root/b2 [root/b] 6 | d 100.0% 3 root 7 | -------------------------------------------------------------------------------- 
/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default build install uninstall test clean 2 | 3 | default: build 4 | 5 | build: 6 | dune build 7 | 8 | test: 9 | dune runtest -f 10 | 11 | install: 12 | dune install 13 | 14 | uninstall: 15 | dune uninstall 16 | 17 | clean: 18 | dune clean 19 | # Optionally, remove all files/folders ignored by git as defined 20 | # in .gitignore (-X). 21 | git clean -dfXq 22 | -------------------------------------------------------------------------------- /dutop.opam: -------------------------------------------------------------------------------- 1 | opam-version: "1.2" 2 | maintainer: "martin@mjambon.com" 3 | authors: ["Martin Jambon"] 4 | homepage: "https://github.com/mjambon/dutop" 5 | bug-reports: "https://github.com/mjambon/dutop/issues" 6 | dev-repo: "https://github.com/mjambon/dutop.git" 7 | 8 | build: [ 9 | ["dune" "subst"] {pinned} 10 | ["dune" "build" "-p" name "-j" jobs] 11 | ] 12 | 13 | build-test: ["dune" "runtest" "-p" name] 14 | 15 | depends: [ 16 | "dune" {build} 17 | ] 18 | -------------------------------------------------------------------------------- /test/test: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Create a sample file hierarchy and run dutop on it. 4 | # 5 | # Usage: ./test [path to dutop executable] 6 | # 7 | 8 | set -eu 9 | 10 | dutop="${1:-dutop}" 11 | 12 | rm -rf root 13 | mkdir -p root 14 | ( 15 | # Create a tree with 3 regular files containing 1 byte each. 
16 | cd root 17 | 18 | # Regular files; printf is used because the handling of echo -n is not portable across sh implementations 19 | printf x > a 20 | printf x > b 21 | 22 | # Symbolic link 23 | ln -s a a2 24 | 25 | # Hard link 26 | ln b b2 27 | 28 | # Sub-directory 29 | mkdir -p sub 30 | printf x > sub/c 31 | ) 32 | 33 | "$dutop" root 34 | -------------------------------------------------------------------------------- /.ocp-indent: -------------------------------------------------------------------------------- 1 | # See https://github.com/OCamlPro/ocp-indent/blob/master/.ocp-indent for more 2 | 3 | # Indent for clauses inside a pattern-match (after the arrow): 4 | # match foo with 5 | # | _ -> 6 | # ^^^^bar 7 | # the default is 2, which aligns the pattern and the expression 8 | match_clause = 4 9 | 10 | # When nesting expressions on the same line, their indentation are in 11 | # some cases stacked, so that it remains correct if you close them one 12 | # at a line. This may lead to large indents in complex code though, so 13 | # this parameter can be used to set a maximum value. Note that it only 14 | # affects indentation after function arrows and opening parens at end 15 | # of line. 16 | # 17 | # for example (left: `none`; right: `4`) 18 | # let f = g (h (i (fun x -> # let f = g (h (i (fun x -> 19 | # x) # x) 20 | # ) # ) 21 | # ) # ) 22 | max_indent = 2 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 MyLife 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. 
Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. The name of the author may not be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | dutop - disk usage top 2 | == 3 | 4 | dutop is a command-line utility that scans the file system from a given 5 | root path and reports any file or directory that occupies 6 | at least 5% of the space. It answers the question: 7 | 8 | > Which single file or directory should I remove to reclaim significant 9 | > storage space? 
10 | 11 | Example 12 | -- 13 | 14 | Here's how to get a sense of what takes the most space in the `/usr` 15 | directory: 16 | 17 | ``` 18 | $ dutop /usr 19 | d 5.2% 429,055,696 /usr/local/lib/python3.5/dist-packages 20 | d 5.2% 429,055,696 /usr/local/lib/python3.5 21 | d 5.6% 461,789,323 /usr/lib/ghc 22 | d 5.7% 473,685,763 /usr/share/doc 23 | d 6.6% 549,563,745 /usr/bin 24 | d 9.0% 743,990,153 /usr/local/lib 25 | d 11.7% 972,273,271 /usr/local 26 | d 16.9% 1,401,349,064 /usr/lib/x86_64-linux-gnu 27 | d 27.7% 2,301,009,747 /usr/share 28 | d 49.0% 4,070,197,838 /usr/lib 29 | d 100.0% 8,309,946,131 /usr 30 | ``` 31 | 32 | The output is brief, since only objects representing at least 5% of 33 | the total are shown. Compare that to `du /usr` which here produces 34 | 44,476 lines of output due to the large number of files. The closest 35 | standard command would be `du -s`, which gives us the following: 36 | 37 | ``` 38 | $ du -s /usr/* 39 | 540568 /usr/bin 40 | 772 /usr/games 41 | 187364 /usr/include 42 | 4061168 /usr/lib 43 | 16880 /usr/lib32 44 | 17568 /usr/libx32 45 | 1110684 /usr/local # lacks details 46 | 20 /usr/locale # too small to be of interest 47 | 41052 /usr/sbin 48 | 2649668 /usr/share 49 | 370756 /usr/src 50 | ``` 51 | 52 | The granularity of `dutop` can be adjusted. 
For example, we can set it 53 | to 3%: 54 | 55 | ``` 56 | $ dutop -m 0.03 /usr 57 | d 3.0% 253,064,866 /usr/local/lib/node_modules 58 | d 3.1% 260,450,536 /usr/lib/jvm 59 | d 4.4% 369,531,237 /usr/share/doc/texlive-doc 60 | d 5.2% 429,055,696 /usr/local/lib/python3.5/dist-packages 61 | d 5.2% 429,055,696 /usr/local/lib/python3.5 62 | d 5.6% 461,789,323 /usr/lib/ghc 63 | d 5.7% 473,685,763 /usr/share/doc 64 | d 6.6% 549,563,745 /usr/bin 65 | d 9.0% 743,990,153 /usr/local/lib 66 | d 11.7% 972,273,271 /usr/local 67 | d 16.9% 1,401,349,064 /usr/lib/x86_64-linux-gnu 68 | d 27.7% 2,301,009,747 /usr/share 69 | d 49.0% 4,070,197,838 /usr/lib 70 | d 100.0% 8,309,946,131 /usr 71 | ``` 72 | 73 | Installation 74 | -- 75 | 76 | Requires a standard installation of OCaml and Dune. 77 | 78 | ``` 79 | $ make 80 | $ make install 81 | ``` 82 | 83 | Uninstallation: 84 | 85 | ``` 86 | $ make uninstall 87 | ``` 88 | 89 | Contributors 90 | -- 91 | 92 | Dutop was designed and created by Martin Jambon. 93 | This is free software developed by volunteers. 94 | Help is always appreciated. 95 | -------------------------------------------------------------------------------- /src/dutop.ml: -------------------------------------------------------------------------------- 1 | (* ocamlopt -o dutop unix.cmxa dutop.ml *) 2 | 3 | open Printf 4 | open Unix.LargeFile 5 | 6 | let version = "1.1.0" 7 | let debug = ref false 8 | 9 | type path = string 10 | type kind = File | Dir 11 | (* What lookup learns about a single path. *) 12 | type info = { 13 | path : path; 14 | inode : int * int; (* device number, inode number *) 15 | kind : kind; 16 | child_paths : path list; 17 | size : int64; 18 | } 19 | (* A scanned path: its own info, its scanned children, its size including descendants, and the first path seen with the same inode, if any. *) 20 | type node = { 21 | info : info; 22 | child_nodes : node list; 23 | cumulated_size : int64; 24 | hard_link : string option; 25 | } 26 | 27 | type sort_by = Name | Size 28 | 29 | let warn = ref false 30 | 31 | let warning s = 32 | eprintf "Warning: %s\n%!" 
s 33 | (* Stat path (following symlinks only when follow is true) and describe it as an info record. Returns None when the path cannot be stat-ed, is on a device other than root_device (when one is given), is a symbolic link or a special file, or is a directory that cannot be read. A directory gets size 0L and its entries, prefixed with path, as child_paths. *) 34 | let lookup ~follow ~root_device path = 35 | try 36 | let x = 37 | try 38 | if follow then 39 | stat path 40 | else 41 | lstat path 42 | with e -> 43 | if !warn then 44 | warning (sprintf "Cannot access info on file %S: %s" 45 | path (Printexc.to_string e)); 46 | raise Exit 47 | in 48 | let device = x.st_dev in 49 | (match root_device with 50 | | Some dev when dev <> device -> raise Exit 51 | | _ -> () 52 | ); 53 | let inode = (device, x.st_ino) in 54 | let kind = 55 | match x.st_kind with 56 | Unix.S_DIR -> Dir 57 | | Unix.S_REG -> File 58 | | Unix.S_LNK -> 59 | raise Exit 60 | | _ -> 61 | if !warn then 62 | warning (sprintf "Ignoring special file %s" path); 63 | raise Exit 64 | in 65 | let size, child_paths = 66 | match kind with 67 | Dir -> 68 | let a = 69 | try Sys.readdir path 70 | with e -> 71 | if !warn then 72 | warning (sprintf "Cannot read directory %S: %s" 73 | path (Printexc.to_string e)); 74 | raise Exit 75 | in 76 | let children = 77 | Array.fold_right 78 | (fun name acc -> Filename.concat path name :: acc) 79 | a [] 80 | in 81 | 0L, children 82 | 83 | | File -> 84 | x.st_size, [] 85 | in 86 | Some { path; inode; kind; child_paths; size } 87 | with Exit | Sys_error _ -> None (* Exit: deliberate skip raised above. Sys_error: I/O failure while emitting a warning. Other exceptions such as Stack_overflow or Out_of_memory propagate instead of being reported as a missing path. *) 88 | (* Device number of path according to lookup, or None if lookup fails. *) 89 | let get_root_device ~follow path = 90 | match lookup ~follow ~root_device:None path with 91 | | Some info -> 92 | let device, _inode = info.inode in 93 | Some device 94 | | None -> 95 | None 96 | (* Build the node tree rooted at path. inodes maps each (device, inode) pair to the first path found with it; a node whose pair is already present gets hard_link = Some of that first path and is left out of its parent cumulated_size. follow applies to path only: the recursive calls omit it and so use the default, false. *) 97 | let rec scan_filesystem ?(follow = false) ~root_device inodes path = 98 | match lookup ~follow ~root_device path with 99 | | Some info -> 100 | let hard_link = 101 | try 102 | Some (Hashtbl.find inodes info.inode) 103 | with Not_found -> 104 | Hashtbl.add inodes info.inode path; 105 | None 106 | in 107 | let child_nodes = 108 | List.fold_left ( 109 | fun acc path -> 110 | match scan_filesystem ~root_device inodes path with 111 | | None -> acc 112 | | Some x -> x :: acc 113 | ) [] info.child_paths 114 | in 115 | let cumulated_size = 116 | List.fold_left ( 117 | fun acc x -> 
if x.hard_link = None then 119 | Int64.add acc x.cumulated_size 120 | else 121 | acc 122 | ) info.size child_nodes 123 | in 124 | Some { info; child_nodes; cumulated_size; hard_link } 125 | 126 | | None -> None 127 | (* Add to acc every descendant of node whose cumulated size is at least min_size. Children below the threshold are dropped without being descended into. *) 128 | let rec select_big_nodes min_size acc node = 129 | List.fold_left ( 130 | fun acc x -> 131 | if x.cumulated_size >= min_size then 132 | select_big_nodes min_size (x :: acc) x 133 | else 134 | acc 135 | ) acc node.child_nodes 136 | (* Scan root_path and return the total size together with the nodes to report: the root itself plus every descendant selected by select_big_nodes for the threshold ceil (min_fraction times total). Returns 0L and the empty list when the root cannot be scanned. With ignore_mounts, scanning is restricted to the device of the root. *) 137 | let get_selection ~deref_root:follow ~ignore_mounts min_fraction root_path = 138 | let root_device = 139 | match ignore_mounts with 140 | | true -> 141 | get_root_device ~follow root_path 142 | | false -> 143 | None 144 | in 145 | match 146 | scan_filesystem 147 | ~follow 148 | ~root_device 149 | (Hashtbl.create 10000) root_path 150 | with 151 | None -> 0L, [] 152 | | Some x -> 153 | let total_size = x.cumulated_size in 154 | let min_size = 155 | Int64.of_float (ceil (min_fraction *. (Int64.to_float total_size))) 156 | in 157 | total_size, select_big_nodes min_size [x] x 158 | 159 | (* Characters of s, in order. *) 160 | let list_of_string s = 161 | let l = ref [] in 162 | String.iter (fun c -> l := c :: !l) s; 163 | List.rev !l 164 | (* Inverse of list_of_string. *) 165 | let string_of_list l = 166 | let n = List.length l in 167 | let s = Bytes.create n in 168 | let l = ref l in 169 | for i = 0 to n - 1 do 170 | Bytes.set s i (List.hd !l); 171 | l := List.tl !l; 172 | done; 173 | Bytes.to_string s 174 | (* Decimal form of a non-negative int64 with a comma between each group of three digits, counted from the right. *) 175 | let comma_string_of_int64 x = 176 | assert (x >= 0L); 177 | let l = list_of_string (Int64.to_string x) in 178 | let rec insert = function 179 | a :: b :: c :: (_ :: _ as l) -> a :: b :: c :: ',' :: insert l 180 | | l -> l 181 | in 182 | string_of_list (List.rev (insert (List.rev l))) 183 | 184 | (* Print one report line: d for a directory or a blank for a file, percentage of total_size, cumulated size, path, and the original path in brackets for a hard link. The ratio is taken as 1 when total_size is zero. *) 185 | let print_info_line total_size x = 186 | let r = 187 | if total_size > 0L then 188 | Int64.to_float x.cumulated_size /. Int64.to_float total_size 189 | else 1. 190 | in 191 | printf "%c %5.1f%% %21s %s%s\n" 192 | (match x.info.kind with Dir -> 'd' | File -> ' ') 193 | (100. 
*. r) 194 | (comma_string_of_int64 x.cumulated_size) 195 | x.info.path 196 | (match x.hard_link with 197 | None -> "" 198 | | Some s -> sprintf " [%s]" s) 199 | (* Compute the selection for root_path, sort it by path or by cumulated size (reversed on request) and print either bare paths or full report lines. *) 200 | let run bare deref_root ignore_mounts min_fraction reverse sort_by root_path = 201 | let total_size, selection = 202 | get_selection ~deref_root ~ignore_mounts min_fraction root_path in 203 | let cmp = 204 | match sort_by with 205 | Name -> 206 | (fun a b -> String.compare a.info.path b.info.path) 207 | | Size -> 208 | (fun a b -> Int64.compare a.cumulated_size b.cumulated_size) 209 | in 210 | let cmp = if reverse then (fun a b -> cmp b a) else cmp in 211 | let l = List.sort cmp selection in 212 | let print = 213 | if bare then 214 | (fun x -> printf "%s\n" x.info.path) 215 | else 216 | (print_info_line total_size) 217 | in 218 | List.iter print l 219 | (* Parse the command line and call run. *) 220 | let main () = 221 | let bare = ref false in 222 | let deref_root = ref true in 223 | let ignore_mounts = ref false in 224 | let min_fraction = ref 0.05 in 225 | let reverse = ref false in 226 | let sort_by = ref Size in 227 | let root = ref "." in 228 | let anon_fun s = root := s in 229 | let options = [ 230 | "-b", Arg.Set bare, 231 | " 232 | Bare output, i.e. the output consists only in file paths, 233 | one per line."; 234 | 235 | "-d", Arg.Clear deref_root, 236 | " 237 | Do not follow the link if the root path is a symbolic link. 238 | The default behavior is to dereference the root path. 239 | Other symlinks than the root are never dereferenced regardless 240 | of this setting."; 241 | 242 | "-i", Arg.Set ignore_mounts, 243 | " 244 | Ignore files located on other devices than the root path."; 245 | 246 | "-m", Arg.Set_float min_fraction, 247 | " 248 | Set the minimum fraction of the total size for a node to be reported. 249 | The default is 0.05, i.e. 
only files and directories that use 250 | 5% of the space are reported."; 251 | 252 | "-p", Arg.Unit (fun () -> sort_by := Name), 253 | " 254 | Sort alphabetically by path."; 255 | 256 | "-r", Arg.Set reverse, 257 | " 258 | Reverse sort."; 259 | 260 | "-s", Arg.Unit (fun () -> sort_by := Size), 261 | " 262 | Sort by increasing size (default)."; 263 | 264 | "-version", 265 | Arg.Unit (fun () -> print_endline version; exit 0), 266 | " 267 | Print program's version and exit."; 268 | 269 | "-w", 270 | Arg.Set warn, 271 | " 272 | Warn against unreadable or missing files."; 273 | ] 274 | in 275 | let usage_msg = 276 | sprintf "\ 277 | Usage: %s [PATH] 278 | %s reports all directories and regular files that use at least 5%% of the 279 | total space. 280 | " 281 | Sys.argv.(0) Sys.argv.(0) 282 | in 283 | Arg.parse options anon_fun usage_msg; 284 | run !bare !deref_root !ignore_mounts !min_fraction !reverse !sort_by !root 285 | 286 | let () = main () 287 | --------------------------------------------------------------------------------