├── .gitignore ├── .travis.yml ├── ChangeLog ├── Guardfile ├── README.md ├── config └── config.exs ├── lib └── dir_walker.ex ├── mix.exs └── test ├── dir ├── a.txt ├── b.txt ├── badlink ├── c │ └── d │ │ └── f.txt └── goodlink ├── dir_walker_test.exs ├── dirlink └── test_helper.exs /.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | /deps 3 | erl_crash.dump 4 | *.ez 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: erlang 2 | env: 3 | - ELIXIR="v1.0.0" 4 | otp_release: 5 | - 17.1 6 | before_install: 7 | - mkdir -p vendor/elixir 8 | - wget -q https://github.com/elixir-lang/elixir/releases/download/$ELIXIR/Precompiled.zip && unzip -qq Precompiled.zip -d vendor/elixir 9 | - export PATH="$PATH:$PWD/vendor/elixir/bin" 10 | - mix local.hex --force 11 | script: "MIX_ENV=test mix do deps.get, test" 12 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2014-03-30 Dave Thomas 2 | 3 | * Fix issue with missing directory in tests. Based on a PR 4 | from Low Kian Seong (lowks) 5 | 6 | 2014-10-18 Dave Thomas 7 | 8 | * Add `matching:` option to filter names of returned files. 9 | 10 | 2014-10-14 Booker Bense 11 | * :include_stats option turns off following 12 | symlinks 13 | 14 | 2014-10-12 Booker Bense 15 | * Added support for detecting symlinks 16 | * Tweaks to tests to work around git not storing 17 | empty directories. 18 | 19 | 20 | 2014-10-04 Dave Thomas 21 | 22 | * Add options to 23 | * include File.Stat with each file name 24 | * include directory names in the list returned 25 | 26 | 2014-10-03 Booker Bense 27 | 28 | * Add a Stream API 29 | 30 | 2014-09-18 Dave Thomas 31 | 32 | * lib/dir_walker.ex: Ignore special files. 
33 | Skip directories and files that we have no access to. 34 | 35 | * mix.exs: bump to 0.0.3 36 | 37 | -------------------------------------------------------------------------------- /Guardfile: -------------------------------------------------------------------------------- 1 | # -*- ruby -*- 2 | guard :shell do 3 | interactor :off 4 | notification :emacs 5 | watch(/^(lib|test).*\.exs?$/) do |f| 6 | `mix test >/dev/tty` 7 | if $?.success? 8 | Notifier.notify "Success", type: "success" 9 | else 10 | Notifier.notify "Failed", type: "failed" 11 | end 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DirWalker 2 | ========= 3 | 4 | DirWalker lazily traverses one or more directory trees, depth first, 5 | returning successive file names. 6 | 7 | Initialize the walker using 8 | 9 | ```elixir 10 | {:ok, walker} = DirWalker.start_link(path [, options ]) # or [path, path...] 11 | ``` 12 | 13 | Then return the next `n` path names using 14 | 15 | ```elixir 16 | paths = DirWalker.next(walker [, n \\ 1]) 17 | ``` 18 | 19 | Successive calls to `next` will return successive file names, until 20 | all file names have been returned. 21 | 22 | These methods have also been wrapped into a Stream resource. 23 | 24 | ```elixir 25 | paths = DirWalker.stream(path [, options]) # or [path,path...] 26 | ``` 27 | 28 | By default DirWalker will follow any symlinks found. With the `include_stat` 29 | option, it will instead simply return the `File.Stat` of the symlink 30 | and it is up to the calling code to handle symlinks. 31 | 32 | `options` is a map or keyword list containing zero or more of: 33 | 34 | * `include_stat: true` 35 | 36 | Return tuples containing both the file name and the `File.Stat` 37 | structure for each file. This does not incur a performance penalty 38 | but obviously can use more memory.
When this option is specified, 39 | DirWalker will not follow symlinks. 40 | 41 | * `include_dir_names: true` 42 | 43 | Include the names of directories that are traversed (normally just the names 44 | of regular files are returned). Note that the order is such that directory names 45 | will typically be returned after the names of files in those directories. 46 | 47 | * `matching:` _regex_ 48 | 49 | Only file names matching the regex will be returned. Does not affect 50 | directory traversals. 51 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | use Mix.Config 2 | -------------------------------------------------------------------------------- /lib/dir_walker.ex: -------------------------------------------------------------------------------- 1 | defmodule DirWalker do 2 | 3 | @moduledoc Path.join([__DIR__, "../README.md"]) |> File.read! 4 | 5 | require Logger 6 | 7 | use GenServer 8 | 9 | def start_link(path, opts \\ %{}) 10 | 11 | def start_link(list_of_paths, opts) when is_list(list_of_paths) do 12 | mappers = setup_mappers(opts) 13 | GenServer.start_link(__MODULE__, {list_of_paths, mappers}) 14 | end 15 | 16 | def start_link(path, opts) when is_binary(path) do 17 | start_link([path], opts) 18 | end 19 | 20 | @doc """ 21 | Return the next _n_ files from the lists of files, recursing into 22 | directories if necessary. Return `nil` when there are no files 23 | to return. (If there are fewer than _n_ files remaining, just those 24 | files are returned, and `nil` will be returned on the next call. 25 | 26 | ## Example 27 | 28 | iex> {:ok,d} = DirWalker.start_link "." 
29 | {:ok, #PID<0.83.0>} 30 | iex> DirWalker.next(d) 31 | ["./.gitignore"] 32 | iex> DirWalker.next(d) 33 | ["./_build/dev/lib/dir_walter/.compile.elixir"] 34 | iex> DirWalker.next(d, 3) 35 | ["./_build/dev/lib/dir_walter/ebin/Elixir.DirWalker.beam", 36 | "./_build/dev/lib/dir_walter/ebin/dir_walter.app", 37 | "./_build/dev/lib/dir_walter/.compile.lock"] 38 | iex> 39 | """ 40 | def next(iterator, n \\ 1) do 41 | GenServer.call(iterator, { :get_next, n }) 42 | end 43 | 44 | @doc """ 45 | Stops the DirWalker 46 | """ 47 | def stop(server) do 48 | GenServer.call(server, :stop) 49 | end 50 | 51 | @doc """ 52 | Implement a stream interface that will return a lazy enumerable. 53 | 54 | ## Example 55 | 56 | iex> first_file = DirWalker.stream("/") |> Enum.take(1) 57 | 58 | """ 59 | 60 | def stream(path_list, opts \\ %{}) do 61 | Stream.resource( fn -> 62 | {:ok, dirw} = DirWalker.start_link(path_list,opts) 63 | dirw 64 | end , 65 | fn(dirw) -> 66 | case DirWalker.next(dirw,1) do 67 | data when is_list(data) -> {data, dirw } 68 | _ -> {:halt, dirw} 69 | end 70 | end, 71 | fn(dirw) -> DirWalker.stop(dirw) end 72 | ) 73 | end 74 | 75 | ################## 76 | # Implementation # 77 | ################## 78 | 79 | def init(path_list) do 80 | { :ok, path_list } 81 | end 82 | 83 | def handle_call({:get_next, _n}, _from, state = {[], _}) do 84 | { :reply, nil, state} 85 | end 86 | 87 | def handle_call({:get_next, n}, _from, {path_list, mappers}) do 88 | {result, new_path_list} = first_n(path_list, n, mappers, _result=[]) 89 | return_result = 90 | case {result, new_path_list} do 91 | {[], []} -> nil 92 | _ -> result 93 | end 94 | { :reply, return_result, {new_path_list, mappers} } 95 | end 96 | 97 | def handle_call(:stop, from, state) do 98 | GenServer.reply(from, :ok ) 99 | {:stop, :normal, state} 100 | end 101 | 102 | 103 | # If the first element is a list, then it represents a 104 | # nested directory listing. 
We keep it as a list rather 105 | # than flatten it in order to keep performance up. 106 | 107 | defp first_n([ [] | rest ], n, mappers, result) do 108 | first_n(rest, n, mappers, result) 109 | end 110 | 111 | defp first_n([ [first] | rest ], n, mappers, result) do 112 | first_n([ first | rest ], n, mappers, result) 113 | end 114 | 115 | defp first_n([ [first | nested] | rest ], n, mappers, result) do 116 | first_n([ first | [ nested | rest ] ], n, mappers, result) 117 | end 118 | 119 | # Otherwise just a path as the first entry 120 | 121 | defp first_n(path_list, 0, _mappers, result), do: {result, path_list} 122 | defp first_n([], _n, _mappers, result), do: {result, []} 123 | 124 | defp first_n([ path | rest ], n, mappers, result) do 125 | # Should figure out a way to pass this in. 126 | time_opts = [time: :posix] 127 | 128 | # File.stat! blows up on dangling symlink, until File.lstat! is in elixir 129 | # add this workaround. 130 | lstat = :file.read_link_info(path, time_opts) 131 | stat = 132 | case lstat do 133 | {:ok , fileinfo } ->File.Stat.from_record(fileinfo) 134 | {:error, reason} -> 135 | raise File.Error, reason: reason, action: "read file stats", path: path 136 | end 137 | 138 | case stat.type do 139 | :directory -> 140 | first_n([files_in(path) | rest], 141 | n, 142 | mappers, 143 | mappers.include_dir_names.(mappers.include_stat.(path, stat), result)) 144 | 145 | :regular -> 146 | handle_regular_file(path,stat,rest,n,mappers,result) 147 | :symlink -> 148 | if(include_stat?(mappers)) do 149 | handle_regular_file(path,stat,rest,n,mappers,result) 150 | else 151 | handle_symlink(path,time_opts,rest,n,mappers,result) 152 | end 153 | _ -> 154 | first_n(rest, n, mappers, result) 155 | end 156 | end 157 | 158 | defp files_in(path) do 159 | path 160 | |> :file.list_dir 161 | |> ignore_error(path) 162 | |> Enum.map(fn(rel) -> Path.join(path, rel) end) 163 | end 164 | 165 | def ignore_error({:error, type}, path) do 166 | Logger.info("Ignore folder #{path} 
(#{type})") 167 | [] 168 | end 169 | 170 | def ignore_error({:ok, list}, _path), do: list 171 | 172 | # Notes on symlinks. 173 | #A symlink can be either 174 | # 175 | # A file 176 | # 177 | # A directory 178 | # 179 | # A dangling link 180 | # 181 | # Without any options, DirWalker returns a list of all the "files" in the paths. 182 | # For symlinks using File.stat! works on options 1,2 and blows up on 3. 183 | # file:read_link_info doesn't blow up on any of these, but requires the user to deal 184 | # with symlinks in some fashion. 185 | 186 | # I think the "right" thing to do is emulate the current behaviour, if the user 187 | # does not specify any options. If they specify :include_stat, then the code should 188 | # simply return a list and it's up to the user to deal. 189 | 190 | # It also might make sense to add an :ignore_symlinks, option.# 191 | 192 | defp handle_symlink(path,time_opts,rest,n,mappers,result) do 193 | rstat = File.stat(path,time_opts) 194 | case rstat do 195 | {:ok , rstat } -> 196 | handle_existing_symlink(path,rstat,rest,n,mappers,result) 197 | {:error, :enoent } -> 198 | Logger.info("Dangling symlink found: #{path}") 199 | handle_regular_file(path,rstat,rest,n,mappers,result) 200 | {:error, reason} -> 201 | Logger.info("Stat failed on #{path} with #{reason}") 202 | { result, [] } 203 | end 204 | end 205 | 206 | # This emulates existing behaviour, but does not return just the symlink 207 | # when include_stat is set. 208 | defp handle_existing_symlink(path,stat,rest,n,mappers,result) do 209 | case stat.type do 210 | :directory -> 211 | first_n([files_in(path) | rest], 212 | n, 213 | mappers, 214 | mappers.include_dir_names.(mappers.include_stat.(path, stat), result)) 215 | :regular -> 216 | handle_regular_file(path,stat,rest,n,mappers,result) 217 | true -> 218 | first_n(rest, n-1, mappers, [ result ]) 219 | end 220 | 221 | end 222 | 223 | # Extract this into function since we need it multiple places. 
224 | defp handle_regular_file(path,stat,rest,n,mappers,result) do 225 | if mappers.matching.(path) do 226 | first_n(rest, n-1, mappers, [ mappers.include_stat.(path, stat) | result ]) 227 | else 228 | first_n(rest, n, mappers, result) 229 | end 230 | end 231 | 232 | defp include_stat?(mappers) do 233 | mappers.include_stat.(:a, :b) == {:a, :b} 234 | end 235 | 236 | defp setup_mappers(opts) do 237 | %{ 238 | include_stat: 239 | one_of(opts[:include_stat], 240 | fn (path, _stat) -> path end, 241 | fn (path, stat) -> {path, stat} end), 242 | 243 | include_dir_names: 244 | one_of(opts[:include_dir_names], 245 | fn (_path, result) -> result end, 246 | fn (path, result) -> [ path | result ] end), 247 | matching: 248 | one_of(!!opts[:matching], 249 | fn _path -> true end, 250 | fn path -> String.match?(path, opts[:matching]) end), 251 | } 252 | end 253 | 254 | defp one_of(bool, _if_false, if_true) when bool, do: if_true 255 | defp one_of(_bool, if_false, _if_true), do: if_false 256 | end 257 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule DirWalker.Mixfile do 2 | use Mix.Project 3 | 4 | @moduledoc """ 5 | DirWalker lazily traverses one or more directory trees, depth first, 6 | returning successive file names. Provides both a `next()` and 7 | a Stream-based API. 8 | 9 | Directory names may optionally be returned. The File.Stat structure 10 | associated with the file name may also optionally be returned. 11 | """ 12 | 13 | def project do 14 | [ 15 | app: :dir_walker, 16 | version: "0.0.8", 17 | elixir: ">= 1.5.0", 18 | deps: deps(), 19 | description: @moduledoc, 20 | package: package() 21 | ] 22 | end 23 | 24 | def application do 25 | [applications: [:logger]] 26 | end 27 | 28 | defp package do 29 | [ 30 | files: [ 31 | "lib", 32 | "mix.exs", 33 | "README.md" 34 | ], 35 | contributors: [ 36 | "Dave Thomas ", 37 | "Booker C. 
Bense " 38 | ], 39 | licenses: [ 40 | "Same as Elixir" 41 | ], 42 | links: %{ 43 | "GitHub" => "https://github.com/pragdave/dir_walker" 44 | }, 45 | ] 46 | end 47 | 48 | def deps do 49 | [ 50 | {:ex_doc, "~> 0.5", only: :dev}, 51 | ] 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /test/dir/a.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pragdave/dir_walker/d2df87320fdfdc4b3b0f5bbba63d9e794c13f649/test/dir/a.txt -------------------------------------------------------------------------------- /test/dir/b.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pragdave/dir_walker/d2df87320fdfdc4b3b0f5bbba63d9e794c13f649/test/dir/b.txt -------------------------------------------------------------------------------- /test/dir/badlink: -------------------------------------------------------------------------------- 1 | ./not_there -------------------------------------------------------------------------------- /test/dir/c/d/f.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pragdave/dir_walker/d2df87320fdfdc4b3b0f5bbba63d9e794c13f649/test/dir/c/d/f.txt -------------------------------------------------------------------------------- /test/dir/goodlink: -------------------------------------------------------------------------------- 1 | ./a.txt -------------------------------------------------------------------------------- /test/dir_walker_test.exs: -------------------------------------------------------------------------------- 1 | defmodule DirWalkerTest do 2 | use ExUnit.Case 3 | 4 | 5 | test "basic traversal works" do 6 | test_files = ["test/dir/a.txt", "test/dir/b.txt", "test/dir/badlink", "test/dir/c/d/f.txt", "test/dir/goodlink"] 7 | {:ok, walker} = DirWalker.start_link("test/dir") 8 | files = 
DirWalker.next(walker, 99) 9 | assert length(files) == 5 10 | assert Enum.sort(files) == Enum.sort(test_files) 11 | end 12 | 13 | # Travis CI returns files in different order. 14 | test "traversal in chunks works" do 15 | test_files = ["test/dir/a.txt", "test/dir/b.txt", "test/dir/badlink", "test/dir/c/d/f.txt", "test/dir/goodlink"] 16 | {:ok, walker} = DirWalker.start_link("test/dir") 17 | 18 | found_files = for _path <- test_files do 19 | files = DirWalker.next(walker) 20 | assert length(files) == 1 21 | filename = Enum.at(files,0) 22 | assert Enum.member?(test_files,filename) 23 | filename 24 | end |> Enum.into([]) 25 | assert DirWalker.next(walker) == nil 26 | assert Enum.sort(found_files) == Enum.sort(test_files) 27 | end 28 | 29 | test "returns only matching names if requested" do 30 | test_files = [ "test/dir/a.txt","test/dir/badlink", "test/dir/c/d/f.txt" ] 31 | {:ok, walker} = DirWalker.start_link("test/dir", matching: ~r(a|f)) 32 | for _path <- test_files do 33 | files = DirWalker.next(walker) 34 | assert length(files) == 1 35 | filename = Enum.at(files,0) 36 | assert Enum.member?(test_files,filename) 37 | end 38 | assert DirWalker.next(walker) == nil 39 | end 40 | 41 | test "matching names works with different matching order " do 42 | test_files = [ "test/dir/b.txt","test/dir/badlink" ] 43 | {:ok, walker} = DirWalker.start_link("test/dir", matching: ~r(b)) 44 | found_files = for _path <- test_files do 45 | files = DirWalker.next(walker) 46 | assert length(files) == 1 47 | filename = Enum.at(files,0) 48 | assert Enum.member?(test_files,filename) 49 | filename 50 | end |> Enum.into([]) 51 | 52 | assert DirWalker.next(walker) == nil 53 | assert Enum.sort(found_files) == Enum.sort(test_files) 54 | end 55 | 56 | test "returns both matching names and stats if asked to " do 57 | test_types = [ :regular , :symlink ] 58 | test_files = [ "test/dir/a.txt","test/dir/badlink", "test/dir/c/d/f.txt" ] 59 | {:ok, walker} = DirWalker.start_link("test/dir", 60 | matching: 
~r(a|f), 61 | include_stat: true) 62 | for _path <- test_files do 63 | [{filename, fstat}] = DirWalker.next(walker) 64 | assert Enum.member?(test_types,fstat.type) 65 | assert Enum.member?(test_files,filename) 66 | end 67 | assert DirWalker.next(walker) == nil 68 | end 69 | 70 | test "returns stat if asked to" do 71 | {:ok, walker} = DirWalker.start_link("test/dir/c", include_stat: true) 72 | files = DirWalker.next(walker, 99) 73 | assert length(files) == 1 74 | assert [ {"test/dir/c/d/f.txt", %File.Stat{}} ] = files 75 | end 76 | 77 | test "returns directory names if asked to" do 78 | test_files = ["test/dir/c/d/f.txt", "test/dir/c/d/e", "test/dir/c/d"] 79 | {:ok, walker} = DirWalker.start_link("test/dir/c/d", include_dir_names: true) 80 | files = DirWalker.next(walker, 99) 81 | assert length(files) == 3 82 | assert Enum.sort(test_files) == Enum.sort(files) 83 | end 84 | 85 | test "returns directory names and stats if asked to" do 86 | test_files = ["test/dir/c/d/f.txt", "test/dir/c/d/e", "test/dir/c/d"] 87 | {:ok, walker} = DirWalker.start_link("test/dir/c/d", 88 | include_stat: true, 89 | include_dir_names: true) 90 | files = DirWalker.next(walker, 99) 91 | assert length(files) == 3 92 | assert [{file1, s1 = %File.Stat{}}, 93 | {file2, s2 = %File.Stat{}}, 94 | {file3, s3 = %File.Stat{}}] = files 95 | found_files = [file1,file2,file3] 96 | found_stats = [s1,s2,s3] 97 | assert Enum.sort(found_files) == Enum.sort(test_files) 98 | should_be_dir = Enum.at(found_stats,Enum.find_index(found_files, fn(x) -> x == "test/dir/c/d/e" end)) 99 | should_be_file = Enum.at(found_stats,Enum.find_index(found_files, fn(x) -> x == "test/dir/c/d/f.txt" end)) 100 | assert should_be_file.type == :regular 101 | assert should_be_dir.type == :directory 102 | end 103 | 104 | test "returns symlink as file type with include_stat option" do 105 | {:ok, walker} = DirWalker.start_link("test/dirlink", 106 | include_stat: true) 107 | [{"test/dirlink", stat }] = DirWalker.next(walker) 108 | assert 
stat.type == :symlink 109 | end 110 | 111 | test "follows symlinks without include_stat option" do 112 | test_files = ["test/dirlink/a.txt", "test/dirlink/b.txt", "test/dirlink/badlink", "test/dirlink/c/d/f.txt", "test/dirlink/goodlink"] 113 | {:ok, walker} = DirWalker.start_link("test/dirlink") 114 | files = DirWalker.next(walker, 99) 115 | assert length(files) == 5 116 | assert Enum.sort(files) == Enum.sort(test_files) 117 | end 118 | 119 | test "stop method works" do 120 | {:ok, walker} = DirWalker.start_link("test/dir") 121 | assert DirWalker.stop(walker) == :ok 122 | refute Process.alive?(walker) 123 | end 124 | 125 | # Travis CI returns files in different order. 126 | test "stream method works" do 127 | test_files = ["test/dir/a.txt", "test/dir/b.txt", "test/dir/badlink","test/dir/c/d/f.txt", "test/dir/goodlink"] 128 | dirw = DirWalker.stream("test/dir") 129 | file = Enum.take(dirw,1) 130 | assert length(file) == 1 131 | filename = Enum.at(file,0) 132 | assert Enum.member?(test_files,filename) 133 | end 134 | 135 | test "stream method completes" do 136 | test_files = ["test/dir/a.txt", "test/dir/b.txt", "test/dir/badlink", "test/dir/c/d/f.txt", "test/dir/goodlink"] 137 | dirw = DirWalker.stream("test/dir") 138 | files = Enum.into(dirw,[]) 139 | assert Enum.sort(files) == Enum.sort(test_files) 140 | end 141 | 142 | test "stream method takes options" do 143 | paths = [ "test/dir/a.txt", "test/dir/c/d/f.txt", "test/dir/badlink" ] 144 | dirw = DirWalker.stream("test/dir", matching: ~r(a|f)) 145 | files = Enum.into(dirw,[]) 146 | assert Enum.sort(files) == Enum.sort(paths) 147 | end 148 | 149 | 150 | end 151 | -------------------------------------------------------------------------------- /test/dirlink: -------------------------------------------------------------------------------- 1 | ./dir -------------------------------------------------------------------------------- /test/test_helper.exs: 
-------------------------------------------------------------------------------- 1 | defmodule DirWalker.TestHelper do 2 | import ExUnit.Assertions 3 | 4 | def assert_list_equal(actual, expected) do 5 | import Enum, only: [ sort: 1 ] 6 | assert length(actual) == length(expected) 7 | assert sort(actual) == sort(expected) 8 | end 9 | end 10 | 11 | ExUnit.start() 12 | 13 | git_wont_track_empty_dir = "test/dir/c/d/e" 14 | 15 | File.mkdir(git_wont_track_empty_dir) --------------------------------------------------------------------------------