├── test_data ├── subdir │ └── file3 ├── file1 └── file2 ├── .travis.yml ├── .gitignore ├── test ├── test_helper.exs ├── elixgrep_test.exs └── find_plugin_test.exs ├── lib ├── parallel.ex └── elixgrep.ex ├── plugins ├── grep.exs └── find.exs ├── config └── config.exs ├── LICENSE ├── mix.exs └── README.md /test_data/subdir/file3: -------------------------------------------------------------------------------- 1 | This is file3 2 | -------------------------------------------------------------------------------- /test_data/file1: -------------------------------------------------------------------------------- 1 | This is file 1, Elixir Rules! 2 | -------------------------------------------------------------------------------- /test_data/file2: -------------------------------------------------------------------------------- 1 | This is file 2, Elixir Rules! 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: elixir 2 | elixir: 3 | - 1.3.4 4 | 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | deps 3 | *.o 4 | *.beam 5 | *.plt 6 | erl_crash.dump 7 | *DS_Store* 8 | _build 9 | elixgrep 10 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | 3 | File.touch("test_data/file2") 4 | # Make sure file2 is older 5 | :timer.sleep(2000) -------------------------------------------------------------------------------- /lib/parallel.ex: -------------------------------------------------------------------------------- 1 | 2 | defmodule Parallel do 3 | def pmap(collection, fun) do 4 | me = self 5 | 6 | collection 7 | |> Enum.map(fn (elem) -> spawn_link 
fn -> (send me, { self, fun.(elem) }) end end) 8 | |> Enum.map(fn (pid) -> receive do { ^pid, result } -> result end end) 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /plugins/grep.exs: -------------------------------------------------------------------------------- 1 | defmodule EgPlugin.Grep do 2 | 3 | @moduledoc """ 4 | This is a test plugin that should work exactly the same as the default 5 | functions used in elixgrep. All plugins must send the :all_done_boss 6 | message after finalization. These functions aren't map/reduce in the 7 | classic functional sense, but in the Hadoop sense. 8 | """ 9 | 10 | def gr_reduce(options) do 11 | receive do 12 | { :item, path, results } -> 13 | results |> Enum.map(fn(str) -> IO.write("#{path}: #{str}") end ) 14 | gr_reduce(options) 15 | 16 | { :finalize } -> 17 | IO.puts("Signing off from grep plugin") 18 | send options.master_pid, { :all_done_boss } 19 | exit(:normal) 20 | 21 | end 22 | end 23 | 24 | def gr_map(options,path) do 25 | %{ search: string } = options 26 | match = Regex.compile!(string) 27 | File.stream!(path) 28 | |> Stream.filter(fn(line) -> Regex.match?(match,line) end ) 29 | |> Enum.map( fn(x) -> x end ) 30 | end 31 | 32 | end 33 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Mix.Config module. 3 | use Mix.Config 4 | 5 | # This configuration is loaded before any dependency and is restricted 6 | # to this project. If another project depends on this project, this 7 | # file won't be loaded nor affect the parent project. For this reason, 8 | # if you want to provide default values for your application for third- 9 | # party users, it should be done in your mix.exs file. 
10 | 11 | # Sample configuration: 12 | # 13 | # config :logger, 14 | # level: :info, 15 | # format: "$time $metadata[$level] $levelpad$message\n" 16 | 17 | # It is also possible to import configuration files, relative to this 18 | # directory. For example, you can emulate configuration per environment 19 | # by uncommenting the line below and defining dev.exs, test.exs and such. 20 | # Configuration from the imported file will override the ones defined 21 | # here (which is why it is important to import them last). 22 | # 23 | # import_config "#{Mix.env}.exs" 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 bbense 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Elixgrep.Mixfile do 2 | use Mix.Project 3 | 4 | def project do 5 | [app: :elixgrep, 6 | version: "0.3.1", 7 | elixir: "> 1.0.0", 8 | name: "elixgrep", 9 | source_url: "https://github.com/bbense/elixgrep", 10 | homepage_url: "https://github.com/bbense/elixgrep/wiki", 11 | escript: escript(), 12 | description: description(), 13 | package: package(), 14 | deps: deps()] 15 | end 16 | 17 | 18 | def escript do 19 | [main_module: Elixgrep] 20 | end 21 | # Configuration for the OTP application 22 | # 23 | # Type `mix help compile.app` for more information 24 | def application do 25 | [applications: [:logger ]] 26 | end 27 | 28 | # Dependencies can be Hex packages: 29 | # 30 | # {:mydep, "~> 0.3.0"} 31 | # 32 | # Or git/path repositories: 33 | # 34 | # {:mydep, git: "https://github.com/elixir-lang/mydep.git", tag: "0.1.0"} 35 | # 36 | # Type `mix help deps` for more examples and options 37 | 38 | def deps do 39 | [{:earmark, "~> 1.0", only: :dev}, 40 | {:dir_walker, git: "https://github.com/bbense/dir_walker.git" }, 41 | {:pluginator, git: "https://github.com/bbense/pluginator.git" }, 42 | {:ex_doc, "~> 0.14", only: :dev}] 43 | end 44 | 45 | defp description do 46 | """ 47 | A framework for doing Hadoop style map/reduce on lists of files/directories. 48 | The initial list of plugins implements concurrent versions of the unix find 49 | and grep utilities. 
50 | """ 51 | end 52 | 53 | defp package do 54 | [# These are the default files included in the package 55 | files: ["lib", "plugin", "mix.exs", "README*", "readme*", "LICENSE*"], 56 | contributors: ["Booker Bense "], 57 | licenses: ["MIT"], 58 | links: %{"GitHub" => "https://github.com/bbense/elixgrep", 59 | "Docs" => "http://bbense.github.io/elixgrep/"}] 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /test/elixgrep_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ElixgrepTest do 2 | use ExUnit.Case , async: true 3 | #import ExUnit.CaptureIO 4 | 5 | # Does weird things. Seems to swallow any output errors. 6 | # test "-h returns module_doc" do 7 | # assert capture_io( fn -> 8 | # Elixgrep.main(["-h"]) 9 | # end 10 | # ) == "Usage:" 11 | # end 12 | 13 | test "count is set to default w/o -c" do 14 | {opts,args} = Elixgrep.parse_args(["fred", "/tmp/bar", "/tmp/foo"]) 15 | assert opts.count == 8 * :erlang.system_info(:logical_processors) 16 | assert args == ["fred","/tmp/bar","/tmp/foo"] 17 | end 18 | 19 | test "count is set with -c" do 20 | {opts,args} = Elixgrep.parse_args(["--count","10000","fred", "/tmp/bar", "/tmp/foo"]) 21 | assert opts.count == 10000 22 | assert args == ["fred","/tmp/bar","/tmp/foo"] 23 | end 24 | 25 | test "Custom args are parsed correctly" do 26 | {opts,args} = Elixgrep.parse_args(["-a","atime","fred", "/tmp/bar", "/tmp/foo"]) 27 | assert opts.a == "atime" 28 | assert args == ["fred","/tmp/bar","/tmp/foo"] 29 | end 30 | 31 | test "build_paths returns correct values" do 32 | {options, filestream } = Elixgrep.build_paths({%{:count => 1000},["fred","./test_data"]}) 33 | assert options.count == 1000 34 | assert options.search == "fred" 35 | files = Enum.to_list(filestream) 36 | tfiles = ["./test_data/file1","./test_data/file2","./test_data/subdir/file3"] 37 | assert Enum.sort(files) == Enum.sort(tfiles) 38 | end 39 | 40 | test 
"rehabilitate_args returns an option map" do 41 | bad_args = [{"-p", "people"}, {"-v", nil},{"--type","bad"}] 42 | good_opts = [p: "people", v: true, type: "bad"] 43 | assert good_opts == Elixgrep.rehabilitate_args(bad_args) 44 | end 45 | 46 | test "Eligrep.gr_map works" do 47 | opts = %{ search: "Elix"} 48 | path = "README.md" 49 | assert Elixgrep.gr_map(opts,path) == ["Elixgrep\n"] 50 | end 51 | 52 | # Tried 80 different ways to get this test to actually run. 53 | # Clearly there is something here I don't understand. 54 | # test "Elixgrep.gr_reduce prints strings" do 55 | # pid = spawn_link(fn ->Elixgrep.gr_reduce([]) end) 56 | # assert capture_io( 57 | # send pid, { :item ,"README.md",["Elixgrep\n"] } 58 | # ) == "README.md: Elixgrep\n" 59 | # end 60 | 61 | end 62 | -------------------------------------------------------------------------------- /test/find_plugin_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FindPluginTest do 2 | use ExUnit.Case 3 | 4 | setup_all do 5 | {:ok, test_plugin } = Pluginator.load_with_signature("find",[gr_map: 2],["./plugins"]) 6 | {:ok, test_plugin: test_plugin } 7 | end 8 | 9 | setup do 10 | File.touch("test_data/file1") 11 | end 12 | 13 | # Stolen from Dave Thomas's blog. 
14 | # for %{ md: md, html: html } <- StmdTest.Reader.tests do 15 | # @md Enum.join(Enum.reverse(md)) 16 | # @html Enum.join(Enum.reverse(html)) 17 | # test "\n--- === ---\n" <> @md <> "--- === ---\n" do 18 | # result = Earmark.to_html(@md) 19 | # assert result == @html 20 | # end 21 | # end 22 | 23 | # This should really be two nested enums on ["mtime","ctime","atime"] 24 | # and ["newer","older","around"] 25 | test "EgPlugin.Find.gr_map newer mtime fails when target older", context do 26 | options = %{ search: "newer" , mtime: "./test_data/file1" } 27 | path = "./test_data/file2" 28 | assert [] == context[:test_plugin].gr_map(options,path) 29 | end 30 | 31 | test "EgPlugin.Find.gr_map newer mtime succedes when target newer", context do 32 | options = %{ search: "newer" , mtime: "./test_data/file2" } 33 | path = "./test_data/file1" 34 | assert ["newer"] == context[:test_plugin].gr_map(options,path) 35 | end 36 | 37 | test "EgPlugin.Find.gr_map older mtime succedes when target older", context do 38 | options = %{ search: "older" , mtime: "./test_data/file1" } 39 | path = "./test_data/file2" 40 | assert ["older"] == context[:test_plugin].gr_map(options,path) 41 | end 42 | 43 | test "EgPlugin.Find.gr_map older time fails when target older", context do 44 | options = %{ search: "older" , mtime: "./test_data/file2" } 45 | path = "./test_data/file1" 46 | assert [] == context[:test_plugin].gr_map(options,path) 47 | end 48 | 49 | test "EgPlugin.Find.gr_map around mtime succedes ", context do 50 | options = %{ search: "around" , mtime: "./test_data/file1" } 51 | path = "./test_data/file2" 52 | assert ["around"] == context[:test_plugin].gr_map(options,path) 53 | end 54 | 55 | test "EgPlugin.Find.gr_map can search names via regexp", context do 56 | options = %{ search: "file.*" } 57 | path = "./test_data/file2" 58 | assert ["file.*"] == context[:test_plugin].gr_map(options,path) 59 | end 60 | 61 | 62 | 63 | end 64 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Elixgrep 2 | ======== 3 | 4 | ![alt text](https://api.travis-ci.org/bbense/elixgrep.png "Travis CI build status") 5 | 6 | A project to learn elixir by building a grep that will cause the fan on my laptop to 7 | turn on when it runs. It has since expanded to a general purpose program for running 8 | a function on many files and returning the results. 9 | 10 | Currently supports a simple string search in a list of files and directories. It will 11 | expand to search all regular files in the subtree of any directory you give it. It 12 | also has the ability to load elixir code plugins to implement a general map/reduce 13 | (in the Hadoop sense). 14 | 15 | 16 | 17 | Usage 18 | ======= 19 | ``` 20 | Usage: 21 | elixgrep [string] [files and/or directories] 22 | 23 | Options: 24 | -h, [--help] # Show this help message and quit. 25 | -c, [--count] filecount # Number of files to search in parallel. 26 | # Default is 8 per logical processor. 27 | -p, [--plugin] elixir_code # Basename of a plugin to implement 28 | # other functions. 29 | 30 | Description: 31 | Prints all the lines in file containing string ( default) 32 | 33 | Runs a version of map/reduce on the file list given on the command 34 | line. Requires an elixir module that implements two functions. 35 | 36 | gr_map(options,path) -> {path,[]} 37 | gr_reduce(options) Expects to receive two kinds of messages 38 | { item: { path, []}} 39 | { finalize: } -> Should output results and exit. 40 | ``` 41 | 42 | Build 43 | ===== 44 | 45 | Run `mix escript.build` to create the elixgrep executable. 46 | 47 | WARNING 48 | ======== 49 | 50 | This program can easily drive the load on your machine to the number of available cores 51 | if you point it at a large enough set of files.
If you do decide to use it on a production 52 | server, use the count option to limit its use of resources. 53 | 54 | Plugins 55 | ======== 56 | 57 | There are currently two plugins available: 58 | 59 | grep 60 | ---- 61 | elixgrep -p grep [PCRE regexp] [files/directories] 62 | 63 | This plugin will list out all the files and lines that 64 | match the regex; it only examines files one line at a time 65 | so it won't match multiline regex. 66 | 67 | find 68 | ---- 69 | 70 | This plugin will search for files based on either their basename or 71 | File.stat output. 72 | 73 | elixgrep -p find [regex] [files/directories] 74 | 75 | Without any attribute arguments, the plugin uses the given regex 76 | to match against the basename of the files. 77 | 78 | elixgrep -p find --mtime target_file [verb] [files/directories] 79 | 80 | You can use the keywords of the File.Stat structure as command line 81 | arguments and it will search on that attribute. Currently it only 82 | supports the time values `--atime, --ctime, --mtime` 83 | 84 | The verbs you can use with the time stat options are: 85 | 86 | - `newer` Find files newer than the target file. 87 | - `older` Find files older than the target file. 88 | - `around` Find files that are within `--delta seconds` of the target file. The default delta is 24 hours. 89 | 90 | 91 | 92 | To Do 93 | ===== 94 | 95 | Add other attributes/verbs to find plugin. Figure out how to use a 96 | date string instead of target file in attributes. Add 1h30m type 97 | parsing to `--delta` option. 98 | 99 | Expand plugins to implement a tripwire, access monitoring, etc. 100 | 101 | Work on creating a plugin path discovery mechanism. 102 | 103 | Figure out how plugins can implement plugin-specific help.
# /plugins/find.exs
defmodule EgPlugin.Find do
  @moduledoc """
  This is a plugin to duplicate some of the functionality of the find command.
  It also demonstrates how a plugin can use the wildcard command line options.
  The syntax is as follows

      --plugin find --atime file_or_parsable_time_string [verb] [paths or files]

  For time based attributes the verbs are older, newer, around.
  The default range for around is 24hrs, use `--delta seconds` to change it.
  For now it just supports the time based parameters in File.Stat
  [ atime, ctime, mtime ]

  Without any additional arguments it assumes the verb is a string regexp
  and matches the basename against the regexp.

  * ToDo: Figure out how to have -h in the main program call help in the plugin.
  * ToDo: Add parsable time string parsing.
  * ToDo: Add find style delta parsing

  Planned time units for the find style delta parsing (not implemented yet;
  `--delta` currently takes a plain integer number of seconds):

      s       second
      m       minute (60 seconds)
      h       hour (60 minutes)
      d       day (24 hours)
      w       week (7 days)

  Any number of units may be combined in one `--delta` argument, for example, `--delta 1h30m`.
  """

  # Default `around` window in seconds (24 hours).
  @default_delta 86400

  @doc """
  The gr_reduce function collects the results of applying gr_map to each file.

  It loops on `receive`, handling two messages:

    * `{:item, path, results}` - `results` is exactly the output of `gr_map/2`
      for `path`; each entry is printed as `path: entry`.
    * `{:finalize}` - prints a sign-off line, sends `{:all_done_boss}` to
      `options.master_pid` and exits the process normally.
  """
  def gr_reduce(options) do
    receive do
      {:item, path, results} ->
        # Printing is a pure side effect, so no result list is built.
        Enum.each(results, fn str -> IO.puts("#{path}: #{str}") end)
        gr_reduce(options)

      {:finalize} ->
        IO.puts("Signing off from find plugin")
        send(options.master_pid, {:all_done_boss})
        exit(:normal)
    end
  end

  @doc """
  elixgrep runs this function in parallel on every file in its expanded
  pathlist. The search string is always the first string in the cmd line that is
  not an arg.

  `options.search` selects the test: `"older"`, `"newer"` and `"around"` compare
  a time attribute of `path` with the target file named in the options; any
  other value is used as a regexp against the basename of `path`.

  Returns `[search]` when the file passes the test and `[]` otherwise.
  """
  def gr_map(options, path) do
    %{search: string} = options

    matched? =
      case string do
        "older" -> compare_time(fn a, b -> a > b end, options, path)
        "newer" -> compare_time(fn a, b -> a < b end, options, path)
        "around" -> compare_time(fn a, b -> around(options, a, b) end, options, path)
        _ -> match_name(options, path)
      end

    if matched?, do: [string], else: []
  end

  @doc """
  Returns `true` when the basename of `path` matches `options.search`
  compiled as a regular expression. Raises if the pattern does not compile.
  """
  def match_name(options, path) do
    %{search: string} = options

    string
    |> Regex.compile!()
    |> Regex.match?(Path.basename(path))
  end

  @doc """
  Applies `judge` to the chosen time attribute of the target file and of `path`.

  The attribute is the first of `:atime`, `:ctime`, `:mtime` present in
  `options`; its value is the path of the target file. `judge` is called as
  `judge.(target_time, path_time)`. Raises when no time attribute is present.
  """
  def compare_time(judge, options, path) do
    case options do
      %{atime: arg} -> judge.(file_time(arg, :atime), file_time(path, :atime))
      %{ctime: arg} -> judge.(file_time(arg, :ctime), file_time(path, :ctime))
      %{mtime: arg} -> judge.(file_time(arg, :mtime), file_time(path, :mtime))
      _ -> raise "compare_time called w/o time choice in #{inspect options}"
    end
  end

  @doc """
  Returns `true` when `a` and `b` are less than `options.delta` seconds apart.

  `:delta` may be a string of digits (as it arrives from the command line) or
  an integer; when absent the default of 24 hours is used.
  """
  def around(options, a, b) do
    window =
      case options do
        %{delta: interval} when is_integer(interval) -> interval
        %{delta: interval} -> String.to_integer(interval)
        _ -> @default_delta
      end

    abs(a - b) < window
  end

  @doc """
  Returns the `time_value` field (`:atime`, `:ctime` or `:mtime`) of `path`
  as POSIX seconds. Raises if `path` cannot be stat'ed.
  """
  def file_time(path, time_value) do
    path
    |> File.stat!(time: :posix)
    |> Map.get(time_value)
  end

  # iex(48)st = %File.Stat{access: :read_write, atime: {{2014, 9, 17}, {13, 50, 51}},
  #   ctime: {{2014, 9, 16}, {13, 41, 41}}, gid: 501, inode: 44370747, links: 17,
  #   major_device: 16777218, minor_device: 0, mode: 16877,
  #   mtime: {{2014, 9, 16}, {13, 41, 41}}, size: 578, type: :directory, uid: 501}
  # iex(49)> st.access
  # :read_write
  # iex(50)> st.atime
  # {{2014, 9, 17}, {13, 50, 51}}
end

# /lib/elixgrep.ex
defmodule Elixgrep do
  @moduledoc """
  Usage:
    elixgrep [string] [files and/or directories]

  Options:
    -h, [--help]            # Show this help message and quit.
    -c, [--count] filecount # Number of files to process in parallel

  Description:

    Runs a version of map/reduce on the file list given on the command
    line. Requires an elixir module that implements two functions.

    gr_map(options,path) -> {path,[]}
    gr_reduce(options) Expects to recieve two kinds of messages
    { item: { path, []}}
    { finalize: } -> Should output results and exit.
  """

  @ofiles_per_core 8
  @plugin_path ["./plugins"]
  @plugin_funcs [gr_reduce: 1, gr_map: 2]

  @doc "Number of logical processors reported by the Erlang VM."
  def processing_units, do: :erlang.system_info(:logical_processors)

  require DirWalker

  @doc """
  Default reducer: prints every result line as `path: line`.

  Loops on `receive`. `{:item, path, results}` prints the results and recurses;
  `{:finalize}` sends `{:all_done_boss}` to `options.master_pid` and exits the
  process normally.
  """
  def gr_reduce(options) do
    receive do
      {:item, path, results} ->
        # IO.write, not IO.puts: lines read from the file keep their newline.
        Enum.each(results, fn str -> IO.write("#{path}: #{str}") end)
        gr_reduce(options)

      {:finalize} ->
        send(options.master_pid, {:all_done_boss})
        exit(:normal)
    end
  end

  @doc """
  Default mapper: returns the lines of the file at `path` that contain
  `options.search` as a plain substring.
  """
  def gr_map(options, path) do
    %{search: string} = options

    path
    |> File.stream!()
    |> Stream.filter(fn line -> String.contains?(line, string) end)
    |> Enum.to_list()
  end

  @doc "Spawns a linked process running `options.reduce_func` and returns its pid."
  def start_reduce(options) do
    spawn_link(fn -> options.reduce_func.(options) end)
  end

  @doc "Escript entry point: parse, expand paths, start the reducer, map, then wait."
  def main(args) do
    args
    |> parse_args()
    |> build_paths()
    |> background()
    |> process()
    |> cleanup()
  end

  @doc """
  Merges the switches OptionParser rejected (`bad_opts`) with the ones it
  accepted (`opts`). On a key clash the accepted value wins.
  """
  def merge_opts(opts, bad_opts) do
    bad_opts
    |> rehabilitate_args()
    |> Keyword.merge(opts)
  end

  @doc """
  Parses the command line.

  Returns `:help` whenever `--help`/`-h` is set, regardless of what other
  switches accompany it. Otherwise returns `{options, args}` where `options`
  is a map holding the defaults (`:count`, `:map_func`, `:reduce_func`)
  overridden by the parsed switches, and `args` are the positional arguments.
  Switches OptionParser does not know are recovered through
  `rehabilitate_args/1` so plugins can define their own.
  """
  def parse_args(args) do
    options = %{
      :count => @ofiles_per_core * processing_units(),
      :map_func => &gr_map/2,
      :reduce_func => &gr_reduce/1
    }

    {opts, rest, bad_opts} =
      OptionParser.parse(args,
        switches: [help: :boolean, count: :integer, plugin: :string],
        aliases: [h: :help, c: :count, p: :plugin]
      )

    cond do
      # Help wins even when combined with other switches.
      Keyword.get(opts, :help, false) -> :help
      bad_opts == [] -> {Enum.into(opts, options), rest}
      true -> {Enum.into(merge_opts(opts, bad_opts), options), rest}
    end
  end

  @doc """
  Turns the `{switch, value}` pairs OptionParser rejected into a keyword list.

  The pairs are flattened back into an argv list, `nil` values are dropped,
  single-dash switches are rewritten to double-dash form and the result is
  parsed again without a switch specification.
  """
  def rehabilitate_args(bad_args) do
    bad_args
    |> Enum.flat_map(&Tuple.to_list/1)
    |> Enum.filter(fn str -> str end)
    |> Enum.map(fn str -> String.replace(str, ~r/^\-([^-]+)/, "--\\1") end)
    |> OptionParser.parse()
    |> elem(0)
  end

  @doc """
  Splits the positional arguments into the search string and the paths.

  Returns `{options_with_search, stream_of_files}`. Given `:help`, or an empty
  argument list (no search string), prints the usage text and halts the VM.
  """
  # Avoid expanding the stream.
  def build_paths({options, [head | tail]}) do
    {Map.put(options, :search, head), DirWalker.stream(tail)}
  end

  # No search string at all: show usage instead of a FunctionClauseError.
  def build_paths({_options, []}) do
    IO.puts(@moduledoc)
    System.halt(1)
  end

  def build_paths(:help) do
    IO.puts(@moduledoc)
    System.halt(0)
  end

  @doc """
  Loads the plugin (when `:plugin` is set) and starts the reducer process.

  The reducer receives the options plus `:master_pid` (this process); the
  returned options carry `:reduce_pid` instead.
  """
  def background({options, filestream}) do
    next_opt =
      if Map.has_key?(options, :plugin), do: load_plugin(options), else: options

    pid = next_opt |> Map.put(:master_pid, self()) |> start_reduce()
    {Map.put(next_opt, :reduce_pid, pid), filestream}
  end

  @doc """
  Runs the map function over the files and feeds the reducer.

  With a one-element list it maps that single path, sends the `:item` message
  and returns `options`. With a stream it processes `options.count` files at a
  time in parallel, then sends `{:finalize}` to the reducer and returns that
  message.
  """
  def process({options, [path]}) do
    send(options.reduce_pid, {:item, path, options.map_func.(options, path)})
    options
  end

  def process({options, filestream}) do
    # NOTE(review): Stream.chunk/4 is deprecated in favour of
    # Stream.chunk_every/4 on newer Elixir; kept because the project still
    # targets releases that predate chunk_every.
    filestream
    |> Stream.chunk(options.count, options.count, [])
    |> Enum.each(fn filelist ->
      Parallel.pmap(filelist, fn path -> process({options, [path]}) end)
    end)

    send(options.reduce_pid, {:finalize})
  end

  @doc """
  Waits for the reducer to report `{:all_done_boss}` and halts the VM.

  Given `{:finalize}` the reducer has already been told to finish; given the
  options map the `{:finalize}` message is sent first.
  """
  def cleanup({:finalize}) do
    receive do
      {:all_done_boss} ->
        System.halt(0)
    end
  end

  # We really need the options in this version.
  def cleanup(options) do
    send(options.reduce_pid, {:finalize})

    receive do
      {:all_done_boss} ->
        System.halt(0)
    end
  end

  @doc """
  Loads the plugin named by `options.plugin` from the plugin path and replaces
  `:map_func` and `:reduce_func` with the plugin's `gr_map/2` and `gr_reduce/1`.
  """
  # Will fail with match error, which is sub-optimal
  def load_plugin(options) do
    {:ok, plugin} =
      Pluginator.load_with_signature(options.plugin, @plugin_funcs, @plugin_path)

    Map.merge(options, %{
      :map_func => fn opt, path -> plugin.gr_map(opt, path) end,
      :reduce_func => fn opt -> plugin.gr_reduce(opt) end
    })
  end
end