├── .gitignore ├── Makefile ├── README.md ├── TODO.txt ├── basis.dlisp ├── basis.erl ├── docs ├── Makefile ├── endnotes.sty ├── project-final-report.pdf ├── project-final-report.synctex.gz ├── project-final-report.tex ├── regression_results ├── regression_results.txt ├── results.csv └── usenix.sty ├── eval.erl ├── master.erl ├── mkparser.erl ├── parser.yrl ├── proposal.md ├── proposal.pdf ├── reader.erl ├── repl.erl ├── replmaster.sh ├── replworker.sh ├── scanner.xrl ├── stealingworker.erl └── thread_pool.erl /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | deps 3 | *.o 4 | *.beam 5 | *.plt 6 | erl_crash.dump 7 | ebin/*.beam 8 | rel/example_project 9 | .concrete/DEV_MODE 10 | .rebar 11 | parser.erl 12 | scanner.erl 13 | 14 | # LaTeX artifacts 15 | *.aux 16 | *.log 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC=erlc 2 | SOURCES=basis.erl eval.erl parser.erl reader.erl repl.erl thread_pool.erl \ 3 | master.erl stealingworker.erl 4 | OBJECTS=$(SOURCES:.erl=.beam) 5 | 6 | all: $(OBJECTS) parser 7 | 8 | %.beam: %.erl 9 | $(CC) $< 10 | 11 | parser: scanner.xrl parser.yrl 12 | ./mkparser.erl 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DLisp 2 | 3 | ## Writeup 4 | 5 | [docs/project-final-report.pdf](docs/project-final-report.pdf) 6 | 7 | ## How to run 8 | 9 | On computers all on the same network with different IPs, run: 10 | 11 | Master: `./replmaster.sh roundrobin|timed|memroundrobin` 12 | 13 | Worker(s): `./replworker.sh masterIP percentSlow stealing|solitary` 14 | 15 | Then you can enter expressions like `time(dmap(fac, range(0, 1000)));;` in the 16 | Master's REPL. 17 | 18 | ## Authors 19 | 20 | Maxwell Bernstein and Matthew Yaspan 21 | -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | * Add simple type check (sym/int/list, etc) 2 | * Add full type check 3 | * Fix issue where 1\n2 == 12 4 | -------------------------------------------------------------------------------- /basis.dlisp: -------------------------------------------------------------------------------- 1 | fun null?(xs) = xs == nil; 2 | 3 | check_expect(null?(nil), #t); 4 | check_expect(null?('[1 2 3]), #f); 5 | 6 | fun bingte(a,b) = not(a < b); 7 | fun binlte(a,b) = not(a > b); 8 | 9 | fun max(a,b) = if a > b then a else b; 10 | fun min(a,b) = if a < b then a else b; 11 | 12 | fun foldl(f, acc, xs) = 13 | if null?(xs) 14 | then acc 15 | else foldl(f, f(car(xs), acc), cdr(xs)); 16 | 17 | fun cadr(xs) = [car [cdr xs]]; 18 | 19 | fun vartimes(...) 
= foldl(bintimes, 1, ...); 20 | fun fac(n) = apply(vartimes, range(1, n+1)); 21 | 22 | fun range(start, fin) = 23 | if (start > fin) or (start == fin) 24 | then nil 25 | else cons(start, range(start+1, fin)); 26 | 27 | fun filter(p?, xs) = 28 | if null?(xs) 29 | then nil 30 | else 31 | if p?(car(xs)) 32 | then cons(car(xs), filter(p?, cdr(xs))) 33 | else filter(p?, cdr(xs)); 34 | 35 | fun length(xs) = 36 | if null?(xs) then 0 else 1+length(cdr(xs)); 37 | 38 | fun take(n, xs) = 39 | if n<1 then nil else cons(car(xs), take(n-1, cdr(xs))); 40 | 41 | fun drop(n, xs) = 42 | if n<1 then xs else drop(n-1, cdr(xs)); 43 | 44 | fun merge(xs, ys) = 45 | if null?(xs) 46 | then ys 47 | else if null?(ys) 48 | then xs 49 | else 50 | if car(xs) < car(ys) 51 | then cons(car(xs), merge(cdr(xs), ys)) 52 | else cons(car(ys), merge(xs, cdr(ys))); 53 | 54 | fun mergesort(xs) = 55 | if null?(xs) or null?(cdr(xs)) 56 | then xs 57 | else 58 | let* val size = length(xs), 59 | val half = size/2, 60 | val fsthalf = take(half, xs), 61 | val sndhalf = drop(half, xs) 62 | in 63 | merge(mergesort(fsthalf), mergesort(sndhalf)) 64 | end; 65 | 66 | fun quit() = 'quit; 67 | fun q() = quit(); 68 | 69 | fun o(f,g) = fn(x) = f(g(x)); 70 | 71 | fun currytwo(f) = fn(x) = fn(y) = f(x,y); 72 | 73 | fun reverse(xs) = 74 | let fun revapp(ys,acc) = 75 | if null?(ys) 76 | then acc 77 | else revapp(cdr(ys), cons(car(ys), acc)) 78 | in 79 | revapp(xs, nil) 80 | end; 81 | 82 | fun list(...) = reverse(foldl(cons, nil, ...));; 83 | -------------------------------------------------------------------------------- /basis.erl: -------------------------------------------------------------------------------- 1 | -module(basis). 2 | -export([basis/0]). 3 | -export([binop/2, intdiv/2, map_proc/2, pmap_proc/2, dmap_proc/2, exp_proc/2, 4 | not_proc/2, and_proc/2, or_proc/2, cons_proc/2, car_proc/2, 5 | cdr_proc/2, worker_proc/2, check_expect/2, save_state/2, load_state/2, 6 | print_proc/2, compile_proc/2, env_proc/2, class_proc/2, time_proc/2, 7 | remove_prims/1, bif/1, parallel_map/2, timerstart/0, timerend/0]). 
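%% Value shapes used throughout the interpreter (an informal summary of what
%% this module and eval.erl pattern-match on):
%%   {int, N}      -- fixnums
%%   {bool, B}     -- #t / #f
%%   {sym, A}      -- symbols (Erlang atoms)
%%   {list, Es}    -- lists of expressions/values
%%   {quote, E}    -- quoted expressions
%%   {prim, F}     -- built-in functions, where F(Args, Env) -> {Value, Env}
%%   {closure, Formals, Body, CapturedEnv}
%% Environments are proplists of {Name, Value} pairs (see eval:lookup/2).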
8 | 9 | 10 | basis() -> 11 | Defs = reader:read_program(file, "basis.dlisp"), 12 | lists:foldl(fun (Cur, Env) -> 13 | {_, NewEnv} = eval:evalexp(Cur, Env), 14 | NewEnv 15 | end, [ 16 | {'binplus', basis:bif(basis:binop(fun erlang:'+'/2, int))}, 17 | {'binminus', basis:bif(basis:binop(fun erlang:'-'/2, int))}, 18 | {'bintimes', basis:bif(basis:binop(fun erlang:'*'/2, int))}, 19 | {'bindiv', basis:bif(basis:binop(fun basis:intdiv/2, int))}, 20 | {'bineq', basis:bif(basis:binop(fun erlang:'=:='/2, bool))}, 21 | {'map', basis:bif(fun basis:map_proc/2)}, 22 | {'pmap', basis:bif(fun basis:pmap_proc/2)}, 23 | {'dmap', basis:bif(fun basis:dmap_proc/2)}, 24 | {'exp', basis:bif(fun basis:exp_proc/2)}, 25 | {'binlt', basis:bif(basis:binop(fun erlang:'<'/2, bool))}, 26 | {'bingt', basis:bif(basis:binop(fun erlang:'>'/2, bool))}, 27 | {'not', basis:bif(fun basis:not_proc/2)}, 28 | {'and', {prim, fun basis:and_proc/2}}, 29 | {'or', {prim, fun basis:or_proc/2}}, 30 | {'cons', basis:bif(fun basis:cons_proc/2)}, 31 | {'car', basis:bif(fun basis:car_proc/2)}, 32 | {'cdr', basis:bif(fun basis:cdr_proc/2)}, 33 | {'worker', basis:bif(fun basis:worker_proc/2)}, 34 | {'check_expect', {prim, fun basis:check_expect/2}}, 35 | {'save_state', basis:bif(fun basis:save_state/2)}, 36 | {'load_state', basis:bif(fun basis:load_state/2)}, 37 | {'print', basis:bif(fun basis:print_proc/2)}, 38 | {'c', basis:bif(fun basis:compile_proc/2)}, 39 | {'env', basis:bif(fun basis:env_proc/2)}, 40 | {'class', {prim, fun basis:class_proc/2}}, 41 | {'workers', {list, [{sym, node()}]}}, 42 | {'time', {prim, fun basis:time_proc/2}}, 43 | {'ok', {sym, ok}} 44 | ], 45 | Defs). 46 | 47 | 48 | binop(F, RT) -> 49 | fun ([{_, AV}, {_, BV}], Env) -> 50 | {{RT, F(AV, BV)}, Env} 51 | end. 52 | 53 | 54 | bif(F) -> 55 | {prim, fun (Args, Env) -> 56 | Vals = lists:map(fun (Exp) -> 57 | {V, _} = eval:evalexp(Exp, Env), 58 | V 59 | end, Args), 60 | F(Vals, Env) 61 | end}. 62 | 63 | 64 | intdiv(A, B) -> trunc(A/B). 65 | 66 | 67 | map_proc([Fn, {list, Ls}], Env) -> 68 | FnApplications = lists:map(fun (Exp) -> 69 | {list, [Fn, Exp]} 70 | end, Ls), 71 | Results = lists:map(fun(E) -> {V, _} = eval:evalexp(E, Env), V end, 72 | FnApplications), 73 | {{list, Results}, Env}. 74 | 75 | 76 | newrandom() -> 77 | rand:uniform(1000000). 78 | 79 | 80 | parallel_map(Fun, List) -> 81 | Id = newrandom(), 82 | Last = lists:foldl(fun(Value, Parent) -> 83 | spawn(fun() -> 84 | MappedValue = Fun(Value), 85 | receive 86 | {pmap, AnId, Rest} -> Parent ! {pmap, AnId, [MappedValue|Rest]} 87 | end 88 | end) 89 | end, self(), List), 90 | Last ! {pmap, Id, []}, 91 | receive 92 | {pmap, _AnId, Result} -> Result 93 | end. 94 | 95 | 96 | pmap_proc([Fn, {list, Ls}], Env) -> 97 | FnApplications = lists:map(fun (Exp) -> 98 | {list, [Fn, Exp]} 99 | end, Ls), 100 | Results = parallel_map(fun(E) -> {V, _} = eval:evalexp(E, Env), V end, 101 | FnApplications), 102 | {{list, Results}, Env}. 103 | 104 | 105 | dmap_proc([Fn, {list, Ls}], Env) -> 106 | FnApplications = lists:map(fun (Exp) -> {list, [Fn, Exp]} end, Ls), 107 | Envs = lists:duplicate(length(Ls), Env), 108 | WorkPackets = eval:tuplezip(FnApplications, Envs), 109 | {sym, DelegatorAtom} = eval:lookup('__delegator', Env), 110 | Delegator = list_to_pid(atom_to_list(DelegatorAtom)), 111 | Self = self(), 112 | Delegator ! {delegate, WorkPackets, Self}, 113 | receive 114 | {results, Self, Results} -> {{list, Results}, Env} 115 | end. 116 | 117 | 118 | exp_proc([{int, AV}, {int, BV}], Env) -> {{int, round(math:pow(AV, BV))}, Env}. 
119 | 120 | 121 | not_proc([{bool, AV}], Env) -> {{bool, not AV}, Env}. 122 | 123 | 124 | and_proc([A, B], Env) -> {{bool, AV}, _} = eval:evalexp(A, Env), 125 | if 126 | AV == false -> {{bool, false}, Env}; 127 | true -> eval:evalexp(B, Env) 128 | end. 129 | 130 | 131 | or_proc([A, B], Env) -> {{bool, AV}, _} = eval:evalexp(A, Env), 132 | if 133 | AV == true -> {{bool, true}, Env}; 134 | true -> eval:evalexp(B, Env) 135 | end. 136 | 137 | 138 | cons_proc([X, {list, XSV}], Env) -> {{list, [X|XSV]}, Env}. 139 | 140 | 141 | car_proc([{list, [H|_T]}], Env) -> {H, Env}. 142 | 143 | 144 | cdr_proc([{list, [_H|T]}], Env) -> {{list, T}, Env}. 145 | 146 | 147 | worker_proc([NodeName], Env) -> 148 | {list, Workers} = eval:lookup(workers, Env), 149 | NewWorkers = {list, [NodeName|Workers]}, 150 | {NewWorkers, eval:bind(workers, NewWorkers, Env)}. 151 | 152 | 153 | check_expect([A, B], Env) -> 154 | {{bool, AreEq}, _} = eval:evalexp({list, [{sym, 'bineq'}, A, B]}, Env), 155 | if 156 | AreEq -> %%io:format("check-expect passed~n", []), 157 | {{bool, true}, Env}; 158 | true -> io:format("check-expect failed~n", []), 159 | {{bool, false}, Env} 160 | end. 161 | 162 | 163 | remove_prims({closure, Formals, Body, CapturedEnv}) -> 164 | {closure, Formals, basis:remove_prims(Body), basis:remove_prims(CapturedEnv)}; 165 | 166 | remove_prims([]) -> []; 167 | remove_prims([{N, {closure, F, B, C}}|T]) -> 168 | [{N, basis:remove_prims({closure, F, B, C})}|basis:remove_prims(T)]; 169 | remove_prims([{_N, {prim, _}}|T]) -> basis:remove_prims(T); 170 | remove_prims([H|T]) -> [basis:remove_prims(H)|basis:remove_prims(T)]; 171 | 172 | remove_prims(O) -> 173 | O. 174 | 175 | 176 | save_state([{sym, FileName}], Env) -> 177 | EnvNoPrims = basis:remove_prims(Env), 178 | ok = file:write_file(atom_to_list(FileName), 179 | io_lib:fwrite("~p.\n", [EnvNoPrims])), 180 | {{bool, true}, Env}. 181 | 182 | 183 | load_state([{sym, FileName}], Env) -> 184 | {ok, [NewEnv]} = file:consult(atom_to_list(FileName)), 185 | {{bool, true}, eval:extend(NewEnv, Env, slim)}. 186 | 187 | 188 | print_proc([Val], Env) -> eval:printexp(Val), 189 | io:format("~n"), 190 | {{sym, ok}, Env}. 191 | 192 | 193 | compile_proc([{sym, Name}], _Env) -> 194 | code:purge(Name), 195 | {ok, Name} = compile:file(Name), 196 | code:load_file(Name), 197 | throw(code_reload). 198 | 199 | 200 | env_proc([], Env) -> 201 | EnvNoPrims = basis:remove_prims(Env), 202 | EnvWithSyms = lists:map(fun ({Name, V}) -> 203 | {list, [{sym, Name}, V]} 204 | end, EnvNoPrims), 205 | {{list, EnvWithSyms}, Env}. 206 | 207 | 208 | class_proc([Name, Parent], Env) -> 209 | {{list, [Name, Parent]}, Env}. 210 | 211 | timerstart() -> 212 | statistics(runtime), 213 | statistics(wall_clock). 214 | 215 | timerend() -> 216 | {_, Time1} = statistics(runtime), 217 | {_, Time2} = statistics(wall_clock), 218 | % Times in ms. 219 | {cpu, Time1, wall, Time2}. 220 | 221 | mklist(Ls) when is_list(Ls) -> 222 | {list, Ls}. 223 | 224 | time_proc([Exp], Env) -> 225 | timerstart(), 226 | {_Val, _} = eval:evalexp(Exp, Env), 227 | {cpu, Time1, wall, Time2} = timerend(), 228 | Diff = mklist([mklist([{sym,cpu}, {int,Time1}, {sym,ms}]), 229 | mklist([{sym,wall}, {int,Time2}, {sym,ms}])]), 230 | {Diff, Env}. 
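%% Example REPL usage of time_proc/2 via the `time` form (from the README),
%% assuming the master/worker setup described there is already running;
%% <CpuMs> and <WallMs> stand in for the measured times:
%%   1> time(dmap(fac, range(0, 1000)));;
%%   ((cpu <CpuMs> ms) (wall <WallMs> ms))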
231 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | all: clean compile 2 | 3 | clean: 4 | rm -f project-final-report.{aux,pdf} 5 | 6 | compile: 7 | pdflatex -shell-escape project-final-report 8 | 9 | open: 10 | open project-final-report.pdf 11 | -------------------------------------------------------------------------------- /docs/endnotes.sty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tekknolagi/distlisp/d20417418a1f9cd60adcae810b5670f1692328cb/docs/endnotes.sty -------------------------------------------------------------------------------- /docs/project-final-report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tekknolagi/distlisp/d20417418a1f9cd60adcae810b5670f1692328cb/docs/project-final-report.pdf -------------------------------------------------------------------------------- /docs/project-final-report.synctex.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tekknolagi/distlisp/d20417418a1f9cd60adcae810b5670f1692328cb/docs/project-final-report.synctex.gz -------------------------------------------------------------------------------- /docs/project-final-report.tex: -------------------------------------------------------------------------------- 1 | \documentclass[letterpaper,twocolumn,10pt]{article} 2 | \usepackage{usenix,epsfig,endnotes,enumerate,graphicx,multicol} 3 | \begin{document} 4 | 5 | %don't want date printed 6 | \date{} 7 | 8 | %make title bold and 14 pt font (Latex default is non-bold, 16 pt) 9 | \title{\Large \bf DLisp: Automatically distributed computation} 10 | 11 | \newcommand{\tuftsauthor}[1]{{\rm #1}\\ 12 | Tufts University} 13 | 14 | \author{ 15 | \tuftsauthor{Maxwell Bernstein} 16 | \and 17 | \tuftsauthor{Matthew Yaspan} 18 | } 19 | 20 | \maketitle 21 | 22 | \subsection*{Abstract} 23 | Many of today's programs are written sequentially, and do not take advantage of 24 | the computer's available resources. This is in a large part due to the 25 | difficulty of using any given available threading or parallelization API. 26 | Moreover, these programs often fail to take advantage of the network 27 | to distribute work not just among processes to take advantage of the 28 | scheduler, but use available computing power over the network. 29 | 30 | We solve this problem by automatically parallelizing or distributing 31 | computation across cores or even across a datacenter, then analyze the 32 | performance of our distribution algorithms across several modes. We 33 | created a toy programming language based on lisp and built the underlying 34 | parser, basis, and evaluator in Erlang, which also handles our network 35 | protocols and distribution algorithms. 36 | 37 | \section{Design} 38 | 39 | \subsection{Language} 40 | 41 | We began with a Lisp-like language with support for most forms: \verb|fixnum|, 42 | \verb|boolean|, \verb|symbol|, \verb|lambda|, \verb|funcall|, \verb|define|, 43 | \verb|val|, \verb|quote|, \verb|if|, \verb|let|, \verb|let*|, \verb|eval|, 44 | \verb|apply|, built-in functions, and closures. Then we decided that users 45 | would be less comfortable with a Lisp than a programming language with a syntax 46 | that mirrors existing programming languages like SML and OCaml, and changed the 47 | syntax. 
Mergesort, for example, begin as: 48 | 49 | \begin{verbatim} 50 | (define mergesort (xs) 51 | (if (or (null? xs) (null? (cdr xs))) 52 | xs 53 | (let* ((size (length xs)) 54 | (half (/ size 2)) 55 | (fsthalf (take half xs)) 56 | (sndhalf (drop half xs))) 57 | (merge (mergesort fsthalf) 58 | (mergesort sndhalf))))) 59 | \end{verbatim} 60 | 61 | and, after the syntax transformation, ended as: 62 | 63 | \begin{verbatim} 64 | fun mergesort(xs) = 65 | if null?(xs) or null?(cdr(xs)) 66 | then xs 67 | else let* val size = length(xs), 68 | val half = size/2, 69 | val fsthalf = take(half, xs), 70 | val sndhalf = drop(half, xs) 71 | in merge(mergesort(fsthalf), 72 | mergesort(sndhalf)) 73 | end;; 74 | \end{verbatim} 75 | 76 | Both programs map to the same abstract syntax tree, which means that we even 77 | allow mixing and matching of styles in the same program, as in: 78 | 79 | \begin{verbatim} 80 | fun mergesort(xs) = 81 | if [or [null? xs] [null? [cdr xs]]] 82 | then xs 83 | else let* val size = length(xs), 84 | val half = size/2, 85 | val fsthalf = take(half, xs), 86 | val sndhalf = drop(half, xs) 87 | in [merge mergesort(fsthalf) 88 | mergesort(sndhalf)] 89 | end;; 90 | \end{verbatim} 91 | 92 | One language property that \textit{did not} change in the transition was 93 | mutation; DLisp forces variable immutability. In forms that introduce new 94 | environments, such as \verb|let| and \verb|lambda|, shadowing is allowed -- but 95 | never mutation. This, as it turns out, is key when attempting to parallelize 96 | computation. 97 | 98 | For this reason, \verb|map|, \verb|pmap| (parallel map), and \verb|dmap| 99 | (distributed map) are all built-in special forms. 100 | 101 | \subsection{Network} 102 | 103 | We use several terms across the span of this writeup: 104 | 105 | \begin{itemize} 106 | \item \textit{Master} --- Main controller computer from which the program 107 | is run and distributed. Communicates with several Machines and Workers. 108 | \item \textit{Machine} --- Logical or physical computer, which contains 109 | many Agents and Workers. 110 | \item \textit{WorkPacket} --- A serializable tuple of the form \verb|{Exp| 111 | $x$ \verb|Env}| that is sent to Workers. 112 | \item \textit{Worker} --- Process whose sole purpose is to receive 113 | WorkPackets, evaluate them, and send the results back to the Master. 114 | \item \textit{SlowWorker} --- A Worker that has been artificially slowed 115 | down by a constant factor. 116 | \item \textit{Agent} --- Process whose sole purpose is to manage a work 117 | queue. 118 | \item \textit{StealingAgent} --- Agent that occasionally steals from other 119 | Agents when its worker is moving quickly. 120 | \item \textit{RoundRobinMode} --- Distribution mode that uses a circular 121 | queue to hand out work to Workers in order. 122 | \item \textit{ByMachineMode} --- Distribution mode that uses per-Machine 123 | statistics to determine which Worker should receive a given WorkPacket. 124 | Currently, this has two sub-modes: 125 | \begin{itemize} 126 | \item \textit{LowLatency} --- Hand out work to whoever can respond 127 | the fastest to a HealthCheck. 128 | \item \textit{ByMemory} --- Hand out work to whoever has the most 129 | computational power (currently measured by memory pressure) 130 | currently available. 131 | \end{itemize} 132 | \end{itemize} 133 | 134 | We use normal (non-stealing) Agents to begin with, then proceed to demonstrate 135 | the utility and speed gains by using StealingAgents. 
Additionally, we introduce 136 | some SlowWorkers into the Worker pool to demonstrate that work stealing is an 137 | effective means of combating heterogeneous computational power. 138 | 139 | We also demonstrate the results of the different distribution modes 140 | (enumerated above) and their effects on end-to-end computation speed. 141 | 142 | \subsection{Startup Procedure} 143 | 144 | A Master node is started, and runs on node \verb|M|. Independently, anywhere 145 | from 0 to N>0 Machines are started up on the same network, with knowledge of 146 | the Master. In this case, we use Erlang node names, such as the atom 147 | \verb|master@some.ip.address.here|, to identify the Master. 148 | 149 | Each of those Machines will spawn some number of Agents (see the \verb|calculate1| 150 | and \verb|calculate2| functions for details) based on the capacity of the 151 | machine. Currently, this is based primarily on the number of physical CPU 152 | cores. 153 | 154 | Each Machine will then register with the Master, sending over its list of 155 | Agents and hardware stats. Machines can register at any time, but \verb|dmap| 156 | will fail if no Machine has registered with the Master. 157 | 158 | \section{Distribution Methods} 159 | 160 | The goal of this project was to allow a client with basic coding ability 161 | to vastly improve the performance of their program, which may take up a 162 | significant amount of memory or processing power, by distributing it in a more 163 | effective way: either by using Erlang's ability to spawn processes and collate 164 | responses to take advantage of the local scheduler, or by distributing the work 165 | across multiple machines over the network. 166 | 167 | \subsection{Local Parallelization} 168 | 169 | The function \verb|pmap| in DLisp takes a function, which can be anonymous, 170 | and a list as arguments. The parser decomposes these inputs into a list of 171 | WorkPackets, each consisting of an expression and the environment in which the 172 | expression is to be evaluated. The expression consists of an operation and a 173 | member of the list to which it is to be applied. For each member of the list, 174 | an Erlang process is spawned in which our \verb|eval| module is called on the 175 | WorkPacket, and the evaluated result is then sent back to the Master Erlang 176 | process. An \verb|assemble| function collates all of the results and returns 177 | the mapped list. 178 | 179 | The advantage of this is that it allows for more optimal scheduling of 180 | processes that are not dependent on one another. Because this is not a reduce 181 | operation, there is no data dependency to resolve, and evaluating sequentially 182 | wastes scheduling time for no discernible benefit. 183 | 184 | \subsection{Distributed Parallelization} 185 | 186 | The function \verb|dmap| in DLisp works semantically just like \verb|pmap|. 187 | Under the hood, however, a significant amount is different. There are three 188 | possible methods of distributed map: RoundRobin, LowLatency, and ByMemory. 189 | Each of these is a different load-balancing technique that aims to distribute 190 | work in the most efficient way possible. 191 | 192 | \subsubsection{RoundRobin} 193 | 194 | In the RoundRobin scheme, all machines that are to contain Worker processes 195 | are initialized with a number of processes calculated based on the amount of 196 | memory and/or the number of cores on the respective machine.
Each machine sends its 197 | list of Worker process identifiers to the Master, which concatenates the 198 | lists into a queue and randomizes the order of the Workers. When \verb|dmap| is 199 | invoked, the call is decomposed in the same way as \verb|pmap| into 200 | WorkPackets, one for each item in the list. In the case of \verb|dmap|, however, each 201 | WorkPacket is sent to a process selected from the aforementioned queue. A 202 | process is popped off the queue, sent a WorkPacket, and requeued at the back, 203 | in a RoundRobin scheme. After all of the work is sent, a list of IDs for the 204 | individual jobs is returned so that when Workers send results back, they can be 205 | collated and assembled into a mapped list by the Master, which sits in receive mode 206 | until all the packets have been recovered. 207 | 208 | \subsubsection{LowLatency} 209 | 210 | In the LowLatency scheme, initialization occurs by Machine. When a Machine 211 | registers with the Master, it is appended to a list of Machine 212 | representations, each element containing the Machine's Pid and a queue of Worker processes 213 | local to that Machine. When \verb|dmap| is called, for each iteration through 214 | the list given as an argument, a message is sent to all of the Worker Machines, 215 | and the first one to respond is given the WorkPacket. The process is then 216 | repeated until all the work is allocated. 217 | 218 | The goal of this algorithm is to account for disparities in latency between 219 | machines. Although this was not an issue we experienced, it is plausible that a 220 | large organization with multiple data centers could see it if the pool 221 | of machines contained nodes separated by a significant enough distance, 222 | or nodes that are simply slower. Collation works the same as in RoundRobin: 223 | the Master waits for each packet to return and then returns the 224 | collated results in order. 225 | 226 | \subsubsection{ByMemory} 227 | 228 | The ByMemory scheme is similar to LowLatency, except that in this case available 229 | memory is prioritized. Much like in the LowLatency scheme, initialization 230 | occurs by Machine: when each Machine connects, it sends its Worker 231 | queue and statistics, and the Master represents it in a list of Machines, each with 232 | its own distinct WorkerQueue. For each element in the list, the corresponding 233 | WorkPacket is sent to the Machine that presents as having the most available 234 | memory. Machines are polled on each iteration of this process. ByMemory 235 | was the scheme we tested the least, because it was difficult to contrive 236 | scenarios of memory-usage heterogeneity between machines in the 237 | time we were given. 238 | 239 | \subsection{Work Stealing} 240 | 241 | A major component of how we distribute work is work stealing. The 242 | principle of work stealing is that when certain processes are busy, others can 243 | take on more work. This augments the load balancing above with 244 | a failsafe in case a process or processes stall on work. To accomplish 245 | this, the worker processes we create on a node are actually pairs of 246 | Workers and Agents, which were briefly defined above. 247 | 248 | The motivation is that the Worker simply waits for work, performs the work when 249 | a work order is received, returns the result, and repeats ad infinitum.
The 250 | Agent, however, is the point of contact between the Worker and anyone who wishes to 251 | send it work. It is called an Agent because it behaves like an agent for an 252 | actor, athlete, or celebrity: the Worker does the work, and the Agent 253 | manages the Worker and gives it work from outside sources. The list of 254 | processes inside the Master is, in reality, a list of Agents. The Master sends 255 | work to the Agent, which passes it along to the Worker or builds up a queue of jobs 256 | for the Worker to do. 257 | 258 | When its Worker is idle, the Agent also negotiates, sending a message to a 259 | random Agent and asking to steal work from its queue. This setup allows any 260 | inefficiencies in the system to be mitigated. If, in the LowLatency scheme, 261 | there is one clear favorite machine, its scheduler may be bogged down with 262 | potentially high-powered jobs. While it is processing work, an Agent on another 263 | machine can steal from the queues being built up by the Agents on the 264 | lowest-latency machine. To the Master, this is irrelevant, but it allows work to 265 | get done as fast as possible with little central micromanagement. 266 | 267 | \section{Results} 268 | 269 | Our findings (located in \verb|results.csv|) indicate several things about the 270 | performance of \verb|map(fac, range(0, 1000));;| across the different types of 271 | map, and across the different configurations of \verb|dmap|. An integral part 272 | of our testing was artificially slowed Workers. 273 | 274 | \subsection{SlowWorkers} 275 | 276 | To accomplish this, we added calls to \verb|timer:sleep/1|. This doubled the 277 | length of time needed to accomplish any given task. Of course, slowing down 278 | every machine does not help demonstrate anything --- we instead used a 279 | probabilistic model at startup time to determine which fraction of Workers 280 | would be ``slow''. 281 | 282 | \subsection{Test Code} 283 | 284 | In order to test our distribution method, we wrote some DLisp code. 285 | 286 | \begin{verbatim} 287 | fun foldl(f, acc, xs) = 288 | if null?(xs) 289 | then acc 290 | else foldl(f, f(car(xs), acc), cdr(xs)); 291 | 292 | fun vartimes(...) = 293 | foldl(bintimes, 1, ...); 294 | fun fac(n) = 295 | apply(vartimes, range(1, n+1)); 296 | 297 | fun range(start, fin) = 298 | if (start > fin) or (start == fin) 299 | then nil 300 | else cons(start, range(start+1, fin)); 301 | \end{verbatim} 302 | 303 | We figured that \verb|dmap(fac, range(0, 1000));;| would be a good test 304 | because: 305 | 306 | \begin{itemize} 307 | \item Not all units of work are the same difficulty. 308 | \item Most of those work items are not of insignificant difficulty, 309 | especially given that they are not running at ``native'' Erlang speed. 310 | \item There are many units of work, more than fit on any given Machine in 311 | our setup. 312 | \end{itemize} 313 | 314 | \subsection{Machine Setup} 315 | 316 | We used one Master and three Machines, each with 8GB RAM and 4 cores. They were 317 | all on the same network, even in the same network closet. 318 | 319 | Unfortunately, this is not a good test for the LowLatency mode, which is 320 | designed to handle systems with more heterogeneous network setups. 321 | 322 | \subsection{Findings} 323 | 324 | \begin{enumerate} 325 | \item \verb|map| is slow. 326 | \item \verb|pmap| is faster than \verb|map| if the machine has more than 327 | one core.
328 | \item \verb|dmap| is faster than \verb|map| and \verb|pmap| if you have 329 | more than one machine helping out, and that machine is at least as 330 | powerful as the Master. 331 | \item \verb|dmap| with Timed/LowLatency mode is the fastest. 332 | \item \verb|dmap| with WorkStealing enabled is the fastest. 333 | \end{enumerate} 334 | 335 | \section{Conclusion} 336 | 337 | The regression results (see \verb|regression_results.txt|) give a summary of 338 | the partial effects of each of the variables we incorporated, based on the table 339 | shown previously. The intercept value (12476 ms) refers to the value of the 340 | reference observation, in which the type of parallel map used was LowLatency, 341 | there were no slow workers, and there was no work stealing. Outside of the intercept, 342 | none of the variables were statistically significant, which is almost certainly 343 | due to the lack of observations and the high number of variables and interactions by 344 | comparison. With more time, a more robust dataset could easily be compiled and 345 | the results made much more definitive. 346 | 347 | Generally speaking, RoundRobin clearly had an advantage over LowLatency. All 348 | else equal, it performed over 4 seconds faster, nearly halving the amount of 349 | time. These improvements were amplified by about one more second when work 350 | stealing was enabled. The changes were even robust to slower worker processes, with 351 | somewhat of a speedup indicated. It’s possible that some sleeping processes 352 | allowed the schedulers on those machines to more easily allocate work, but these 353 | results are not strong enough for any conclusions to be made in this regard. 354 | Curiously, work stealing made the LowLatency algorithm dramatically slower 355 | when slow workers were used. This makes sense, but it sits in peculiar 356 | juxtaposition to the minuscule slowdown of the RoundRobin runs, which 357 | arguably went faster, and to work stealing without any slow workers in the 358 | LowLatency algorithm, which was about the same speed. 359 | 360 | \end{document} 361 | -------------------------------------------------------------------------------- /docs/regression_results: -------------------------------------------------------------------------------- 1 | Call: 2 | lm(formula = Speed ~ MapType * WorkStealing * PctSlow, data = res) 3 | 4 | Residuals: 5 | 1 2 3 4 5 6 7 8 9 10 11 13 6 | -116.7 -111.7 223.3 -111.7 233.3 -116.7 -338.2 -4849.7 2424.8 676.3 -338.2 2424.8 7 | 8 | Coefficients: 9 | Estimate Std. Error t value Pr(>|t|) 10 | (Intercept) 12476 2743 4.548 0.0104 * 11 | MapTypeRound Robin -4140 3880 -1.067 0.3461 12 | WorkStealingY -654 3880 -0.169 0.8743 13 | PctSlow 3299 4250 0.776 0.4809 14 | MapTypeRound Robin:WorkStealingY -1418 5486 -0.258 0.8088 15 | MapTypeRound Robin:PctSlow -5709 6010 -0.950 0.3960 16 | WorkStealingY:PctSlow 10750 6010 1.789 0.1482 17 | MapTypeRound Robin:WorkStealingY:PctSlow -7526 8500 -0.885 0.4259 18 | --- 19 | Signif.
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 20 | 21 | Residual standard error: 3005 on 4 degrees of freedom 22 | (2 observations deleted due to missingness) 23 | Multiple R-squared: 0.9202, Adjusted R-squared: 0.7806 24 | F-statistic: 6.592 on 7 and 4 DF, p-value: 0.04374 25 | -------------------------------------------------------------------------------- /docs/regression_results.txt: -------------------------------------------------------------------------------- 1 | Call: 2 | lm(formula = Speed ~ MapType * WorkStealing * PctSlow, data = res) 3 | 4 | Residuals: 5 | 1 2 3 4 5 6 7 8 9 10 11 13 6 | -116.7 -111.7 223.3 -111.7 233.3 -116.7 -338.2 -4849.7 2424.8 676.3 -338.2 2424.8 7 | 8 | Coefficients: 9 | Estimate Std. Error t value Pr(>|t|) 10 | (Intercept) 12476 2743 4.548 0.0104 * 11 | MapTypeRound Robin -4140 3880 -1.067 0.3461 12 | WorkStealingY -654 3880 -0.169 0.8743 13 | PctSlow 3299 4250 0.776 0.4809 14 | MapTypeRound Robin:WorkStealingY -1418 5486 -0.258 0.8088 15 | MapTypeRound Robin:PctSlow -5709 6010 -0.950 0.3960 16 | WorkStealingY:PctSlow 10750 6010 1.789 0.1482 17 | MapTypeRound Robin:WorkStealingY:PctSlow -7526 8500 -0.885 0.4259 18 | --- 19 | Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 20 | 21 | Residual standard error: 3005 on 4 degrees of freedom 22 | (2 observations deleted due to missingness) 23 | Multiple R-squared: 0.9202, Adjusted R-squared: 0.7806 24 | F-statistic: 6.592 on 7 and 4 DF, p-value: 0.04374 25 | 26 | ╔═════════════════════════════════╦═════════════╦═════════╗ 27 | ║ Variable ║ Coefficient ║ p-value ║ 28 | ╠═════════════════════════════════╬═════════════╬═════════╣ 29 | ║ Intercept ║ 12476 ║ 0.0104 ║ 30 | ║ IsRoundRobin ║ -4140 ║ 0.3461 ║ 31 | ║ WorkStealingOn ║ -654 ║ 0.8743 ║ 32 | ║ FractionSlowWorkers ║ 3299 ║ 0.4809 ║ 33 | ║ RoundRobin*Workstealing ║ -1418 ║ 0.8088 ║ 34 | ║ RoundRobin*FractSlow ║ -5709 ║ 3960 ║ 35 | ║ WorkStealingOn*PctSLow ║ 10750 ║ 0.1482 ║ 36 | ║ RoundRobin*WorkStealing*PctSlow ║ -7526 ║ 0.4259 ║ 37 | ╚═════════════════════════════════╩═════════════╩═════════╝ 38 | -------------------------------------------------------------------------------- /docs/results.csv: -------------------------------------------------------------------------------- 1 | pmap Type,worker stealing?,Pct Slow Workers,Avg,Std 2 | Round Robin,N,1,5810,68 3 | Round Robin,Y,0,6153,512 4 | Round Robin,Y,0.5,6895,312 5 | Round Robin,Y,1,6967,809 6 | Round Robin,N,0.5,7365,863 7 | Round Robin,N,0,8220,1264 8 | Low Latency,N,0,12138,467 9 | Low Latency,Y,0.5,13997,132 10 | Low Latency,Y,0,14247,184 11 | Low Latency,N,0.5,14802,658 12 | Low Latency,N,1,15437,583 13 | Parallel Map Local,NA,NA,20869,490 14 | Low Latency,Y,1,28296,382 15 | Normal Map,NA,NA,39753,289 16 | -------------------------------------------------------------------------------- /docs/usenix.sty: -------------------------------------------------------------------------------- 1 | % usenix.sty - to be used with latex2e for USENIX. 2 | % To use this style file, look at the template usenix_template.tex 3 | % 4 | % $Id: usenix.sty,v 1.2 2005/02/16 22:30:47 maniatis Exp $ 5 | % 6 | % The following definitions are modifications of standard article.sty 7 | % definitions, arranged to do a better job of matching the USENIX 8 | % guidelines. 9 | % It will automatically select two-column mode and the Times-Roman 10 | % font. 11 | 12 | % 13 | % USENIX papers are two-column. 14 | % Times-Roman font is nice if you can get it (requires NFSS, 15 | % which is in latex2e. 
16 | 17 | \if@twocolumn\else\input twocolumn.sty\fi 18 | \usepackage{mathptmx} % times roman, including math (where possible) 19 | 20 | % 21 | % USENIX wants margins of: 1" sides, 1" bottom, and 1" top. 22 | % 0.25" gutter between columns. 23 | % Gives active areas of 6.5" x 9" 24 | % 25 | \setlength{\textheight}{9.0in} 26 | \setlength{\columnsep}{0.25in} 27 | \setlength{\textwidth}{6.50in} 28 | 29 | \setlength{\topmargin}{0.0in} 30 | 31 | \setlength{\headheight}{0.0in} 32 | 33 | \setlength{\headsep}{0.0in} 34 | 35 | % Usenix wants no page numbers for camera-ready papers, so that they can 36 | % number them themselves. But submitted papers should have page numbers 37 | % for the reviewers' convenience. 38 | % 39 | % 40 | % \pagestyle{empty} 41 | 42 | % 43 | % Usenix titles are in 14-point bold type, with no date, and with no 44 | % change in the empty page headers. The whole author section is 12 point 45 | % italic--- you must use {\rm } around the actual author names to get 46 | % them in roman. 47 | % 48 | \def\maketitle{\par 49 | \begingroup 50 | \renewcommand\thefootnote{\fnsymbol{footnote}}% 51 | \def\@makefnmark{\hbox to\z@{$\m@th^{\@thefnmark}$\hss}}% 52 | \long\def\@makefntext##1{\parindent 1em\noindent 53 | \hbox to1.8em{\hss$\m@th^{\@thefnmark}$}##1}% 54 | \if@twocolumn 55 | \twocolumn[\@maketitle]% 56 | \else \newpage 57 | \global\@topnum\z@ 58 | \@maketitle \fi\@thanks 59 | \endgroup 60 | \setcounter{footnote}{0}% 61 | \let\maketitle\relax 62 | \let\@maketitle\relax 63 | \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax} 64 | 65 | \def\@maketitle{\newpage 66 | \vbox to 2.5in{ 67 | \vspace*{\fill} 68 | \vskip 2em 69 | \begin{center}% 70 | {\Large\bf \@title \par}% 71 | \vskip 0.375in minus 0.300in 72 | {\large\it 73 | \lineskip .5em 74 | \begin{tabular}[t]{c}\@author 75 | \end{tabular}\par}% 76 | \end{center}% 77 | \par 78 | \vspace*{\fill} 79 | % \vskip 1.5em 80 | } 81 | } 82 | 83 | % 84 | % The abstract is preceded by a 12-pt bold centered heading 85 | \def\abstract{\begin{center}% 86 | {\large\bf \abstractname\vspace{-.5em}\vspace{\z@}}% 87 | \end{center}} 88 | \def\endabstract{} 89 | 90 | % 91 | % Main section titles are 12-pt bold. Others can be same or smaller. 92 | % 93 | \def\section{\@startsection {section}{1}{\z@}{-3.5ex plus-1ex minus 94 | -.2ex}{2.3ex plus.2ex}{\reset@font\large\bf}} 95 | -------------------------------------------------------------------------------- /eval.erl: -------------------------------------------------------------------------------- 1 | -module(eval). 2 | -export([evalexp/2]). 3 | -export([lookup/2, bind/3, extend/2, extend/3]). 4 | -export([printexp/1]). 5 | -export([type/1]). 6 | -export([tuplezip/2]). 7 | 8 | -export([name_free/2]). 9 | 10 | -define(IF, {sym, 'ifx'}). 11 | -define(LAMBDA, {sym, 'lambda'}). 12 | -define(QUOTE, {sym, 'quote'}). 13 | -define(LET, {sym, 'letx'}). 14 | -define(LETSTAR, {sym, 'letx*'}). 15 | -define(DEFINE, {sym, 'define'}). 16 | -define(VAL, {sym, 'valx'}). 17 | -define(EVAL, {sym, 'eval'}). 18 | -define(APPLY, {sym, 'apply'}). 19 | 20 | 21 | lookup(Name, []) -> erlang:error({unbound_variable, Name}); 22 | lookup(Name, [{Name, V}|_T]) -> V; 23 | lookup(Name, [{_K, _V}|T]) -> lookup(Name, T). 24 | 25 | 26 | % Simple bind. 27 | bind(Name, Val, Env) -> [{Name, Val}|Env]. 28 | 29 | 30 | remove(_Name, []) -> []; 31 | remove(Name, [{Name, _}|T]) -> remove(Name, T); 32 | remove(Name, [H|T]) -> [H|remove(Name, T)]. 33 | 34 | 35 | % Bind but removes duplicates. 
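% (reached through extend/3 with the `slim` flag; basis:load_state/2 uses it so
% a reloaded environment does not accumulate duplicate bindings)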
36 | slimbind(Name, Val, Env) -> [{Name, Val}|remove(Name, Env)]. 37 | 38 | 39 | extend([], Env) -> Env; 40 | extend([{Name, Val}|T], Env) -> bind(Name, Val, extend(T, Env)). 41 | extend([{Name, Val}|T], Env, slim) -> slimbind(Name, Val, extend(T, Env)). 42 | 43 | 44 | printlist([]) -> ok; 45 | printlist([E]) -> printexp(E); 46 | printlist([H|T]) -> 47 | printexp(H), 48 | io:format(" "), 49 | printlist(T). 50 | 51 | 52 | printexp({int, Val}) -> io:format("~B", [Val]); 53 | printexp({sym, Val}) -> io:format("~s", [Val]); 54 | printexp({bool, true}) -> io:format("#t"); 55 | printexp({bool, false}) -> io:format("#f"); 56 | printexp({quote, QuotedExp}) -> 57 | % io:format("'"), 58 | printexp(QuotedExp); 59 | printexp({list, L}) -> 60 | io:format("("), 61 | printlist(L), 62 | io:format(")"); 63 | printexp({prim, _}) -> io:format(""); 64 | printexp({closure, Formals, Body, _CapturedEnv}) -> 65 | PrintableFormals = {list, lists:map(fun(Name) -> {sym,Name} end, Formals)}, 66 | printexp({list, [{sym, lambda}, PrintableFormals, Body]}); 67 | printexp([]) -> io:format(""); 68 | printexp([H|T]) -> 69 | printexp(H), 70 | io:format("~n"), 71 | printexp(T). 72 | 73 | 74 | type({T, _}) -> T. 75 | 76 | 77 | tuplezip([], []) -> []; 78 | tuplezip([HA|TA], [HB|TB]) -> [{HA,HB}|tuplezip(TA, TB)]; 79 | tuplezip(LA, LB) -> erlang:error({tuplezip_mismatch, LA, LB}). 80 | 81 | 82 | member(_X, []) -> false; 83 | member(X, [X|_T]) -> true; 84 | member(X, [_|T]) -> member(X, T). 85 | 86 | 87 | name_free({int, _}, _) -> false; 88 | 89 | name_free({bool, _}, _) -> false; 90 | 91 | name_free({quote, _}, _) -> false; 92 | 93 | name_free({sym, V}, N) -> V =:= N; 94 | 95 | name_free({list, [?IF, E1, E2, E3]}, N) -> 96 | name_free(E1, N) or 97 | name_free(E2, N) or 98 | name_free(E3, N); 99 | 100 | name_free({list, [?LET, {list, Bindings}, Body]}, N) -> 101 | lists:any(fun ({list, [_, E]}) -> name_free(E, N) end, Bindings) or 102 | (not member(N, lists:map(fun ({list, [Name, _]}) -> Name end, Bindings)) and 103 | name_free(Body, N)); 104 | 105 | name_free({list, [?LETSTAR, {list, []}, Body]}, N) -> 106 | name_free(Body, N); 107 | name_free({list, [?LETSTAR, {list, [B|BS]}, Body]}, N) -> 108 | name_free({list, [?LET, 109 | {list, [B]}, 110 | {list, [?LETSTAR, {list, BS}, Body]}]}, N); 111 | 112 | name_free({list, [?LAMBDA, {list, Formals}, Body]}, N) -> 113 | not member(N, lists:map(fun ({sym, Name}) -> Name end, Formals)) and 114 | name_free(Body, N); 115 | 116 | name_free({list, []}, _N) -> false; 117 | 118 | name_free({list, [FnName|Args]}, N) -> 119 | lists:any(fun (E) -> name_free(E, N) end, [FnName|Args]). 120 | 121 | % fun improve (l, rho) = (l, List.filter (fn (x, _) => freeIn (LAMBDA l) x) rho) 122 | improve({Formals, Body}, Env) -> {{Formals, Body}, 123 | lists:filter(fun ({X, _}) -> 124 | name_free({list, [?LAMBDA, 125 | {list, Formals}, 126 | Body]}, X) 127 | end, 128 | Env)}. 129 | 130 | evalexps(Es, Env) -> 131 | lists:map(fun (E) -> {Val, _} = evalexp(E, Env), 132 | Val 133 | end, Es). 
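% evalexp(Exp, Env) -> {Value, NewEnv}.
% The clauses below dispatch on the tagged expression forms produced by the
% parser: literals, symbols, quote, let/let*, define, val, lambda, closure
% application, if, primitives, eval, and apply. A rough sketch of a call,
% assuming the standard basis environment:
%   {V, _} = eval:evalexp({list, [{sym, binplus}, {int, 1}, {int, 2}]},
%                         basis:basis()),
%   % V is {int, 3}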
134 | 135 | evalexp({int, Val}, Env) -> {{int, Val}, Env}; 136 | 137 | evalexp({bool, Val}, Env) -> {{bool, Val}, Env}; 138 | 139 | evalexp({sym, nil}, Env) -> {{list, []}, Env}; 140 | 141 | evalexp({sym, Name}, Env) -> {lookup(Name, Env), Env}; 142 | 143 | evalexp({quote, {list, Ls}}, Env) -> 144 | {{list, lists:map(fun (E) -> {quote, E} end, Ls)}, Env}; 145 | 146 | evalexp({quote, Exp}, Env) -> {Exp, Env}; 147 | 148 | evalexp({list, []}, Env) -> {{list, []}, Env}; 149 | 150 | evalexp({list, [?LETSTAR, {list, []}, Body]}, Env) -> 151 | evalexp(Body, Env); 152 | evalexp({list, [?LETSTAR, 153 | {list, [{list, [{sym, Name}, Exp]}|T]}, 154 | Body]}, Env) -> 155 | {Val, _} = evalexp(Exp, Env), 156 | {BodyVal, _} = evalexp({list, [?LETSTAR, {list, T}, Body]}, 157 | bind(Name, Val, Env)), 158 | {BodyVal, Env}; 159 | 160 | evalexp({list, [?LET, {list, Bindings}, Body]}, Env) -> 161 | BoundVars = lists:map(fun ({list, [{sym, Name}, Exp]}) -> 162 | {Val, _} = evalexp(Exp, Env), 163 | {Name, Val} 164 | end, Bindings), 165 | NewEnv = extend(BoundVars, Env), 166 | {Val, _} = evalexp(Body, NewEnv), 167 | {Val, Env}; 168 | 169 | evalexp({list, [?DEFINE, {sym, Name}, Formals, Body]}, Env) -> 170 | {Closure, _} = evalexp({list, [?LAMBDA, Formals, Body]}, Env), 171 | {Closure, bind(Name, Closure, Env)}; 172 | 173 | evalexp({list, [?VAL, {sym, Name}, Exp]}, Env) -> 174 | {Val, _} = evalexp(Exp, Env), 175 | {Val, bind(Name, Val, Env)}; 176 | 177 | evalexp({list, [{closure, ['...'], Body, CapturedEnv}|Actuals]}, Env) -> 178 | ActualValues = {list, evalexps(Actuals, Env)}, 179 | CombinedEnv = bind('...', ActualValues, extend(CapturedEnv, Env)), 180 | evalexp(Body, CombinedEnv); 181 | 182 | evalexp({list, [{closure, Formals, Body, CapturedEnv}|Actuals]}, Env) -> 183 | ActualValues = evalexps(Actuals, Env), 184 | FormalsEnv = tuplezip(Formals, ActualValues), 185 | CombinedEnv = extend(FormalsEnv, extend(CapturedEnv, Env)), 186 | evalexp(Body, CombinedEnv); 187 | 188 | evalexp({list, [?LAMBDA, {list, Formals}, Body]}, Env) -> 189 | FormalNames = lists:map(fun ({sym, Name}) -> Name end, Formals), 190 | {_, ImprovedEnv} = improve({Formals, Body}, Env), 191 | {{closure, FormalNames, Body, ImprovedEnv}, Env}; 192 | 193 | evalexp({list, [?IF, Cond, E1, E2]}, Env) -> 194 | {CondV, _} = evalexp(Cond, Env), 195 | case CondV of 196 | {bool, true} -> evalexp(E1, Env); 197 | {bool, false} -> evalexp(E2, Env); 198 | _ -> erlang:error({bad_if, Cond}) 199 | end; 200 | 201 | evalexp({list, [{prim, PrimFn}|Args]}, Env) -> PrimFn(Args, Env); 202 | 203 | evalexp({list, [?EVAL, GivenExp]}, Env) -> 204 | {ExpVal, _} = evalexp(GivenExp, Env), 205 | evalexp(ExpVal, Env); 206 | 207 | evalexp({list, [?APPLY, FnExp, ArgExps]}, Env) -> 208 | {{list, ArgVals}, _} = evalexp(ArgExps, Env), 209 | evalexp({list, [FnExp|ArgVals]}, Env); 210 | 211 | evalexp({list, [LispFn|Args]}, Env) -> 212 | {FnVal, _} = evalexp(LispFn, Env), 213 | evalexp({list, [FnVal|Args]}, Env); 214 | 215 | evalexp([], Env) -> {ok, Env}; 216 | evalexp([E], Env) -> evalexp(E, Env); 217 | evalexp([FirstExp|RestExps], Env) -> 218 | {_V, Env2} = evalexp(FirstExp, Env), 219 | evalexp(RestExps, Env2). 220 | -------------------------------------------------------------------------------- /master.erl: -------------------------------------------------------------------------------- 1 | -module(master). 2 | % -export([connect_worker_nodes/3, timed_pollhealth/1, parallel_map/4]). 3 | -export([idserver/0, freshid/1, test_rr/1, test_mrr/1, test_timed/1]). 4 | -export([delegator/1]). 
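%% The delegator is the Master-side process behind dmap (see basis:dmap_proc/2).
%% Informal sketch of the message protocol implemented below:
%%   worker machine -> master: {register, Pid, Name, Cpu, {Total, Alloc, Worst}, AgentsQueue}
%%   REPL           -> master: {delegate, WorkPackets, CallbackPid}
%%                             where WorkPackets is a list of {Exp, Env} pairs
%%   master         -> REPL:   {results, CallbackPid, Results}
%% Work is fanned out to Agents/Machines according to the configured
%% distribution mode: roundrobin, timed, or memroundrobin.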
5 | 6 | 7 | shuffle(Ls) -> 8 | [X || {_, X} <- lists:sort([ {random:uniform(), N} || N <- Ls])]. 9 | 10 | %%%% Need a requeue function to re-assign dead IDs 11 | %%% 12 | %%% Need wways to assign IDs 13 | %%% 14 | %%% Need ways to test failures 15 | %%% 16 | %%% Need to implement schemes besides round robin 17 | 18 | %% aggregate_workers(Type) -> 19 | %% register(node(), self()), 20 | %% receive 21 | %% {worker, Node, Pid, Mem, Cpu} -> 22 | %% [{Node, Pid, Mem, Cpu}|aggregate_workers()]; 23 | %% {last, Node, Pid, Mem, Cpu} -> 24 | %% [{Node, Pid, Mem, Cpu}|[]] 25 | %% end. 26 | 27 | 28 | timed_pollhealth([]) -> []; 29 | timed_pollhealth([{Node, Pid, ThreadPool, SysCpu, SysMem, Time}|T]) -> 30 | PrevTime = erlang:system_time(), 31 | Pid ! system_check, 32 | receive {Pid, SysCpu, {Total, Alloc, _Worst}} -> 33 | Elapsed = erlang:system_time() - PrevTime, 34 | [{Node, Pid, ThreadPool, SysCpu, Total - Alloc, 0.875*Time + 0.125*Elapsed}| 35 | timed_pollhealth(T)] 36 | after 1000 -> 37 | Elapsed = erlang:system_time(), 38 | [{Node, Pid, ThreadPool, SysCpu, SysMem, 0.875*Time + 0.125*Elapsed}| 39 | timed_pollhealth(T)] 40 | end; 41 | timed_pollhealth(_) -> error. 42 | 43 | send_request([]) -> sent; 44 | send_request([{_Node, Pid, _ThreadPool, _Cpu, _Mem, _Time}|T]) -> 45 | Pid ! system_check, 46 | send_request(T). 47 | 48 | time_machines([{_Node, _Pid, _ThreadPool, _Cpu, _Mem, _Time}=H|T]) -> 49 | send_request([H|T]), 50 | receive {Fastest, _Cpu, {_Total, _Alloc, _Worst}} -> 51 | lists:keytake(Fastest, 2, [H|T]) 52 | end. 53 | 54 | 55 | idserver() -> 56 | idserver(0). 57 | idserver(Current) -> 58 | receive 59 | {need_id, From} -> From ! {fresh_id, Current}, 60 | idserver(Current+1) 61 | end. 62 | 63 | 64 | freshid(IdServer) -> 65 | IdServer ! {need_id, self()}, 66 | receive 67 | {fresh_id, FreshId} -> FreshId 68 | end. 69 | 70 | 71 | delegator(Modes) -> 72 | register(master, self()), 73 | delegator(Modes, spawn(fun idserver/0), [], [], queue:new()). 74 | delegator(Modes={DistMode}, IdServer, Machines, FlatAgentsList, FlatAgentsQueue) -> 75 | receive 76 | {register, Pid, Name, Cpu, {Total, Alloc, _Worst}, AgentsQueue} -> 77 | io:format("Machine ~p has joined the pool.~n", [Name]), 78 | AgentsList = queue:to_list(AgentsQueue), 79 | NewAgentsList = AgentsList ++ FlatAgentsList, 80 | NewAgentsQueue = queue:join(AgentsQueue, FlatAgentsQueue), 81 | lists:map(fun (Agent) -> Agent ! {other_agents, AgentsQueue} end, 82 | FlatAgentsList), 83 | lists:map(fun (Agent) -> Agent ! {other_agents, FlatAgentsQueue} end, 84 | AgentsList), 85 | Machine = {Name, Pid, AgentsQueue, Cpu, Total-Alloc, 0}, 86 | delegator(Modes, IdServer, [Machine|Machines], 87 | NewAgentsList, NewAgentsQueue); 88 | {delegate, WorkPackets, CallbackPid} -> 89 | Receivers = case DistMode of 90 | roundrobin -> FlatAgentsList; 91 | timed -> Machines; 92 | memroundrobin -> Machines 93 | end, 94 | Results = parallel_map(IdServer, WorkPackets, Receivers, DistMode), 95 | CallbackPid ! {results, CallbackPid, Results}, 96 | delegator(Modes, IdServer, Machines, 97 | FlatAgentsList, FlatAgentsQueue); 98 | Other -> io:format("Received malformed message ~p~n", [Other]) 99 | end. 100 | 101 | 102 | parallel_map_timed(_IdServer, [], _Machines) -> []; 103 | parallel_map_timed(IdServer, [{Exp, Env}|T], Machines) -> 104 | {value, Fastest, _Rest} = time_machines(Machines), 105 | {Node, _Pid, Workers, _Cpu, _Mem, _Time} = Fastest, 106 | FreshId = freshid(IdServer), 107 | Worker = queue:get(Workers), 108 | Worker ! 
{delegate, FreshId, Exp, Env}, 109 | queue:in(Worker, Workers), 110 | [FreshId | parallel_map_timed(IdServer, T, Machines)]. 111 | 112 | 113 | parallel_map_memrr(_IdServer, [], _Procs) -> []; 114 | parallel_map_memrr(IdServer, [{Exp, Env}|T], Machines) -> 115 | [{_Node, _Pid, Workers, _Cpu, _Mem, _Time} | _Ms] = lists:keysort(5, Machines), 116 | FreshId = freshid(IdServer), 117 | Worker = queue:get(Workers), 118 | Worker ! {delegate, FreshId, Exp, Env}, 119 | queue:in(Worker, Workers), 120 | NewMs = timed_pollhealth(Machines), 121 | [FreshId | parallel_map_memrr(IdServer, T, NewMs)]. 122 | 123 | 124 | parallel_map_rr(_IdServer, [], _MachineQueue) -> []; 125 | parallel_map_rr(IdServer, [{Exp,Env}|T], ProcQueue) -> 126 | {Proc, NewProcQueue} = next_node(ProcQueue), 127 | FreshId = freshid(IdServer), 128 | Proc ! {delegate, FreshId, Exp, Env}, 129 | [FreshId|parallel_map_rr(IdServer, T, NewProcQueue)]. 130 | 131 | 132 | next_node(Nodes) -> 133 | {{value, FirstNode}, RestNodes} = queue:out(Nodes), 134 | {FirstNode, queue:in(FirstNode, RestNodes)}. 135 | 136 | 137 | parallel_map(IdServer, WorkPackets, Processes, roundrobin) -> 138 | Avengers = parallel_map_rr(IdServer, WorkPackets, queue:from_list(Processes)), 139 | IdValPairs = assemble(Avengers), 140 | lists:map(fun({_Id, Val}) -> Val end, IdValPairs); 141 | parallel_map(IdServer, WorkPackets, Machines, memroundrobin) -> 142 | Avengers = parallel_map_memrr(IdServer, WorkPackets, Machines), 143 | IdValPairs = assemble(Avengers), 144 | lists:map(fun({_Id, Val}) -> Val end, IdValPairs); 145 | parallel_map(IdServer, WorkPackets, Machines, timed) -> 146 | Avengers = parallel_map_timed(IdServer, WorkPackets, Machines), 147 | IdValPairs = assemble(Avengers), 148 | lists:map(fun({_Id, Val}) -> Val end, IdValPairs). 149 | 150 | 151 | assemble([]) -> []; 152 | assemble([Id|Rest]) -> 153 | receive 154 | {result, Id, Val} -> 155 | [{Id, Val}|assemble(Rest)] 156 | end. 157 | 158 | test_rr(Machines) -> 159 | io:format("~p is extrmely me~n", [self()]), 160 | global:register_name(iogl, self()), 161 | IdServer = spawn(fun idserver/0), 162 | StartingEnv = basis:basis(), 163 | io:format("Calling pmap...~n"), 164 | Pids = master:connect_worker_nodes(Machines, flat, 1), 165 | WP = [{{list, [{sym, binplus}, {int, 3},{int, 3}]}, StartingEnv}, 166 | {{list, [{sym, binplus}, {int, 5}, {int, 5}]}, StartingEnv}, 167 | {{list, [{sym, binplus}, {int, 7}, {int, 5}]}, StartingEnv}], 168 | io:format("Currently connected to: ~p~n", [nodes()]), 169 | master:parallel_map(IdServer, WP, Pids, roundrobin). 170 | 171 | 172 | test_mrr(Machines) -> 173 | IdServer = spawn(fun idserver/0), 174 | StartingEnv = basis:basis(), 175 | Ms = master:connect_worker_nodes(Machines, bymachine, 2), 176 | WP = [{{list, [{sym, binplus}, {int, 3},{int, 3}]}, StartingEnv}, 177 | {{list, [{sym, binplus}, {int, 5}, {int, 5}]}, StartingEnv}, 178 | {{list, [{sym, binplus}, {int, 7}, {int, 5}]}, StartingEnv}], 179 | master:parallel_map(IdServer, WP, Ms, memroundrobin). 180 | 181 | test_timed(Machines) -> 182 | IdServer = spawn(fun idserver/0), 183 | StartingEnv = basis:basis(), 184 | Ms = master:connect_worker_nodes(Machines, bymachine, 2), 185 | WP = [{{list, [{sym, binplus}, {int, 3},{int, 3}]}, StartingEnv}, 186 | {{list, [{sym, binplus}, {int, 5}, {int, 5}]}, StartingEnv}, 187 | {{list, [{sym, binplus}, {int, 7}, {int, 5}]}, StartingEnv}], 188 | master:parallel_map(IdServer, WP, Ms, timed). 
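%% The three test_* functions above are manual smoke tests; they call a
%% connect_worker_nodes/3 helper (present only in the commented-out export
%% list above) rather than going through the replmaster.sh/replworker.sh flow.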
189 | -------------------------------------------------------------------------------- /mkparser.erl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env escript 2 | %% -*- erlang -*- 3 | main([]) -> 4 | {ok, Parser} = yecc:file("parser.yrl"), 5 | {ok, Scanner} = leex:file("scanner.xrl"), 6 | compile:file(parser), 7 | compile:file(scanner), 8 | io:format("Parser now available at ~p and scanner at ~p~n", 9 | [Parser, Scanner]); 10 | main(_) -> usage(). 11 | 12 | usage() -> 13 | io:format("usage: ./mkparser\n"), 14 | halt(1). 15 | -------------------------------------------------------------------------------- /parser.yrl: -------------------------------------------------------------------------------- 1 | Nonterminals 2 | prog exp funcall explist arglist name list letexp bindlist binding opexp define 3 | letkind lambda lisplist opfn oper unaryoper quotable. 4 | 5 | Terminals 6 | sym int bool ';' '=' '(' ')' ',' '[' ']' 'in' 'let' 'let*' 'end' '*' '+' '/' 7 | '-' '>' '<' '<=' '>=' '==' 'and' 'or' 'not' '!' '\'' 'val' 'fun' 'if' 'then' 8 | 'else' 'fn' 'op' ';;'. 9 | 10 | Rootsymbol prog. 11 | 12 | % TODO: FIX OPERATOR PRECEDENCE 13 | % AS IN: 1 < 5 or 1 == 5 14 | Unary 100 '!'. 15 | Unary 100 'not'. 16 | Left 200 '+'. 17 | Left 200 '-'. 18 | Left 300 '*'. 19 | Left 300 '/'. 20 | Left 400 '<'. 21 | Left 400 '>'. 22 | Left 400 '>='. 23 | Left 400 '<='. 24 | Left 500 '=='. 25 | Left 600 'and'. 26 | Left 600 'or'. 27 | 28 | prog -> explist ';;' : {prog, '$1'}. 29 | 30 | define -> 31 | 'fun' name '(' arglist ')' '=' exp : mklist([{sym,'define'}, '$2', mklist('$4'), '$7']). 32 | define -> 33 | 'fun' name '(' ')' '=' exp : mklist([{sym,'define'}, '$2', mklist([]), '$6']). 34 | 35 | funcall -> name '(' ')' : mklist(['$1']). 36 | funcall -> name '(' arglist ')': mklist(['$1'|'$3']). 37 | 38 | explist -> exp : ['$1']. 39 | explist -> exp ';' explist : ['$1'|'$3']. 40 | 41 | arglist -> exp : ['$1']. 42 | arglist -> exp ',' arglist : ['$1'|'$3']. 43 | 44 | bindlist -> binding : ['$1']. 45 | bindlist -> binding ',' bindlist : ['$1'|'$3']. 46 | 47 | letkind -> 'let' : 'letx'. 48 | letkind -> 'let*' : 'letx*'. 49 | 50 | letexp -> 51 | letkind bindlist 'in' exp 'end' : mklist([{sym,'$1'}, mklist('$2'), '$4']). 52 | 53 | binding -> 'val' name '=' exp : {list, ['$2', '$4']}. 54 | binding -> 55 | 'fun' name '(' arglist ')' '=' exp : 56 | {list, ['$2', {list, [{sym,lambda}, mklist('$4'), '$7']}]}. 57 | 58 | list -> '[' ']' : {list, []}. 59 | list -> '[' lisplist ']' : {list, '$2'}. 60 | 61 | lisplist -> exp : ['$1']. 62 | lisplist -> exp lisplist : ['$1'|'$2']. 63 | 64 | name -> sym : remtok('$1'). 65 | 66 | oper -> '*' : {sym,bintimes}. 67 | oper -> '+' : {sym,binplus}. 68 | oper -> '/' : {sym,bindiv}. 69 | oper -> '-' : {sym,binminus}. 70 | oper -> '>' : {sym,bingt}. 71 | oper -> '<' : {sym,binlt}. 72 | oper -> '<=' : {sym,binlte}. 73 | oper -> '>=' : {sym,bingte}. 74 | oper -> '==' : {sym,bineq}. 75 | oper -> 'or' : {sym,'or'}. 76 | oper -> 'and' : {sym,'and'}. 77 | 78 | unaryoper -> '!' : {sym,'not'}. 79 | unaryoper -> 'not' : {sym,'not'}. 80 | 81 | quotable -> name : '$1'. 82 | quotable -> bool : '$1'. 83 | quotable -> int : '$1'. 84 | quotable -> list : '$1'. 85 | 86 | opexp -> exp oper exp : mklist(['$2', '$1', '$3']). 87 | opexp -> unaryoper exp : mklist(['$1', '$2']). 88 | opexp -> '\'' quotable : {quote,'$2'}. 89 | 90 | lambda -> 91 | 'fn' '(' arglist ')' '=' exp : mklist([{sym, 'lambda'}, mklist('$3'), '$6']). 92 | 93 | opfn -> 'op' oper : '$2'. 
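% `op` turns an infix operator into a first-class function symbol: for example,
% `op +` parses to the symbol binplus, so it can be handed to a higher-order
% function, as in foldl(op +, 0, '[1 2 3]).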
94 | 95 | exp -> name : '$1'. 96 | exp -> bool : remtok('$1'). 97 | exp -> int : remtok('$1'). 98 | exp -> list : '$1'. 99 | exp -> funcall : '$1'. 100 | exp -> letexp : '$1'. 101 | exp -> opexp : '$1'. 102 | exp -> define : '$1'. 103 | exp -> opfn : '$1'. 104 | exp -> 'val' name '=' exp : mklist([{sym,'valx'},'$2','$4']). 105 | exp -> 'if' exp 'then' exp 'else' exp : mklist([{sym,'ifx'},'$2','$4','$6']). 106 | exp -> '(' exp ')' : '$2'. 107 | exp -> lambda : '$1'. 108 | 109 | Erlang code. 110 | mklist(Ls) -> {list, Ls}. 111 | remtok({T, _L, V}) -> {T, V}. 112 | -------------------------------------------------------------------------------- /proposal.md: -------------------------------------------------------------------------------- 1 | # DMR 2 | 3 | DMR is a programming language with a focus on automatic or simple distributed 4 | computation. 5 | 6 | by Matthew Yaspan and Maxwell Bernstein 7 | 8 | ## Goals 9 | 10 | DMR aims to speed up computation by fanning out calls like map, filter, foldl, 11 | etc to a cluster of threads or even other computers. It does data dependency 12 | resolution to ensure that there are no race conditions in the computation. 13 | 14 | A stretch goal is to create a parser/lexer for DMR, so the user does not have 15 | to manually input an AST. Perhaps even a metacircular evaluator, so that DMR is 16 | itself distributed. 17 | 18 | ## Planning Milestones 19 | 20 | ### Create foundation for threaded computation 21 | 22 | * Create a protocol in which work computers and main computer communicate 23 | * implement load balancer and reliability features using Erlang 24 | * Create a test environment in which we can implement protocol as well as test 25 | different schemes of distributing work 26 | 27 | ### Bridge gap between threaded and distributed computation 28 | 29 | * Ensure that there is no difference (except in speed) between sending work to 30 | a thread and sending work to another computer entirely 31 | * Finalize thread pool API 32 | 33 | ### Write data-dependency resolver 34 | 35 | * Determine if/how it is possible to statically analyze an AST and assign 36 | "types" that allow the program to distribute work 37 | * Create an algorithm that detects data-dependencies and ensures there are no 38 | race conditions 39 | 40 | ### Implement `map`, `reduce` in DMR context 41 | 42 | * Ensure `map` can distribute work across computers atomically 43 | * DONE (for local threads) 44 | * Determine if it is possible for `reduce` to be distributed efficiently 45 | * Implement other functions in terms of `map` 46 | -------------------------------------------------------------------------------- /proposal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tekknolagi/distlisp/d20417418a1f9cd60adcae810b5670f1692328cb/proposal.pdf -------------------------------------------------------------------------------- /reader.erl: -------------------------------------------------------------------------------- 1 | -module(reader). 2 | -export([repl/2, repl/3, read_program/2]). 3 | 4 | read_program(string, Data) -> 5 | {ok, T, _} = scanner:string(Data), 6 | {ok, {prog, Prog}} = parser:parse(T), 7 | Prog; 8 | 9 | read_program(file, FileName) -> 10 | {ok, Data} = file:read_file(FileName), 11 | read_program(string, binary:bin_to_list(Data)). 
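% Example: the standard basis loads its DLisp prelude with
%   reader:read_program(file, "basis.dlisp")
% which returns the parsed program (a list of expressions) ready to be fed to
% eval:evalexp/2.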
12 | 13 | read_term(Acc) -> 14 | case io:request(standard_io, {get_until, '', scanner, token, [1]}) of 15 | {ok, EndToken={';;', _}, _} -> Acc ++ [EndToken]; 16 | {ok, Token, _} -> read_term(Acc ++ [Token]); 17 | {error, token} -> {error, scanning_error}; 18 | {eof, _} -> Acc 19 | end. 20 | read_term() -> read_term([]). 21 | 22 | eval_input(Env) -> 23 | case read_term() of 24 | {error, Reason} -> error({syntax_error, Reason}); 25 | Tokens -> 26 | {ok, {prog, Prog}} = parser:parse(Tokens), 27 | eval:evalexp(Prog, Env) 28 | end. 29 | 30 | -define(NEXT, reader:repl(Num+1, Env)). 31 | -define(NEXTWITHIT, reader:repl(Num+1, eval:bind(it, Val, NewEnv))). 32 | 33 | goodbye() -> io:format("Thank you for trying DLisp.~n"), 34 | halt(0). 35 | 36 | repl(Num, Env) -> repl(Num, Env, true). 37 | repl(Num, Env, ShouldPrint) -> 38 | case ShouldPrint of 39 | true -> io:format("~p> ", [Num]); 40 | false -> ok 41 | end, 42 | try eval_input(Env) of 43 | {{sym, quit}, _} -> 44 | goodbye(); 45 | {Val, NewEnv} when Val =:= {sym, ok} -> 46 | ?NEXTWITHIT; 47 | {Val, NewEnv} -> 48 | eval:printexp(Val), 49 | io:format("~n"), 50 | %io:format(" : ~p~n", [eval:type(Val)]), 51 | ?NEXTWITHIT 52 | catch 53 | throw:code_reload -> 54 | io:format("WARNING: Code reloaded~n"), 55 | reader:repl(Num+1, eval:bind(it, {sym, ok}, Env)); 56 | throw:nothing -> 57 | reader:repl(Num, Env, false); 58 | throw:eof -> 59 | goodbye(); 60 | error:{unbound_variable,V} -> 61 | io:format("ERROR: Unbound variable ~p~n", [V]), 62 | ?NEXT; 63 | error:{badmatch, G} -> 64 | io:format("ERROR: Type mismatch: ~p~n", [G]), 65 | ?NEXT; 66 | error:{syntax_error, Reason} -> 67 | io:format("ERROR: ~p~n", [Reason]); 68 | % ?NEXT; 69 | error:{tuplezip_mismatch, _, _} -> 70 | io:format("ERROR: Wrong number of arguments~n"), 71 | ?NEXT 72 | % ; 73 | % error:E -> 74 | % io:format("UNKNOWN ERROR: ~p~n", [E]), 75 | % ?NEXT 76 | end. 77 | -------------------------------------------------------------------------------- /repl.erl: -------------------------------------------------------------------------------- 1 | -module(repl). 2 | -export([main/1,main/0]). 3 | 4 | % http://stackoverflow.com/questions/8817171 5 | shuffle(Ls) -> 6 | [X||{_,X} <- lists:sort([ {rand:uniform(), N} || N <- Ls])]. 7 | 8 | main() -> 9 | reader:repl(1, basis:basis()). 10 | main([DistMode]) when is_atom(DistMode) -> 11 | erlang:set_cookie(node(), dlisp), 12 | Delegator = spawn(master, delegator, [{DistMode}]), 13 | DelegatorAtom = {sym, list_to_atom(pid_to_list(Delegator))}, 14 | %{FlatAgentList, AgentStore} = 15 | % master:connect_worker_nodes(Machines, InitMode, 2), 16 | %% Map = fun basis:parallel_map/2, 17 | %% Map(fun(Agent) -> 18 | %% % Don't let the agent request work from itself. 19 | %% OtherAgents = FlatAgentList -- [Agent], 20 | %% % And randomize the list so they don't all kill one node 21 | %% % at a time. 22 | %% Agent ! {other_agents, shuffle(OtherAgents)} 23 | %% end, FlatAgentList), 24 | reader:repl(1, eval:bind('__delegator', DelegatorAtom, basis:basis())); 25 | main(_) -> usage(). 26 | 27 | usage() -> 28 | io:format("usage: ./repl.erl mode machineA [machine B [...]]\n"), 29 | halt(1). 
30 | -------------------------------------------------------------------------------- /replmaster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # erl -setcookie dlisp -pa ./repl.erl -run repl main -run init stop -noshell 3 | function getip { ifconfig | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1' | head -1 ;} 4 | IP=$(getip) 5 | MODE=${1} 6 | echo "IP is ${IP}" 7 | echo "Mode is ${MODE}" 8 | ledit erl -noshell -eval "repl:main([${1}])" -name "m@${IP}" -setcookie dlisp 9 | -------------------------------------------------------------------------------- /replworker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # erl -setcookie dlisp -pa ./repl.erl -run repl main -run init stop -noshell 3 | function getip { ifconfig | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1' | head -1 ;} 4 | IP=$(getip) 5 | MASTER="${1}" 6 | PERCSLOW="${2}" 7 | STEALSTATUS="${3}" 8 | echo "IP is ${IP}" 9 | echo "Master IP is ${MASTER}" 10 | echo "Percent slow is ${PERCSLOW}" 11 | echo "Stealing status is ${STEALSTATUS}" 12 | erl -noshell \ 13 | -eval "thread_pool:selfstart('m@${MASTER}', 1, {{slow, ${PERCSLOW}}, ${STEALSTATUS}})." \ 14 | -name "w@${IP}" -setcookie dlisp 15 | -------------------------------------------------------------------------------- /scanner.xrl: -------------------------------------------------------------------------------- 1 | Definitions. 2 | 3 | D = [0-9] 4 | L = [a-zA-Z_][A-Za-z_\.\?]* 5 | WS = ([\000-\s]|\-\-.*|[\t\n\s\r]) 6 | 7 | Rules. 8 | 9 | (op|if|not|or|and|let|let\*|in|end|val|fun|fn|else|then|;;) : 10 | {token,{list_to_atom(TokenChars),TokenLine}}. 11 | (>=|<=|[*\-+/><=!]|==) : {token,{list_to_atom(TokenChars),TokenLine}}. 12 | [\(\)\[\]\',\;] : {token,{list_to_atom(TokenChars),TokenLine}}. 13 | \#[tf] : {token,{bool,TokenLine,list_to_bool(TokenChars)}}. 14 | {D}+ : {token,{int,TokenLine,list_to_integer(TokenChars)}}. 15 | {L} : {token,{sym,TokenLine,list_to_atom(TokenChars)}}. 16 | \.\.\. : {token,{sym,TokenLine,list_to_atom(TokenChars)}}. 17 | {WS}+ : skip_token. 18 | 19 | Erlang code. 20 | list_to_bool([$#|"t"]) -> true; 21 | list_to_bool([$#|"f"]) -> false. 22 | -------------------------------------------------------------------------------- /stealingworker.erl: -------------------------------------------------------------------------------- 1 | -module(stealingworker). 2 | -export([agent_loop/1]). 3 | 4 | %%% Worker section. 5 | 6 | waitforwork(Agent, {SpeedMode}) -> 7 | Agent ! {request_for_work, self()}, 8 | receive 9 | {work, Master, {Id, Exp, Env}} -> 10 | case SpeedMode of 11 | fullspeed -> 12 | {Val, _} = eval:evalexp(Exp, Env), 13 | Master ! {result, Id, Val}; 14 | slow -> 15 | basis:timerstart(), 16 | {Val, _} = eval:evalexp(Exp, Env), 17 | {_, _, wall, Time} = basis:timerend(), 18 | timer:sleep(Time), % Sloooooow worker. Takes 2x as long. 19 | Master ! {result, Id, Val} 20 | end; 21 | nothing_yet -> 22 | timer:sleep(100); 23 | What -> 24 | io:format("Received invalid response of ~p~n", [What]) 25 | end, 26 | waitforwork(Agent, {SpeedMode}). 27 | 28 | 29 | %%% Agent section. 
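%% Message protocol, as read from the receive clauses in this module and the
%% spawning code in thread_pool.erl:
%%   worker -> agent     : {request_for_work, WorkerPid}
%%   agent  -> worker    : {work, Master, {Id, Exp, Env}} | nothing_yet
%%   worker -> master    : {result, Id, Val}
%%   delegator -> agent  : {delegate, Id, Exp, Env}
%%   agent  -> agent     : {steal_yo_work, WorkerPid}   (stealing mode; the
%%                         queried agent replies directly to WorkerPid)
%%   pool   -> agent     : {master, MasterPid} | {other_agents, AgentQueue}
%%   anyone -> agent     : {send_state, Requester}, answered with
%%                         {state, Worker, Master, WorkList}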
30 | 31 | % SpeedMode options: fullspeed, slow 32 | % StealingEnabled options: stealing, solitary 33 | agent_loop({SpeedMode, StealingEnabled}) -> 34 | receive 35 | {master, Master} -> 36 | Self = self(), 37 | agent_loop(spawn(fun() -> waitforwork(Self, {SpeedMode}) end), 38 | {Master, queue:new(), queue:new(), {StealingEnabled}}) 39 | end. 40 | 41 | 42 | checkforwork(OtherAgent, MyWorker) -> 43 | OtherAgent ! {steal_yo_work, MyWorker}. 44 | 45 | agent_loop(Worker, Info={Master, WorkQueue, OtherAgents, Options}) -> 46 | receive 47 | {send_state, Requester} -> 48 | Requester ! {state, Worker, Master, queue:to_list(WorkQueue)}, 49 | agent_loop(Worker, Info); 50 | {delegate, Id, Exp, Env} -> 51 | NewWorkQueue = queue:in({work, Master, {Id, Exp, Env}}, WorkQueue), 52 | agent_loop(Worker, {Master, NewWorkQueue, OtherAgents, Options}); 53 | {steal_yo_work, WorkerFromAgent} -> 54 | case queue:out(WorkQueue) of 55 | {empty, WorkQueue} -> 56 | WorkerFromAgent ! nothing_yet, 57 | agent_loop(Worker, Info); 58 | {{value, NextItem}, NewWorkQueue} -> 59 | WorkerFromAgent ! NextItem, 60 | agent_loop(Worker, {Master, NewWorkQueue, OtherAgents, 61 | Options}) 62 | end; 63 | {request_for_work, Requester} -> 64 | case queue:out(WorkQueue) of 65 | {empty, WorkQueue} -> 66 | case Options of 67 | {solitary} -> 68 | Requester ! nothing_yet, 69 | agent_loop(Worker, Info); 70 | {stealing} -> 71 | case queue:out(OtherAgents) of 72 | {empty, OtherAgents} -> 73 | Requester ! nothing_yet, 74 | agent_loop(Worker, Info); 75 | {{value, FirstAgent}, NewAgentQueue} -> 76 | checkforwork(FirstAgent, Requester), 77 | agent_loop(Worker, {Master, WorkQueue, 78 | queue:in(FirstAgent, 79 | NewAgentQueue), 80 | Options}) 81 | end 82 | end; 83 | {{value, NextItem}, NewWorkQueue} -> 84 | Requester ! NextItem, 85 | agent_loop(Worker, {Master, NewWorkQueue, OtherAgents, 86 | Options}) 87 | end; 88 | {other_agents, AgentsQueue} -> 89 | agent_loop(Worker, {Master, WorkQueue, queue:join(AgentsQueue, 90 | OtherAgents), 91 | Options}); 92 | _SomethingElse -> agent_loop(Worker, Info) 93 | end. 94 | -------------------------------------------------------------------------------- /thread_pool.erl: -------------------------------------------------------------------------------- 1 | -module(thread_pool). 2 | -export([create/2, add/1, add/2]). 3 | -export([waitforwork/0, loop/1, calculate1/0, calculate2/0, reap/3]). 4 | -export([selfstart/3]). 5 | 6 | 7 | selfstart(MasterNode, CalcAlg, AgentOpts) -> 8 | Comp = net_kernel:connect_node(MasterNode), 9 | MasterPid = {master, MasterNode}, 10 | case Comp of 11 | true -> 12 | application:load(sasl), 13 | application:set_env(sasl, sasl_error_logger, {file, "/dev/null"}), 14 | application:start(sasl), 15 | 16 | application:load(os_mon), 17 | application:set_env(os_mon, os_mon_error_logger, {file, "/dev/null"}), 18 | application:start(os_mon), 19 | 20 | NumProcs = case CalcAlg of 21 | 1 -> calculate1(); 22 | 2 -> calculate2() 23 | end, 24 | 25 | AgentsQueue = create(NumProcs, AgentOpts), 26 | AgentsList = queue:to_list(AgentsQueue), 27 | 28 | lists:map(fun(Agent) -> 29 | Agent ! {master, MasterPid}, 30 | Agent ! {other_agents, AgentsQueue} 31 | end, AgentsList), 32 | 33 | MasterPid ! {register, self(), node(), 34 | erlang:system_info(logical_processors_available), 35 | memsup:get_memory_data(), AgentsQueue}; 36 | false -> error({selfstart, masterconnect_failed}) 37 | end, 38 | loop(MasterPid). 39 | 40 | 41 | newagent(Server, Modes) -> 42 | spawn(Server, stealingworker, agent_loop, [Modes]). 
43 | 44 | newagent(Server) -> 45 | newagent(Server, {fullspeed, stealing}). 46 | 47 | 48 | create(0, _Modes) -> queue:new(); 49 | 50 | %create(1, ServerPool, AgentOpts, t) -> 51 | % {NextServer, _NewPool} = next_node(ServerPool), 52 | % Agent = newagent(NextServer, AgentOpts), 53 | % queue:from_list([Agent]); 54 | 55 | create(NumNodes, Modes={{slow, SlowProb}, IsStealing}) when NumNodes >= 1 -> 56 | Rand = rand:uniform(), 57 | Agent = if 58 | Rand < SlowProb -> 59 | newagent(node(), {slow, IsStealing}); 60 | true -> 61 | newagent(node(), {fullspeed, IsStealing}) 62 | end, 63 | queue:in(Agent, create(NumNodes-1, Modes)). 64 | 65 | 66 | add(Nodes, 0) -> Nodes; 67 | 68 | add(Nodes, NumNodes) when NumNodes > 0 -> 69 | queue:in(spawn(fun waitforwork/0), add(Nodes, NumNodes-1)). 70 | 71 | add(Nodes) -> 72 | add(Nodes, 1). 73 | 74 | percore() -> 50. 75 | 76 | calculate1() -> 77 | {Total, Alloc, _} = memsup:get_memory_data(), 78 | erlang:min((Total - Alloc) div 1048576, percore()). 79 | 80 | calculate2() -> 81 | {Total, Alloc, _} = memsup:get_memory_data(), 82 | Cores = erlang:system_info(logical_processors_available), 83 | erlang:min((Total - Alloc) div 1048576 * 4, percore()*Cores). 84 | 85 | 86 | list_delete([], _) -> []; 87 | list_delete([HA|TA], []) -> [HA|TA]; 88 | list_delete([HA|TA], [H|T]) -> list_delete(lists:delete(H, [HA|TA]), T); 89 | list_delete([HA|TA], K) -> lists:delete(K, [HA|TA]); 90 | list_delete(_, _) -> error. 91 | 92 | key_delete([], _) -> []; 93 | key_delete([Pair | Ps], [Pid|T]) -> 94 | key_delete(lists:keydelete(Pid,1, [Pair|Ps]) , T); 95 | key_delete([Pair | Ps], []) -> [Pair | Ps]; 96 | key_delete(_, _) -> error. 97 | 98 | 99 | reap(ThreadPool, Pairs, DeadPids) -> 100 | {list_delete(ThreadPool, DeadPids), key_delete(Pairs, DeadPids)}. 101 | 102 | %% init_workers (Master, CalcAlg) -> 103 | %% application:start(sasl), 104 | %% application:start(os_mon), 105 | %% ThreadPool = case CalcAlg of 106 | %% 1 -> create(calculate1()); 107 | %% 2 -> create(calculate2()) 108 | %% end, 109 | %% lists:map(fun(Agent) -> Agent ! {master, Master} end, 110 | %% queue:to_list(ThreadPool)), 111 | %% Master ! {workers, ThreadPool}. 112 | %% 113 | %% init_machine (Master, CalcAlg) -> 114 | %% application:start(sasl), 115 | %% application:start(os_mon), 116 | %% ThreadPool = case CalcAlg of 117 | %% 1 -> create(calculate1()); 118 | %% 2 -> create(calculate2()) 119 | %% end, 120 | %% lists:map(fun(Agent) -> Agent ! {master, Master} end, 121 | %% queue:to_list(ThreadPool)), 122 | %% Master ! {self(), ThreadPool, erlang:system_info(logical_processors_available), 123 | %% memsup:get_memory_data()}, 124 | %% loop(Master). 125 | 126 | loop(Master) -> 127 | receive 128 | system_check -> 129 | Master ! {self(), erlang:system_info(logical_processors_available), 130 | memsup:get_memory_data()}, 131 | loop(Master); 132 | Other -> io:format("Machine received ~p~n", [Other]), 133 | loop(Master) 134 | end. 135 | 136 | 137 | 138 | 139 | waitforwork() -> 140 | receive 141 | {work, Sender, Id, {Exp, Env} } -> Sender ! {result, Id, eval:evalexp(Exp,Env)} 142 | end, 143 | waitforwork(). 144 | % Various ways to calculate init number of processes 145 | 146 | % Master 147 | % Waits for 148 | % Sends Init machine to machines after 149 | % Periodically keeps track of machine health 150 | % Available memory 151 | % num processors 152 | % how much computation power is being used 153 | % Number of idle processes 154 | % Dead processes - how to handle? 
155 | % Various implementations of parallel map 156 | % Round robin (already there) 157 | % Weighted round robin 158 | % Hashing 159 | % Most idle processes 160 | % Can tell the machine to spin up more processes 161 | % 162 | --------------------------------------------------------------------------------
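The closing notes in thread_pool.erl list alternative ways to spread work: weighted round robin, hashing, and most-idle selection. As a rough illustration of the weighted-round-robin idea only, here is a small self-contained sketch; the module name wrr_sketch, the {AgentPid, Weight, Credit} triples, and the weights themselves are assumptions for illustration and are not part of this repository (the actual delegator lives in master.erl).

-module(wrr_sketch).
-export([next/1]).

%% Smooth weighted round robin over a non-empty list of
%% {AgentPid, Weight, Credit} triples: each pick adds every agent's Weight to
%% its Credit, chooses the agent with the highest Credit, and charges that
%% agent the total weight, so higher-weight agents are chosen proportionally
%% more often.
next(Agents) ->
    Total   = lists:sum([W || {_, W, _} <- Agents]),
    Charged = [{A, W, C + W} || {A, W, C} <- Agents],
    Best    = fun(X = {_, _, CX}, Y = {_, _, CY}) ->
                      if CX >= CY -> X; true -> Y end
              end,
    {Pid, _, _} = lists:foldl(Best, hd(Charged), tl(Charged)),
    Updated = [case P of Pid -> {P, W, C - Total}; _ -> {P, W, C} end
               || {P, W, C} <- Charged],
    {Pid, Updated}.

Threading the returned list back into each call, two agents with weights 3 and 1 would be picked in a 3:1 ratio, which is the behaviour the "weighted round robin" note appears to be after.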