name of HTML header"
33 | }
34 |
# Parse command-line options. The loop iterates over a private copy of
# "$@", so the `shift` calls in the original were no-ops and have been
# removed; options and source files may appear in any order.
for i in "$@"
do
    case $i in
        --output=*)
            output="${i#*=}"
            ;;
        --css=*)
            css_file="${i#*=}"
            ;;
        --html)
            target="html"
            ;;
        --header=*)
            html_header="${i#*=}"
            ;;
        --pdf)
            target="pdf"
            output="report.pdf"
            ;;
        --native)
            target="native"
            ;;
        --help)
            usage
            exit 0
            ;;
        --*)
            # Diagnostics go to stderr so they do not pollute piped output.
            echo "unknown argument: ${i}" >&2
            usage
            exit 1
            ;;
        *)
            # Anything that is not an option is an input source file.
            sources="${sources} ${i}"
            ;;
    esac
done

# Dispatch on the selected output target. ${sources} is deliberately left
# unquoted so multiple input files split into separate pandoc arguments.
case ${target} in
    html)
        # Quote the inner expansion too: output paths may contain spaces.
        mkdir -p "$(dirname "${output}")"
        # cp "${css_file}" "$(dirname ${output})/style.css"
        pandoc ${sources} -f "${format}" ${html_args} -H "${html_header}" -t html5 -o "${output}"
        ;;
    pdf)
        pandoc ${sources} -f "${format}" ${pdf_args} -t latex -o "${output}" --pdf-engine=xelatex
        ;;
    native)
        pandoc ${sources} -f "${format}" -s -t native
        ;;
    *)
        echo "ERROR: unknown target '${target}'" >&2
        exit 1
        ;;
esac
95 |
--------------------------------------------------------------------------------
/test/lib/test_streams.py:
--------------------------------------------------------------------------------
1 | from pytest import raises
2 | from noodles.lib import (
3 | pull, push, pull_map, push_map, sink_map,
4 | broadcast, branch, patch, pull_from)
5 |
6 |
def test_pull_chaining():
    """A @pull generator chained after pull_from yields transformed items."""
    @pull
    def square(source):
        return (item * item for item in source())

    pipeline = pull_from(range(10)) >> square

    assert list(pipeline) == [n * n for n in range(10)]
16 |
17 |
def test_pull_mapping():
    """pull_map lifts a plain function into a chainable pull stream."""
    @pull_map
    def square(value):
        return value * value

    pipeline = pull_from(range(10)) >> square

    assert list(pipeline) == [n * n for n in range(10)]
26 |
27 |
def test_function_chaining():
    """A bare callable on the right of >> is applied element-wise."""
    pipeline = pull_from(range(10)) >> (lambda value: value * value)

    assert list(pipeline) == [n ** 2 for n in range(10)]
32 |
33 |
def test_wrong_chaining_raises_error():
    """Chaining a push-based stream after a pull-based one raises TypeError.

    Renamed from ``test_wrong_chainging_raises_error`` to fix the typo;
    pytest discovers tests by name, nothing references the old one.
    """
    @push_map
    def square(x):
        return x*x

    with raises(TypeError):
        pull_from(range(10)) >> square
41 |
42 |
def test_push_chaining():
    """push_map >> sink_map builds a push pipeline that patch can feed."""
    def square(x):
        return x * x

    collected = []
    sink = push_map(square) >> sink_map(collected.append)
    patch(pull_from(range(10)), sink)

    assert collected == [n ** 2 for n in range(10)]
51 |
52 |
def test_branch():
    """branch taps a pull stream into side channels without consuming it."""
    squares, cubes = [], []

    square_sink = push_map(lambda x: x ** 2) >> sink_map(squares.append)
    cube_sink = push_map(lambda x: x ** 3) >> sink_map(cubes.append)

    passed_through = list(pull_from(range(10)) >> branch(square_sink, cube_sink))

    assert passed_through == list(range(10))
    assert cubes == [n ** 3 for n in range(10)]
    assert squares == [n ** 2 for n in range(10)]
63 |
64 |
def test_broadcast():
    """broadcast duplicates every item into each of its sinks."""
    left, right = [], []
    combined = broadcast(sink_map(left.append), sink_map(right.append))
    patch(pull_from(range(10)), combined)

    assert left == right == list(range(10))
72 |
73 |
def test_pull_00():
    """Explicitly composed pull streams run when the pipeline is called."""
    @pull
    def squared(source):
        return (i ** 2 for i in source())

    producer = pull(lambda: iter(range(5)))

    def collector(lst):
        # A pull endpoint that drains its source into `lst`.
        @pull
        def sink(source):
            for item in source():
                lst.append(item)

        return sink

    result = []
    pipeline = producer >> squared >> collector(result)
    pipeline()

    assert result == [0, 1, 4, 9, 16]
95 |
96 |
def test_push_00():
    """Push coroutines compose with >> and are driven through patch."""
    @push
    def squared(sink):
        downstream = sink()
        while True:
            value = yield
            downstream.send(value ** 2)

    producer = pull(lambda: iter(range(5)))

    def collector(lst):
        # A push endpoint that appends everything it receives to `lst`.
        @push
        def sink():
            while True:
                lst.append((yield))

        return sink

    result = []
    pipeline = squared >> collector(result)
    patch(producer, pipeline)

    assert result == [0, 1, 4, 9, 16]
121 |
--------------------------------------------------------------------------------
/scripts/tangle.lua:
--------------------------------------------------------------------------------
1 | -- Copyright 2018 Johan Hidding
2 |
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 |
7 | -- http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
15 | -- filename: tangle.lua
16 | -- description: Pandoc filter that generates a Bash script that generates
17 | -- files defined in the code blocks, thus *tangling* the contents.
18 |
-- Accumulated named code blocks, keyed by block identifier.
local vars = {}
-- Target file contents, keyed by filename (from the `file` attribute).
local files = {}
-- Header of the generated script: stages all output in a temp directory
-- so a failed tangle never leaves half-written files in the work tree.
local preamble = [[
#!/bin/bash

prepare() {
    echo "$1"
    mkdir -p $(dirname $1)
}

echo "Tangling ... "

tangle_dir=$(mktemp -d /tmp/tangle.XXXXXXXXXX)
target_dir=$(pwd)

cd "${tangle_dir}"
]]

-- Footer: rsync the staged files back into the working tree (checksum
-- mode, so untouched files keep their timestamps), then clean up.
local postamble = [[
cd "${target_dir}"

echo -e "\nSyncronising source files ..."
rsync -vrcup ${tangle_dir}/* .
sync
rm -rf ${tangle_dir}
]]
45 |
-- Collect code from a code-block element. Blocks with a non-empty
-- identifier are appended to `vars` under that identifier; blocks carrying
-- a `file=...` attribute define the full contents of that file.
function CodeBlock (elem)
    -- Pandoc gives every block an `identifier` string; it is "" when no
    -- identifier was written, so a bare `if elem.identifier` is always
    -- true and the original accumulated all anonymous blocks under "".
    if elem.identifier ~= nil and elem.identifier ~= "" then
        -- `local` keeps the accumulator out of the global namespace.
        local t = vars[elem.identifier] or ""
        vars[elem.identifier] = t .. "\n" .. elem.text
    end

    -- attr[3] is the key/value attribute list of the block.
    for k, v in pairs(elem.attr[3]) do
        if k == "file" then
            files[v] = elem.text
        end
    end
    return nil
end
59 |
-- Split a string on a Lua pattern `delimiter`, returning a table of
-- pieces. Always returns at least one element (the whole string when the
-- delimiter never matches).
function string:split(delimiter)
    local parts = {}
    local start = 1
    while true do
        local s, e = string.find(self, delimiter, start)
        if not s then break end
        table.insert(parts, string.sub(self, start, s - 1))
        start = e + 1
    end
    table.insert(parts, string.sub(self, start))
    return parts
end
72 |
-- gsub callback: expand a named code-block reference `<<key>>`, using the
-- captured leading whitespace `pre` as the indent for every expanded line.
-- Expansion is recursive: referenced blocks may themselves contain
-- `<<...>>` references.
function expandCode (pre, key)
    -- The original indexed vars[key] unconditionally, so an unknown
    -- reference crashed the filter with a nil-index error. Returning nil
    -- makes gsub keep the original `<<key>>` text; warn on stderr.
    if vars[key] == nil then
        io.stderr:write("tangle: unknown code block reference <<" .. key .. ">>\n")
        return nil
    end
    local x = ""
    for i, line in ipairs(vars[key]:split("\n")) do
        x = x .. pre .. line:gsub("(%s*)<<(%g+)>>", expandCode) .. "\n"
    end
    return x
end
80 |
-- Expand all `<<...>>` references in the contents recorded for `key`
-- in `files`, returning the fully tangled text (newline-terminated).
function expandFile (key)
    local out = {}
    for _, line in ipairs(files[key]:split("\n")) do
        -- Extra parentheses drop gsub's second return value (match count)
        -- so table.insert receives exactly two arguments.
        table.insert(out, (line:gsub("(%s*)<<(%g+)>>", expandCode)))
    end
    return table.concat(out, "\n") .. "\n"
end
88 |
-- Filter entry point: replace the whole document with a generated Bash
-- script that recreates every collected file via a `cat > file << EOF`
-- here-doc, wrapped in the preamble/postamble staging logic.
function Pandoc (elem)
    local content = { pandoc.Str(preamble) }
    for filename, code in pairs(files) do
        -- Escape characters that are special inside an unquoted here-doc:
        -- backslash first (so later escapes are not doubled), then `$`
        -- (variable expansion) and backtick (command substitution).
        code = "prepare " .. filename .. "\n" ..
            "cat > " .. filename .. " << EOF\n" ..
            expandFile(filename):gsub("\\", "\\\\"):gsub("%$", "\\$"):gsub("`", "\\`") ..
            "EOF\n\n"
        table.insert(content, pandoc.Str(code))
    end
    table.insert(content, pandoc.Str(postamble))
    -- Emit the script as a single Plain block; pandoc writes it verbatim.
    return pandoc.Pandoc(pandoc.Plain(content))
end
101 |
--------------------------------------------------------------------------------
/test/test_merge_workflow.py:
--------------------------------------------------------------------------------
1 | from pytest import raises
2 | from noodles.workflow import (
3 | Empty, ArgumentAddress,
4 | ArgumentKind, is_workflow, get_workflow, Workflow)
5 | from noodles import run_single, schedule, gather
6 |
7 |
def dummy(a, b, c, *args, **kwargs):
    # Reference signature exercising every parameter kind (regular,
    # variadic, keyword); not called directly by the tests below.
    pass
10 |
11 |
def test_is_workflow():
    # A bare (empty) Workflow instance must be recognised by is_workflow.
    assert is_workflow(Workflow(root=None, nodes={}, links={}))
14 |
15 |
def test_get_workflow():
    # Plain values carry no workflow; get_workflow returns None for them.
    assert get_workflow(4) is None
18 |
19 |
@schedule
def value(a):
    """Scheduled identity function: promises its single argument."""
    return a
23 |
24 |
@schedule
def add(a, b):
    """Scheduled addition of two (possibly promised) values."""
    return a+b
28 |
29 |
@schedule
def sub(a, b):
    """Scheduled subtraction of two (possibly promised) values."""
    return a - b
33 |
34 |
def test_private():
    """Underscore attributes live on the promise only, not on the result."""
    promise = add(1, 1)
    promise._private = 3
    assert promise._private == 3
    assert not hasattr(run_single(promise), '_private')
40 |
41 |
def test_merge_workflow():
    """Composing scheduled calls merges argument workflows into one graph."""
    a = value(1)
    b = value(2)
    c = add(a, b)

    assert is_workflow(c)
    wf_c = get_workflow(c)
    wf_a = get_workflow(a)
    wf_b = get_workflow(b)

    # Promised arguments are left Empty in the root node's bound args...
    assert wf_c.nodes[wf_c.root].bound_args.args == (Empty, Empty)
    # ...and wired up through links from each argument's own root node.
    address_a = ArgumentAddress(ArgumentKind.regular, 'a', None)
    address_b = ArgumentAddress(ArgumentKind.regular, 'b', None)
    assert (wf_c.root, address_a) in wf_c.links[wf_a.root]
    assert (wf_c.root, address_b) in wf_c.links[wf_b.root]
56 |
57 |
def test_binder():
    """gather links variadic arguments by their position."""
    a = value(1)
    b = value(2)
    gathered = gather(a, b)

    wf = get_workflow(gathered)
    wf_a = get_workflow(a)
    wf_b = get_workflow(b)

    assert is_workflow(wf)
    assert wf.nodes[wf.root].bound_args.args == (Empty, Empty)
    # Each variadic slot links back from its argument's root node.
    for index, source in enumerate((wf_a, wf_b)):
        address = ArgumentAddress(ArgumentKind.variadic, 'a', index)
        assert (wf.root, address) in wf.links[source.root]
73 |
74 |
@schedule
def takes_keywords(s, **kwargs):
    """Scheduled function with one regular parameter and a kwargs catch-all;
    ignores the keywords and returns ``s``."""
    return s
78 |
79 |
def test_with_keywords():
    """Promised keyword arguments are stored as Empty in the root kwargs."""
    a = value(1)
    b = value(2)
    promise = takes_keywords(a=a, b=b, s="regular!")

    wf = get_workflow(promise)
    get_workflow(a)
    get_workflow(b)

    assert is_workflow(wf)
    root_args = wf.nodes[wf.root].bound_args
    assert root_args.args == ("regular!",)
    assert root_args.kwargs == {'a': Empty, 'b': Empty}
91 |
92 |
class Normal:
    # Plain, unscheduled class: instances behave as ordinary Python objects.
    pass
95 |
96 |
@schedule
class Scheduled:
    # Scheduled class: instantiation yields a PromisedObject, so attribute
    # assignments below extend the workflow instead of mutating state.
    pass
100 |
101 |
def test_arg_by_ref():
    """Arguments are captured by value: mutating a plain object after
    assigning it to a promise must not change the earlier capture."""
    plain = Normal()
    promised = Scheduled()

    plain.x = 4
    promised.x = plain   # captures the object while x == 4
    plain.x = 5
    promised.y = plain   # captures it again while x == 5

    outcome = run_single(promised)
    assert outcome.x.x == 4
    assert outcome.y.x == 5
114 |
115 |
def test_hidden_promise():
    """Hiding a promise inside a plain object's attribute is rejected."""
    with raises(TypeError):
        plain = Normal()
        inner = Scheduled()
        outer = Scheduled()

        plain.x = inner
        outer.x = plain
124 |
125 |
def test_tuple_unpack():
    """Tuple assignment on promises unpacks into separate workflow values."""
    first = Scheduled()
    second = Scheduled()

    first.x, first.y = 2, 3
    second.x, second.y = sub(first.x, first.y), sub(first.y, first.x)

    outcome = run_single(second)
    assert outcome.x == -1
    assert outcome.y == 1
136 |
--------------------------------------------------------------------------------
/noodles/patterns/functional_patterns.py:
--------------------------------------------------------------------------------
1 | from .find_first import find_first
2 | from noodles import (gather, schedule, unpack)
3 | from typing import (Any, Callable, Iterable)
4 |
5 |
@schedule
def all(pred: Callable, xs: Iterable):
    """
    Check whether every element of the iterable `xs` satisfies the
    predicate `pred`. Shadows the builtin ``all`` with a scheduled variant.

    :param pred:
        predicate function
    :param xs:
        iterable object.
    :returns: boolean
    """
    # Short-circuits on the first failing element, like the original loop.
    return next((False for x in xs if not pred(x)), True)
23 |
24 |
@schedule
def any(pred: Callable, xs: Iterable):
    """
    Check whether at least one element of the iterable `xs` satisfies the
    predicate `pred`. Shadows the builtin ``any`` with a scheduled variant.

    :param pred:
        predicate function.
    :param xs:
        iterable object.
    :returns: boolean
    """
    first_match = find_first(pred, xs)
    return first_match is not None
40 |
41 |
@schedule
def filter(pred: Callable, xs: Iterable):
    """
    Keep only the elements of `xs` that satisfy `pred`, gathered into a
    single :py:class:`PromisedObject`. Shadows the builtin ``filter``.

    :param pred: predicate function.
    :param xs: iterable object.
    :returns: :py:class:`PromisedObject`
    """
    kept = [x for x in xs if pred(x)]
    return gather(*kept)
55 |
56 |
@schedule
def fold(
        fun: Callable, state: Any, xs: Iterable):
    """
    Traverse an iterable object while performing stateful computations
    with the elements. It returns a :py:class:`PromisedObject` containing
    the result of the stateful computations.

    For a general definition of folding see:
    https://en.wikipedia.org/wiki/Fold_(higher-order_function)

    :param fun: stateful function returning a (state, result) pair.
    :param state: initial state.
    :param xs: iterable object.
    :returns: :py:class:`PromisedObject`
    """
    def generator(state):
        # Each call to `fun` yields a promised (state, result) pair;
        # `unpack` splits it so the new state threads into the next
        # iteration while the per-element results are gathered below.
        for x in xs:
            state, r = unpack(fun(state, x), 2)
            yield r

    return gather(*generator(state))
79 |
80 |
@schedule
def map(fun: Callable, xs: Iterable):
    """
    Apply `fun` to each element of `xs` and gather the results into a
    workflow. Shadows the builtin ``map`` with a scheduled variant.

    :param fun:
        function to call on each element of the iterable object.
    :param xs:
        iterable object.
    :returns: :py:class:`PromisedObject`
    """
    results = [fun(x) for x in xs]
    return gather(*results)
98 |
99 |
@schedule
def zip_with(fun: Callable, xs: Iterable, ys: Iterable):
    """
    Fuse two iterables element-wise using `fun`. As with :py:func:`zip`,
    the shorter input determines the length of the result.

    :param fun:
        binary function applied to each pair of elements (x, y).
    :param xs:
        first iterable.
    :param ys:
        second iterable.
    :returns: :py:class:`PromisedObject`
    """
    fused = [fun(x, y) for x, y in zip(xs, ys)]
    return gather(*fused)
122 |
--------------------------------------------------------------------------------
/noodles/display/dumb_term.py:
--------------------------------------------------------------------------------
1 | from .pretty_term import OutStream
2 | from ..workflow import FunctionNode
3 | from inspect import Parameter
4 | import sys
5 |
6 |
7 | def _format_arg_list(a, v):
8 | if len(a) == 0:
9 | if v:
10 | return "(\u2026)"
11 | else:
12 | return "()"
13 |
14 | s = "({0}{1})"
15 | for i in a[:-1]:
16 | s = s.format(str(i) if i != Parameter.empty else "\u2014", ", {0}{1}")
17 |
18 | if v:
19 | return s.format("\u2026", "")
20 |
21 | return s.format(str(a[-1]) if a[-1] != Parameter.empty else "\u2014", "")
22 |
23 |
class DumbDisplay:
    """Monochrome, dumb term display.

    Consumes scheduler messages (see ``__call__``) and prints one line per
    job status change to stderr, with a final report when used as a
    context manager.
    """
    def __init__(self, error_filter=None):
        # job key -> {'name': display string} for jobs seen so far.
        self.jobs = {}
        self.out = OutStream(sys.stdout)
        # (job, exception) pairs collected via error_handler.
        self.errors = []
        # Optional callable turning an exception into a printable message;
        # a falsy return makes report() print the exception itself.
        self.error_filter = error_filter
        self.messages = []

    def print_message(self, key, msg):
        # NOTE(review): positional index {0} (the key) is never rendered by
        # this format string; only the status tag and job name are shown.
        if key in self.jobs:
            print("{1:12} | {2}".format(
                key, '['+msg.upper()+']', self.jobs[key]['name']),
                file=sys.stderr)

    def add_job(self, key, name):
        # Register a job so later status messages can show its name.
        self.jobs[key] = {'name': name}

    def error_handler(self, job, xcptn):
        # Defer reporting: errors are printed together by report().
        self.errors.append((job, xcptn))

    def report(self):
        """Print a success banner, or one entry per recorded error."""
        if len(self.errors) == 0:
            self.out << "[success]\n"

        else:
            self.out << "[ERROR!]\n\n"

            for job, e in self.errors:
                msg = 'ERROR '
                # Prefer the job's own 'display' hint template if present.
                if 'display' in job.hints:
                    msg += job.hints['display'].format(
                        **job.bound_args.arguments)
                else:
                    msg += 'calling {} with {}'.format(
                        job.foo.__name__, dict(job.bound_args.arguments)
                    )

                print(msg)
                err_msg = self.error_filter(e)
                if err_msg:
                    print(err_msg)
                else:
                    print(e)

    def __call__(self, msg):
        # msg is a 4-tuple: (job key, status string, payload, error).
        key, status, data, err = msg

        if isinstance(data, FunctionNode) and hasattr(data, 'hints'):
            job = data
            if job.hints and 'display' in job.hints:
                msg = job.hints['display'].format(**job.bound_args.arguments)
            else:
                msg = "{0} {1}".format(
                    job.foo.__name__,
                    _format_arg_list(job.bound_args.args, None))

            self.add_job(key, msg)

        # Dispatch to a method named after the status when one exists
        # (e.g. 'error_handler'); otherwise just log the status line.
        if hasattr(self, status):
            getattr(self, status)(key, data, err)
        else:
            self.print_message(key, status)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # self.wait()

        if exc_type:
            # Swallow Ctrl-C with a friendly message; re-raise anything else.
            if exc_type is KeyboardInterrupt:
                self.out << "\n" << "User interrupt detected, abnormal exit.\n"
                return True

            print("Internal error encountered. Contact the developers.")
            return False

        self.report()

    def wait(self):
        # NOTE(review): self.q is never assigned anywhere in this class, so
        # calling wait() would raise AttributeError — confirm intent.
        self.q.wait()
106 |
--------------------------------------------------------------------------------
/noodles/interface/maybe.py:
--------------------------------------------------------------------------------
1 | """
2 | Maybe
3 | =====
4 |
5 | Facility to handle non-fatal errors in Noodles.
6 | """
7 |
8 | from functools import (wraps)
9 | from itertools import (chain)
10 | import inspect
11 | from ..lib import (object_name)
12 |
13 |
class Fail:
    """Signifies a failure in a computation that was wrapped by a ``@maybe``
    decorator. Because Noodles runs all functions from the same context, it
    is not possible to use Python stack traces to find out where an error
    happened. Instead we use a ``Fail`` object to store information about
    exceptions and the subsequent continuation of the failure."""

    @staticmethod
    def _describe(func):
        """Best-effort one-line description of a callable's source location.

        ``inspect.getsourcefile`` raises ``TypeError`` for builtins and
        ``getsourcelines`` raises ``OSError`` when source is unavailable,
        in addition to the ``AttributeError`` the original code caught;
        fall back to a generic description in all those cases.
        """
        try:
            return "{} ({}:{})".format(
                object_name(func),
                inspect.getsourcefile(func),
                inspect.getsourcelines(func)[1])
        except (AttributeError, TypeError, OSError):
            return "<{} instance>".format(func.__class__.__name__)

    def __init__(self, func, fails=None, exception=None):
        """:param func: the callable whose invocation failed.
        :param fails: list of (function-name, argument-key, value) triples
            describing failed arguments, if any.
        :param exception: the exception that caused this failure, set only
            on the root cause."""
        self.name = self._describe(func)
        self.fails = fails or []
        self.trace = []
        self.exception = exception

    def add_call(self, func):
        """Add a call to the trace."""
        # Uses the same guarded formatting as __init__; the original
        # version crashed here on builtins and source-less callables.
        self.trace.append(self._describe(func))

        return self

    @property
    def is_root_cause(self):
        """If the field ``exception`` is set in this object, it means
        that we are looking at the root cause of the failure."""
        return self.exception is not None

    def __bool__(self):
        # Always falsy, so `if result:` naturally skips failed results.
        return False

    def __str__(self):
        msg = "Fail: " + " -> ".join(self.trace + [self.name])
        if self.exception is not None:
            msg += "\n* {}: ".format(type(self.exception).__name__)
            msg += "\n ".join(l for l in str(self.exception).split('\n'))
        elif self.fails:
            msg += "\n* failed arguments:\n "
            msg += "\n ".join(
                "{} `{}` ".format(func, source) + "\n ".join(
                    l for l in str(fail).split('\n'))
                for func, source, fail in self.fails)
        return msg
63 |
64 |
def failed(obj):
    """Returns True if ``obj`` is an instance of ``Fail``.

    Prefer this over truthiness testing when ``obj`` may legitimately be a
    falsy success value such as ``0``, ``""`` or ``[]``."""
    return isinstance(obj, Fail)
68 |
69 |
def maybe(func):
    """Calls ``func`` in a try/except block, returning a `Fail` object if
    the call fails in any way. If any of the arguments to the call are Fail
    objects, the call is not attempted."""

    name = object_name(func)

    @wraps(func)
    def maybe_wrapped(*args, **kwargs):
        """@maybe wrapped version of ``func``."""
        # Collect failed arguments: positional ones keyed by index,
        # keyword ones keyed by name.
        failed_args = [
            (name, key, value)
            for key, value in chain(enumerate(args), kwargs.items())
            if isinstance(value, Fail)]

        if failed_args:
            return Fail(func, fails=failed_args)

        try:
            result = func(*args, **kwargs)
        except Exception as exc:
            return Fail(func, exception=exc)

        # Propagate an already-failed result, recording this call site.
        if isinstance(result, Fail):
            result.add_call(func)
        return result

    return maybe_wrapped
101 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at j.hidding@esciencecenter.nl. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Noodles - parallel programming in Python
8 |
14 |
15 |
18 |
19 |
20 |
21 |
24 |
31 |

32 |
33 |
34 | - Write readable code
35 | - Parallelise with a dash of Noodle sauce!
36 | - Scale your applications from laptop to HPC using Xenon
37 |
40 | - Read our documentation, including tutorials on:
41 |
48 |
49 |
50 | What is Noodles?
51 | Noodles is a task-based parallel programming model in Python that offers the same intuitive interface when running complex workflows on your laptop or on large computer clusters.
52 | Installation
53 | To install the latest version from PyPI:
54 | pip install noodles
55 | To enable the Xenon backend for remote job execution,
56 | pip install noodles[xenon]
57 | This requires a Java Runtime to be installed, you may check this by running
58 | java --version
59 | which should print the version of the currently installed JRE.
60 | Documentation
61 | All the latest documentation is available on Read the Docs.
62 |
63 |
64 |
--------------------------------------------------------------------------------
/notebooks/inspecting_db.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "!rm -f tutorial.db"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import noodles\n",
19 | "from noodles.run.single.sqlite3 import run_single"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 12,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/plain": [
30 | "[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]"
31 | ]
32 | },
33 | "execution_count": 12,
34 | "metadata": {},
35 | "output_type": "execute_result"
36 | }
37 | ],
38 | "source": [
39 | "@noodles.schedule\n",
40 | "def double(x):\n",
41 | " return x*2\n",
42 | "\n",
43 | "workflow = noodles.gather_all(double(i) for i in range(10))\n",
44 | "run_single(workflow, registry=noodles.serial.base,\n",
45 | " db_file='tutorial.db')"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 4,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "from noodles.prov.sqlite import JobDB"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 5,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "db = JobDB('tutorial.db', registry=noodles.serial.base)"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 6,
69 | "metadata": {},
70 | "outputs": [
71 | {
72 | "name": "stdout",
73 | "output_type": "stream",
74 | "text": [
75 | " 1: double(0)\n",
76 | " 2: double(1)\n",
77 | " 3: double(2)\n",
78 | " 4: double(3)\n",
79 | " 5: double(4)\n",
80 | " 6: double(5)\n",
81 | " 7: double(6)\n",
82 | " 8: double(7)\n",
83 | " 9: double(8)\n",
84 | " 10: double(9)\n",
85 | " 11: gather(0, 2, 4, 6, 8, 10, 12, 14, 16, 18)\n"
86 | ]
87 | }
88 | ],
89 | "source": [
90 | "for k, v in db.list_jobs().items():\n",
91 | " print('{:8}: {}'.format(k, v))"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 9,
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "data": {
101 | "text/plain": [
102 | "[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]"
103 | ]
104 | },
105 | "execution_count": 9,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "db.get_result(11)"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 11,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "db.connection.close()"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": null,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": []
129 | }
130 | ],
131 | "metadata": {
132 | "kernelspec": {
133 | "display_name": "Python 3",
134 | "language": "python",
135 | "name": "python3"
136 | },
137 | "language_info": {
138 | "codemirror_mode": {
139 | "name": "ipython",
140 | "version": 3
141 | },
142 | "file_extension": ".py",
143 | "mimetype": "text/x-python",
144 | "name": "python",
145 | "nbconvert_exporter": "python",
146 | "pygments_lexer": "ipython3",
147 | "version": "3.7.3"
148 | }
149 | },
150 | "nbformat": 4,
151 | "nbformat_minor": 2
152 | }
153 |
--------------------------------------------------------------------------------
/doc/source/cooking.rst:
--------------------------------------------------------------------------------
1 | Cooking of Noodles (library docs)
2 | =================================
3 |
The cooking of good Noodles can be tricky. We try to make it as easy as possible, but to write good Noodles you need to settle into a *functional style* of programming. The functions you design cannot write to some global state, or modify their arguments and expect these modifications to persist throughout the program. This is not a restriction of Noodles itself, this is a fundamental principle that applies to all possible frameworks for parallel and distributed programming. So get used to it!
5 |
Every function call in Noodles (that is, every call to a scheduled function) can be visualised as a node in a call graph. You should be able to draw this graph conceptually when designing the program. Luckily there is (almost) always a way to write down non-functional code in a functional way.
7 |
8 | .. NOTE:: Golden Rule: if you modify something, return it.
9 |
10 |
11 | Call by value
12 | -------------
13 |
14 | Suppose we have the following program
15 |
16 | ::
17 |
18 | from noodles import (schedule, run_single)
19 |
20 | @schedule
21 | def double(x):
22 | return x['value'] * 2
23 |
24 | @schedule
25 | def add(x, y):
26 | return x + y
27 |
28 | a = {'value': 4}
29 | b = double(a)
30 | a['value'] = 5
31 | c = double(a)
32 | d = add(b, c)
33 |
34 | print(run_single(d))
35 |
36 | If this were undecorated Python, the answer would be 18. However, the computation of this answer depends on the time-dependency of the Python interpreter. In Python, dictionaries are passed by reference. The promised object `b` then contains a reference to the dictionary in `a`. If we then change the value in this dictionary, the call producing the value of `b` is retroactively changed to double the value 5 instead of 4.
37 |
38 | If Noodles is to evaluate this program correctly it needs to :py:func:`deepcopy` every argument to a scheduled function. There is another way to have the same semantics produce a correct result. This is by making `a` a promised object in the first place. The third solution is to teach your user *functional programming*.
Deep copying function arguments can result in a significant performance penalty on the side of the job scheduler. In most applications that we target this is not the bottleneck.
40 |
41 | Since we aim for the maximum ease of use for the end-user, we chose to enable call-by-value by default.
42 |
43 |
44 | Monads (sort of)
45 | ----------------
46 |
47 | We still have ways to do object oriented programming and assignments. The :py:class:`PromisedObject` class has several magic methods overloaded to translate to functional equivalents.
48 |
49 | Member assignment
50 | ~~~~~~~~~~~~~~~~~
51 |
52 | Especially member assignment is treated in a particular way. Suppose ``a`` is a :py:class:`PromisedObject`, then the statement
53 |
54 | ::
55 |
56 | a.b = 3
57 |
58 | is (conceptually) transformed into
59 |
60 | ::
61 |
62 | a = _setattr(a, 'b', 3)
63 |
64 | where :py:func:`_setattr` is a scheduled function. The :py:class:`PromisedObject` contains a representation of the complete workflow representing the computation to get to the value of `a`. In member assignment, this workflow is replaced with the new workflow containing this last instruction.
65 |
66 | This is not a recommended way of programming. Every assignment results in a nested function call. The `statefulness` of the program is then implemented in the composition of functions, similar to how other functional languages do it using `monads`. It results in sequential code that will not parallelise so well.
67 |
68 | Other magic methods
69 | ~~~~~~~~~~~~~~~~~~~
70 |
71 | Next to member assignment, we also (obviously) support member reference, method function call and object function call (with `__call__`).
72 |
73 |
74 | Storable
75 | --------
76 |
77 |
78 |
79 | Serialisation
80 | -------------
81 |
--------------------------------------------------------------------------------
/noodles/run/xenon/xenon.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | import xenon
3 |
4 | from ..remote.worker_config import WorkerConfig
5 |
6 |
class XenonJobConfig(WorkerConfig):
    """Worker configuration for running jobs through Xenon.

    Extends :py:class:`WorkerConfig` with a time-out and wraps the
    worker's command line in a ``xenon.JobDescription``.
    """
    def __init__(
            self, *, queue_name=None, environment=None,
            time_out=1000, scheduler_arguments=None, **kwargs):
        super().__init__(**kwargs)
        self.time_out = time_out

        # Translate the generic worker command line into a Xenon job
        # description that the scheduler can submit.
        command, command_args = self.command_line()
        self.xenon_job_description = xenon.JobDescription(
            executable=str(command),
            arguments=command_args,
            working_directory=str(self.working_dir),
            queue_name=queue_name,
            environment=environment,
            scheduler_arguments=scheduler_arguments)
22 |
23 |
class Machine(object):
    """Configuration to the Xenon library.

    Xenon is a Java library that offers a uniform interface to execute jobs.
    These jobs may be run locally, over ssh or against a queue manager like
    SLURM.

    [Documentation to PyXenon can be found online](http://pyxenon.rtfd.io/)

    :param name:
        The quasi human readable name to give to this Xenon instance.
        This defaults to a generated UUID.

    :param scheduler_adaptor:
        The adaptor by which to schedule jobs. Should be one of 'local',
        'ssh', 'slurm' etc. See the Xenon documentation.

    :param location:
        A location. This can be the host of the 'ssh' or 'slurm' server.

    :param credential:
        To enter a server through ssh, we need to have some credentials.
        Preferably, you have a private/public key pair by which you can
        identify yourself. Otherwise, this would be a combination of
        username/password. Functions that can create a credential
        object can be found in Xenon.credentials in the Xenon documentation.

    :param jobs_properties:
        Configuration to the Xenon.jobs module.

    :param files_properties:
        Configuration to the Xenon.files module.
    """
    def __init__(self, *, name=None, scheduler_adaptor='local',
                 location=None, credential=None, jobs_properties=None,
                 files_properties=None):
        self.name = name or ("xenon-" + str(uuid.uuid4()))
        self.scheduler_adaptor = scheduler_adaptor
        self.location = location
        self.credential = credential
        self.jobs_properties = jobs_properties
        self.files_properties = files_properties
        # Scheduler and file system are created lazily on first access.
        self._scheduler = None
        self._file_system = None

    @property
    def scheduler_args(self):
        """Keyword arguments for ``xenon.Scheduler.create``, with the
        credential slotted into the keyword matching its type."""
        args = {'adaptor': self.scheduler_adaptor,
                'location': self.location,
                'properties': self.jobs_properties}

        if isinstance(self.credential, xenon.PasswordCredential):
            args['password_credential'] = self.credential
        if isinstance(self.credential, xenon.CertificateCredential):
            args['certificate_credential'] = self.credential

        return args

    @property
    def scheduler(self):
        """Returns the scheduler object."""
        if self._scheduler is None:
            self._scheduler = xenon.Scheduler.create(**self.scheduler_args)

        return self._scheduler

    @property
    def file_system(self):
        """Gets the filesystem corresponding to the open scheduler."""
        if self._file_system is None:
            self._file_system = self.scheduler.get_file_system()

        return self._file_system
101 |
--------------------------------------------------------------------------------
/docs/style.css:
--------------------------------------------------------------------------------
1 | body
2 | {
3 | margin: 0 auto 0 250pt;
4 | max-width: 600pt;
5 | background: white;
6 | padding: 0 10pt 0 0;
7 | /* font-family: "Cantarell", "Verdana", sans-serif; */
8 | font-family: "Akkurat Light", arial;
9 | font-size: 14pt;
10 | counter-reset: h1;
11 | }
12 |
13 | .splash {
14 | background: #00aeef;
15 | color: white;
16 | font-family: "Akkurat", arial;
17 | text-shadow: 1px 1px 2px rgba(0,0,0,0.3);
18 | padding: 10pt 10pt 15pt 20pt;
19 | margin-bottom: 50pt;
20 | margin-top: 50pt;
21 | font-size: 18pt;
22 | line-height: 30pt;
23 | border: thin solid black;
24 | /*border-radius: 10pt;*/
25 | box-shadow: 10px 10px 10px 0px rgba(0,0,0,0.3);
26 | }
27 |
28 | .splash ul {
29 | margin-bottom: 10pt;
30 | }
31 |
32 | .splash a {
33 | font-family: "Akkurat Light", arial;
34 | /* color: #cceeff; */
35 | color: white;
36 | text-decoration: underline;
37 | }
38 |
39 | .splash a:hover {
40 | color: white;
41 | }
42 |
43 | p {
44 | text-align: justify;
45 | }
46 |
47 | a {
48 | color: rgb(102, 41, 41);
49 | text-decoration: none;
50 | }
51 |
52 | .TODO {
53 | background: pink;
54 | border: solid thin black;
55 | margin: 5pt 30pt 5pt 30pt;
56 | padding: 3pt;
57 | box-shadow: 8pt 8pt 10pt 0pt #aaa;
58 | }
59 |
/* Prefix TODO boxes with a bold marker. The original selector
   `.TODO :before` (with a descendant combinator) targeted the ::before
   of every descendant of the box instead of the box itself. */
.TODO:before {
    content: "👷 TODO: ";
    font-weight: bold;
}
64 |
65 | h2 {
66 | color: #00aeef;
67 | counter-reset: h2;
68 | }
69 |
70 | h3 {
71 | counter-reset: h3;
72 | }
73 |
74 | h2:before {
75 | content: counter(h1) ".\0000a0\0000a0";
76 | counter-increment: h1;
77 | }
78 |
79 | h3:before {
80 | content: counter(h1) "." counter(h2) ".\0000a0\0000a0";
81 | counter-increment: h2;
82 | }
83 |
84 | h4:before {
85 | content: counter(h1) "." counter(h2) "." counter(h3) ".\0000a0\0000a0";
86 | counter-increment: h3;
87 | }
88 |
/* Fixed sidebar holding the table of contents, with the NLeSC logo
   as a background image. */
#TOC
{
    background-color: #00aeef;
    background-image:url('nlesc-logo.svg');
    background-repeat: no-repeat;
    background-position: center 20pt;
    font-family: "Akkurat", arial;
    font-size: smaller;
    position: fixed;
    top: 0pt;
    left: 0pt;
    bottom: 0pt;
    padding: 20pt;
    padding-right: 25pt;
    padding-top: 50pt;
    width: 150pt;
    color: #FFF;
    /* The original declared border-right twice (thin white, then thin
       black); only the later declaration took effect, so the white one
       was dead code and has been removed. */
    border-right: thin solid black;
}
109 |
110 | #TOC a {
111 | color: #eee;
112 | text-decoration: none;
113 | }
114 |
115 | #TOC ul {
116 | padding: 3pt 0 3pt 10pt;
117 | }
118 |
119 | .noweb {
120 | font-style: italic;
121 | font-size: smaller;
122 | }
123 |
124 | code
125 | {
126 | font-family: "Inconsolata", monospace;
127 | }
128 |
129 | .sourceCode, .elm
130 | {
131 | font-size: 10pt;
132 | border: solid thin #aaa;
133 | border-radius: 5pt;
134 | background: #eff2f2;
135 | padding: 10pt;
136 |
137 | width: 40em;
138 | position: relative;
139 | left: 25pt;
140 | }
141 |
142 | .elm pre, .sourceCode pre, .sourceCode code
143 | {
144 | padding: unset;
145 | background: none;
146 | border: none;
147 | width: unset;
148 | left: unset;
149 | }
150 |
151 | figure
152 | {
153 | border: solid 1px #aaa;
154 | border-radius: 5pt;
155 | padding: 10pt;
156 | background: white;
157 | box-shadow: 8pt 8pt 10pt 0pt #aaa;
158 | text-align: center;
159 | }
160 |
161 | figcaption
162 | {
163 | text-align: justify;
164 | font-style: italic;
165 | }
166 |
167 | figure img
168 | {
169 | width: 100%;
170 | margin: 0 auto 0 auto;
171 | }
172 |
173 | blockquote {
174 | background: #eff2f2;
175 | color: black;
176 | padding: 10pt 30pt;
177 | font-family: Akkurat;
178 | border: thin solid black;
179 | border-radius: 10pt;
180 | /* box-shadow: 10pt 10pt #00aeef;*/
181 | }
182 |
183 |
--------------------------------------------------------------------------------
/noodles/run/job_keeper.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | import time
3 | import json
4 | import sys
5 |
6 | from threading import Lock
7 | from ..lib import (coroutine, EndOfQueue)
8 | from .messages import (JobMessage, EndOfWork)
9 |
10 |
class JobKeeper(dict):
    """Thread-safe registry of running jobs, keyed by UUID string.

    Behaves as a dict mapping key -> job. When ``keep`` is True,
    completed jobs are never removed from the registry.
    """
    def __init__(self, keep=False):
        super(JobKeeper, self).__init__()
        self.keep = keep
        self.lock = Lock()
        self.workflows = {}

    def register(self, job):
        """Assign a fresh UUID to ``job``, start its log, and return the
        JobMessage to be sent to a worker."""
        with self.lock:
            job_key = str(uuid.uuid4())
            job.db_id = None
            job.log = [(time.time(), 'register', None, None)]
            self[job_key] = job

        return JobMessage(job_key, job.node)

    def __delitem__(self, key):
        # When ``keep`` is set, deletion is a no-op so that job records
        # remain available after completion.
        if self.keep:
            return
        super(JobKeeper, self).__delitem__(key)

    def store_result(self, key, status, value, err):
        """Attach ``value`` as the result of job ``key`` once it is done."""
        if status != 'done':
            return

        if key not in self:
            print("WARNING: store_result called but job not in registry:\n"
                  " race condition? Not doing anything.\n", file=sys.stderr)
            return

        with self.lock:
            self[key].node.result = value

    @coroutine
    def message(self):
        """Coroutine receiving ``(key, status, value, err)`` tuples and
        appending them to the matching job's log. Terminates on
        EndOfQueue or an unexpected ``None``."""
        while True:
            msg = yield

            if msg is EndOfQueue:
                return
            if msg is None:
                print("Warning: `None` received where not expected.",
                      file=sys.stderr)
                return

            key, status, value, err = msg

            with self.lock:
                if key not in self:
                    continue
                self[key].log.append((time.time(), status, value, err))
65 |
66 |
class JobTimer(dict):
    """Job registry that times jobs and writes a JSON timing record for
    each finished job carrying a 'display' hint.

    :param timing_file: a filename (opened for writing and closed by this
        object) or an already-open file-like object owned by the caller.
    :param registry: currently unused; kept for interface compatibility.
    """
    def __init__(self, timing_file, registry=None):
        super(JobTimer, self).__init__()
        self.workflows = {}

        # Accept either a path (we own the file and close it on exit)
        # or an open file object (caller owns it).
        if isinstance(timing_file, str):
            self.fo = open(timing_file, 'w')
            self.owner = True
        else:
            self.fo = timing_file
            self.owner = False

    def register(self, job):
        """Give ``job`` a fresh UUID key, record its scheduling time, and
        return the JobMessage to send onwards."""
        key = str(uuid.uuid4())
        job.sched_time = time.time()
        self[key] = job
        return JobMessage(key, job.node)

    def __delitem__(self, key):
        # Deliberate no-op: timing data must survive job completion.
        pass

    # def message(self, key, status, value, err):
    @coroutine
    def message(self):
        """Coroutine receiving ``(key, status, value, err)`` messages;
        dispatches each to the method named after ``status`` (``start``,
        ``done``) when such a method exists."""
        while True:
            msg = yield
            if msg is EndOfWork:
                return
            key, status, value, err = msg
            if hasattr(self, status):
                getattr(self, status)(key, value, err)

    def start(self, key, value, err):
        # Record the moment the job actually began running.
        self[key].start_time = time.time()

    def done(self, key, value, err):
        """Write a timing record for a finished job with a 'display' hint."""
        job = self[key]
        now = time.time()
        # NOTE(review): assumes a 'start' message preceded 'done' so that
        # job.start_time exists — confirm this ordering is guaranteed.
        if job.node.hints and 'display' in job.node.hints:
            msg_obj = {
                'description': job.node.hints['display'].format(
                    **job.node.bound_args.arguments),
                'schedule_time': time.strftime(
                    '%Y-%m-%dT%H:%M:%SZ', time.gmtime(job.sched_time)),
                'start_time': time.strftime(
                    '%Y-%m-%dT%H:%M:%SZ', time.gmtime(job.start_time)),
                'done_time': time.strftime(
                    '%Y-%m-%dT%H:%M:%SZ', time.gmtime(now)),
                'run_duration': now - job.start_time}
            self.fo.write('{record},\n'.format(record=json.dumps(
                msg_obj, indent=2)))

    def __enter__(self):
        return self

    def __exit__(self, e_type, e_value, e_tb):
        # Close the timing file only if we opened it ourselves.
        if self.owner:
            self.fo.close()
125 |
--------------------------------------------------------------------------------
/noodles/run/threading/sqlite3.py:
--------------------------------------------------------------------------------
1 | """
2 | Implements parallel worker with Sqlite database.
3 | """
4 |
5 | from itertools import repeat
6 | import logging
7 |
8 | from ..scheduler import (Scheduler)
9 | from ..messages import (ResultMessage)
10 | from ..worker import (worker)
11 | from ..logging import make_logger
12 |
13 | from ...workflow import (get_workflow)
14 | from ...prov.sqlite import (JobDB)
15 | from ...lib import (
16 | Queue, pull, thread_pool, Connection, EndOfQueue,
17 | pull_map, push_map)
18 |
19 |
def pass_job(db: JobDB, result_queue: Queue, always_cache=False):
    """Create a pull stream that hands incoming jobs to the database.

    A job whose result is already in the database is short-circuited:
    the cached result is sent to `result_queue` instead of the job
    being passed on. A job identical to one currently running is
    attached to it and swallowed here.
    """
    @pull
    def pass_job_stream(job_source):
        """Pull stream instance created by `pass_job`."""
        results = result_queue.sink()

        for msg in job_source():
            if msg is EndOfQueue:
                return

            key, job = msg
            if always_cache or 'store' in job.hints:
                status, cached = db.add_job_to_db(key, job)

                if status == 'retrieved':
                    # A previous run produced this result already.
                    results.send(cached)
                    continue
                if status == 'attached':
                    # An identical job is running; its result will be
                    # duplicated to this one on completion.
                    continue

            yield msg

    return pass_job_stream
48 |
49 |
def pass_result(db: JobDB, always_cache=False):
    """Create a pull stream that stores results in the database before
    forwarding them. The database may report jobs that were attached to
    this one; each of those is forwarded as an 'attached' result with
    the same value."""
    @pull
    def pass_result_stream(worker_source):
        """Pull stream instance created by `pass_result`."""
        for message in worker_source():
            if message is EndOfQueue:
                return

            attached_keys = db.store_result_in_db(
                message, always_cache=always_cache)

            yield message
            # Duplicate the value to every job attached to this one.
            for attached_key in attached_keys:
                yield ResultMessage(attached_key, 'attached',
                                    message.value, None)

    return pass_result_stream
69 |
70 |
def run_parallel(
        workflow, *, n_threads, registry, db_file, echo_log=True,
        always_cache=False):
    """Run a workflow in parallel threads, storing results in a Sqlite3
    database.

    :param workflow: Workflow or PromisedObject to evaluate.
    :param n_threads: number of threads to use (in addition to the scheduler).
    :param registry: serialization Registry function.
    :param db_file: filename of Sqlite3 database, give `':memory:'` to
        keep the database in memory only.
    :param echo_log: set log-level high enough
    :param always_cache: enable caching by schedule hint.
    :return: Evaluated result.
    """
    if echo_log:
        logging.getLogger('noodles').setLevel(logging.DEBUG)
        logging.debug("--- start log ---")

    with JobDB(db_file, registry) as db:
        job_queue = Queue()
        result_queue = Queue()

        # NOTE(review): both loggers are created under the name "worker";
        # confirm whether the job-side logger was meant to have a
        # different name.
        job_logger = make_logger("worker", push_map, db)
        result_logger = make_logger("worker", pull_map, db)

        # Job flow: job_queue -> database check (pass_job) -> thread pool.
        # Cached results are short-circuited onto result_queue by pass_job.
        worker_pool = job_queue.source \
            >> pass_job(db, result_queue, always_cache) \
            >> thread_pool(*repeat(worker, n_threads), results=result_queue)
        job_front_end = job_logger >> job_queue.sink
        # Result flow: pool -> database store (pass_result) -> logger.
        result_front_end = worker_pool \
            >> pass_result(db, always_cache) \
            >> result_logger

        scheduler = Scheduler(job_keeper=db)
        parallel_sqlite_worker = Connection(result_front_end, job_front_end)

        result = scheduler.run(parallel_sqlite_worker, get_workflow(workflow))

    # Results may be stored by reference; resolve them before returning.
    return registry().dereference(result)
111 |
--------------------------------------------------------------------------------
/notebooks/first_steps.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# First Steps\n",
8 | "\n",
9 | "**This tutorial is also available in the form of a Jupyter Notebook. Try it out, and play!**\n",
10 | "\n",
11 |     "Noodles is there to make your life easier, *in parallel*! The reason why Noodles can be easy and do parallel Python at the same time is its *functional* approach. In one part you'll define a set of functions that you'd like to run with Noodles, in another part you'll compose these functions into a *workflow graph*. To make this approach work a function should not have any *side effects*. Let's not linger and just start noodling! First we define some functions to use."
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {
18 | "collapsed": true
19 | },
20 | "outputs": [],
21 | "source": [
22 | "from noodles import schedule\n",
23 | "\n",
24 | "@schedule\n",
25 | "def add(x, y):\n",
26 | " return x + y\n",
27 | "\n",
28 | "@schedule\n",
29 | "def mul(x,y):\n",
30 | " return x * y"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "Now we can create a workflow composing several calls to this function."
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 2,
43 | "metadata": {
44 | "collapsed": true
45 | },
46 | "outputs": [],
47 | "source": [
48 | "a = add(1, 1)\n",
49 | "b = mul(a, 2)\n",
50 | "c = add(a, a)\n",
51 | "d = mul(b, c)"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "That looks easy enough; the funny thing is though, that nothing has been computed yet! Noodles just created the workflow graphs corresponding to the values that still need to be computed. Until such time, we work with the *promise* of a future value. Using some function in `pygraphviz` we can look at the call graphs."
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 3,
64 | "metadata": {},
65 | "outputs": [
66 | {
67 | "data": {
68 | "text/markdown": [
69 | "| a | b | c | d |\n",
70 | "| --- | --- | --- | --- |\n",
71 | "|  |  |  |  |"
72 | ],
73 | "text/plain": [
74 | ""
75 | ]
76 | },
77 | "metadata": {},
78 | "output_type": "display_data"
79 | }
80 | ],
81 | "source": [
82 | "from noodles.tutorial import display_workflows\n",
83 | "\n",
84 | "display_workflows(prefix='first_steps-workflow',\n",
85 | " a=a, b=b, c=c, d=d)"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "Now, to compute the result we have to tell Noodles to evaluate the program."
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 4,
98 | "metadata": {},
99 | "outputs": [
100 | {
101 | "data": {
102 | "text/plain": [
103 | "16"
104 | ]
105 | },
106 | "execution_count": 4,
107 | "metadata": {},
108 | "output_type": "execute_result"
109 | }
110 | ],
111 | "source": [
112 | "from noodles import run_parallel\n",
113 | "\n",
114 | "run_parallel(d, n_threads=2)"
115 | ]
116 | }
117 | ],
118 | "metadata": {
119 | "kernelspec": {
120 | "display_name": "Python 3",
121 | "language": "python",
122 | "name": "python3"
123 | },
124 | "language_info": {
125 | "codemirror_mode": {
126 | "name": "ipython",
127 | "version": 3
128 | },
129 | "file_extension": ".py",
130 | "mimetype": "text/x-python",
131 | "name": "python",
132 | "nbconvert_exporter": "python",
133 | "pygments_lexer": "ipython3",
134 | "version": "3.7.3"
135 | }
136 | },
137 | "nbformat": 4,
138 | "nbformat_minor": 2
139 | }
140 |
--------------------------------------------------------------------------------
/noodles/run/hybrid.py:
--------------------------------------------------------------------------------
1 | import threading
2 |
3 | from ..workflow import get_workflow
4 | from ..lib import Queue, Connection, push, patch, EndOfQueue, FlushQueue
5 | from .scheduler import Scheduler
6 | from .worker import run_job
7 |
8 |
def hybrid_coroutine_worker(selector, workers):
    """Runs a set of workers, all of them in the main thread.
    This runner is here for testing purposes.

    :param selector:
        A function returning a worker key, given a job.
    :type selector: function

    :param workers:
        A dict of workers.
    :type workers: dict
    """
    jobs = Queue()

    # Set up each worker once, keeping its result source and job sink.
    sources = {}
    sinks = {}
    for name, w in workers.items():
        sources[name], sinks[name] = w.setup()

    def get_result():
        for msg in jobs.source():
            key, job = msg
            chosen = selector(job)
            if chosen is None:
                # No worker selected: run the job in the main thread.
                yield run_job(key, job)
            else:
                # Hand the job to the selected worker, then block until
                # that worker yields its result.
                sinks[chosen].send(msg)
                yield next(sources[chosen])

    return Connection(get_result, jobs.sink)
44 |
45 |
def hybrid_threaded_worker(selector, workers):
    """Runs a set of workers, each in a separate thread.

    :param selector:
        A function that takes a hints-tuple and returns a key
        indexing a worker in the `workers` dictionary.
    :param workers:
        A dictionary of workers.

    :returns:
        A connection for the scheduler.
    :rtype: Connection

    The hybrid worker dispatches jobs to the different workers
    based on the information contained in the hints. If no hints
    were given, the job is run in the main thread.

    Dispatching is done in the main thread. Retrieving results is
    done in a separate thread for each worker. In this design it is
    assumed that dispatching a job takes little time, while waiting for
    one to return a result may take a long time.
    """
    result_queue = Queue()

    # One job sink per worker, created up front in the dispatching thread.
    job_sink = {k: w.sink() for k, w in workers.items()}

    @push
    def dispatch_job():
        # Jobs with no matching worker are run here, in the dispatch
        # coroutine, and their results go straight to the result queue.
        default_sink = result_queue.sink()

        while True:
            msg = yield

            if msg is EndOfQueue:
                # Forward end-of-queue to every worker; a sink that is
                # already closed raises StopIteration, which we ignore.
                for k in workers.keys():
                    try:
                        job_sink[k].send(EndOfQueue)
                    except StopIteration:
                        pass
                return

            if msg is FlushQueue:
                # NOTE(review): after forwarding FlushQueue this coroutine
                # returns, terminating dispatch just like EndOfQueue —
                # confirm a flush is meant to end the dispatcher.
                for k in workers.keys():
                    try:
                        job_sink[k].send(FlushQueue)
                    except StopIteration:
                        pass
                return

            worker = selector(msg.node)
            if worker:
                job_sink[worker].send(msg)
            else:
                # No worker selected: run the job synchronously here.
                default_sink.send(run_job(*msg))

    # Each worker gets a daemon thread that patches its result source
    # into the shared result queue.
    for key, worker in workers.items():
        t = threading.Thread(
            target=patch,
            args=(worker.source, result_queue.sink))
        t.daemon = True
        t.start()

    return Connection(result_queue.source, dispatch_job)
109 |
110 |
def run_hybrid(wf, selector, workers):
    """
    Returns the result of evaluating the workflow; runs through several
    supplied workers in as many threads.

    :param wf:
        Workflow to compute
    :type wf: :py:class:`Workflow` or :py:class:`PromisedObject`

    :param selector:
        A function selecting the worker that should be run, given a hint.
    :param workers:
        A dictionary of workers

    :returns:
        result of running the workflow
    """
    # Build the threaded hybrid worker and hand it to a fresh scheduler.
    connection = hybrid_threaded_worker(selector, workers)
    return Scheduler().run(connection, get_workflow(wf))
130 |
--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
1 | .. Noodles documentation master file, created by
2 | sphinx-quickstart on Wed Nov 11 13:52:27 2015.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to Noodles's documentation!
7 | ===================================
8 |
9 | Introduction
10 | ------------
11 | Often, a computer program can be sped up by executing parts of its code *in
12 | parallel* (simultaneously), as opposed to *synchronously* (one part after
13 | another).
14 |
15 | A simple example may be where you assign two variables, as follows ``a = 2 * i``
16 | and ``b = 3 * i``. Either statement is only dependent on ``i``, but whether you
17 | assign ``a`` before ``b`` or vice versa, does not matter for how your program
18 | works. Whenever this is the case, there is potential to speed up a program,
19 | because the assignment of ``a`` and ``b`` could be done in parallel, using
20 | multiple cores on your computer's CPU. Obviously, for simple assignments like
21 | ``a = 2 * i``, there is not much time to be gained, but what if ``a`` is the
22 | result of a time-consuming function, e.g. ``a = very_difficult_function(i)``?
23 | And what if your program makes many calls to that function, e.g. ``list_of_a =
24 | [very_difficult_function(i) for i in list_of_i]``? The potential speed-up could
25 | be tremendous.
26 |
27 | So, parallel execution of computer programs is great for improving performance,
28 | but how do you tell the computer which parts should be executed in parallel, and
29 | which parts should be executed synchronously? How do you identify the order in
30 | which to execute each part, since the optimal order may be different from the
31 | order in which the parts appear in your program. These questions quickly become
32 | nearly impossible to answer as your program grows and changes during
33 | development. Because of this, many developers accept the slow execution of their
34 | program only because it saves them from the headaches associated with keeping
35 | track of which parts of their program depend on which other parts.
36 |
37 | Enter Noodles.
38 |
39 | Noodles is a Python package that can automatically construct a *callgraph*
40 | for a given Python program, listing exactly which parts depend on which parts.
41 | Moreover, Noodles can subsequently use the callgraph to execute code in parallel
42 | on your local machine using multiple cores. If you so choose, you can even
43 | configure Noodles such that it will execute the code remotely, for example on a
44 | big compute node in a cluster computer.
45 |
46 | Copyright & Licence
47 | -------------------
48 |
49 | Noodles 0.3.0 is copyright by the *Netherlands eScience Center (NLeSC)* and released under the Apache v2 License.
50 |
51 | See http://www.esciencecenter.nl for more information on the NLeSC.
52 |
53 | Installation
54 | ------------
55 |
56 | .. WARNING:: We don't support Python versions lower than 3.5.
57 |
58 | The core of Noodles runs on **Python 3.5** and above. To run Noodles on your own machine, no extra dependencies are required. It is advised to install Noodles in a virtualenv. If you want support for `Xenon`_, install `pyxenon`_ too.
59 |
60 | .. code-block:: bash
61 |
62 | # create the virtualenv
63 |     virtualenv -p python3 <venv-dir>
64 |     . <venv-dir>/bin/activate
65 |
66 | # install noodles
67 | pip install noodles
68 |
69 | Noodles has several optional dependencies. To be able to use the Xenon job scheduler, install Noodles with::
70 |
71 | pip install noodles[xenon]
72 |
73 | The provenance/caching feature needs TinyDB installed::
74 |
75 | pip install noodles[prov]
76 |
77 | To be able to run the unit tests::
78 |
79 | pip install noodles[test]
80 |
81 | Documentation Contents
82 | ======================
83 |
84 | .. toctree::
85 | :maxdepth: 2
86 |
87 | Introduction
88 | eating
89 | cooking
90 | tutorials
91 | implementation
92 |
93 |
94 | Indices and tables
95 | ==================
96 |
97 | * :ref:`genindex`
98 | * :ref:`modindex`
99 | * :ref:`search`
100 |
101 | .. _Xenon: http://nlesc.github.io/Xenon/
102 | .. _pyxenon: http://github.com/NLeSC/pyxenon
103 | .. _`generating SSH keys`: https://help.github.com/articles/generating-ssh-keys/
104 | .. _`decorators`: https://www.thecodeship.com/patterns/guide-to-python-function-decorators/
105 |
--------------------------------------------------------------------------------