├── .gitignore
├── .travis.yml
├── COPYING
├── LICENSE
├── README.md
├── build.sbt
├── docs
│   ├── build.sbt
│   └── src
│       └── main
│           └── paradox
│               ├── back-end.md
│               ├── experimenting.md
│               ├── front-end.md
│               ├── images
│               │   ├── cluster-nodes.png
│               │   ├── cluster-nodes.svg
│               │   ├── master-worker-message-flow.png
│               │   ├── master-worker-message-flow.svg
│               │   ├── singleton-manager.png
│               │   └── singleton-manager.svg
│               ├── index.md
│               ├── master-in-detail.md
│               ├── next-steps.md
│               └── worker.md
├── project
│   ├── build.properties
│   ├── default.properties
│   ├── giter8.sbt
│   └── paradox.sbt
└── src
    └── main
        └── g8
            ├── build.sbt
            ├── default.properties
            ├── project
            │   └── build.properties
            └── src
                ├── main
                │   ├── resources
                │   │   ├── application.conf
                │   │   └── logback.xml
                │   └── scala
                │       └── worker
                │           ├── FrontEnd.scala
                │           ├── Main.scala
                │           ├── Master.scala
                │           ├── MasterSingleton.scala
                │           ├── MasterWorkerProtocol.scala
                │           ├── Work.scala
                │           ├── WorkExecutor.scala
                │           ├── WorkResultConsumer.scala
                │           ├── WorkState.scala
                │           └── Worker.scala
                └── test
                    └── scala
                        └── worker
                            └── DistributedWorkerSpec.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | *#
2 | *.iml
3 | *.ipr
4 | *.iws
5 | *.pyc
6 | *.tm.epoch
7 | *.vim
8 | */project/boot
9 | */project/build/target
10 | */project/project.target.config-classes
11 | *-shim.sbt
12 | *~
13 | .#*
14 | .*.swp
15 | .DS_Store
16 | .cache
17 | .cache
18 | .classpath
19 | .codefellow
20 | .ensime*
21 | .eprj
22 | .history
23 | .idea
24 | .manager
25 | .multi-jvm
26 | .project
27 | .scala_dependencies
28 | .scalastyle
29 | .settings
30 | .tags
31 | .tags_sorted_by_file
32 | .target
33 | .worksheet
34 | Makefile
35 | TAGS
36 | _akka_cluster/
37 | _dump
38 | _mb
39 | activemq-data
40 | akka-contrib/rst_preprocessed/
41 | akka-docs/_build/
42 | akka-docs/exts/
43 | akka-docs/rst_preprocessed/
44 | akka-osgi/src/main/resources/*.conf
45 | akka.sublime-project
46 | akka.sublime-workspace
47 | akka.tmproj
48 | beanstalk/
49 | bin/
50 | data
51 | deploy/*.jar
52 | etags
53 | lib_managed
54 | logs
55 | manifest.mf
56 | mongoDB/
57 | multiverse.log
58 | out
59 | project/akka-build.properties
60 | project/boot/*
61 | project/plugins/project
62 | redis/
63 | reports
64 | run-codefellow
65 | schoir.props
66 | semantic.cache
67 | src_managed
68 | storage
69 | tags
70 | target
71 | tm*.lck
72 | tm*.log
73 | tm.out
74 | worker*.log
75 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: scala
2 | scala:
3 | - 2.12.6
4 | jdk:
5 | - openjdk8
6 |
7 | script:
8 | - sbt -Dfile.encoding=UTF8 -J-XX:ReservedCodeCacheSize=256M test docs/paradox
9 |
10 | before_cache:
11 | - find $HOME/.ivy2 -name "ivydata-*.properties" -print -delete
12 | - find $HOME/.sbt -name "*.lock" -print -delete
13 |
14 | cache:
15 | directories:
16 | - $HOME/.ivy2/cache
17 | - $HOME/.sbt/boot
18 |
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 | Creative Commons Legal Code
2 |
3 | CC0 1.0 Universal
4 |
5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
12 | HEREUNDER.
13 |
14 | Statement of Purpose
15 |
16 | The laws of most jurisdictions throughout the world automatically confer
17 | exclusive Copyright and Related Rights (defined below) upon the creator
18 | and subsequent owner(s) (each and all, an "owner") of an original work of
19 | authorship and/or a database (each, a "Work").
20 |
21 | Certain owners wish to permanently relinquish those rights to a Work for
22 | the purpose of contributing to a commons of creative, cultural and
23 | scientific works ("Commons") that the public can reliably and without fear
24 | of later claims of infringement build upon, modify, incorporate in other
25 | works, reuse and redistribute as freely as possible in any form whatsoever
26 | and for any purposes, including without limitation commercial purposes.
27 | These owners may contribute to the Commons to promote the ideal of a free
28 | culture and the further production of creative, cultural and scientific
29 | works, or to gain reputation or greater distribution for their Work in
30 | part through the use and efforts of others.
31 |
32 | For these and/or other purposes and motivations, and without any
33 | expectation of additional consideration or compensation, the person
34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she
35 | is an owner of Copyright and Related Rights in the Work, voluntarily
36 | elects to apply CC0 to the Work and publicly distribute the Work under its
37 | terms, with knowledge of his or her Copyright and Related Rights in the
38 | Work and the meaning and intended legal effect of CC0 on those rights.
39 |
40 | 1. Copyright and Related Rights. A Work made available under CC0 may be
41 | protected by copyright and related or neighboring rights ("Copyright and
42 | Related Rights"). Copyright and Related Rights include, but are not
43 | limited to, the following:
44 |
45 | i. the right to reproduce, adapt, distribute, perform, display,
46 | communicate, and translate a Work;
47 | ii. moral rights retained by the original author(s) and/or performer(s);
48 | iii. publicity and privacy rights pertaining to a person's image or
49 | likeness depicted in a Work;
50 | iv. rights protecting against unfair competition in regards to a Work,
51 | subject to the limitations in paragraph 4(a), below;
52 | v. rights protecting the extraction, dissemination, use and reuse of data
53 | in a Work;
54 | vi. database rights (such as those arising under Directive 96/9/EC of the
55 | European Parliament and of the Council of 11 March 1996 on the legal
56 | protection of databases, and under any national implementation
57 | thereof, including any amended or successor version of such
58 | directive); and
59 | vii. other similar, equivalent or corresponding rights throughout the
60 | world based on applicable law or treaty, and any national
61 | implementations thereof.
62 |
63 | 2. Waiver. To the greatest extent permitted by, but not in contravention
64 | of, applicable law, Affirmer hereby overtly, fully, permanently,
65 | irrevocably and unconditionally waives, abandons, and surrenders all of
66 | Affirmer's Copyright and Related Rights and associated claims and causes
67 | of action, whether now known or unknown (including existing as well as
68 | future claims and causes of action), in the Work (i) in all territories
69 | worldwide, (ii) for the maximum duration provided by applicable law or
70 | treaty (including future time extensions), (iii) in any current or future
71 | medium and for any number of copies, and (iv) for any purpose whatsoever,
72 | including without limitation commercial, advertising or promotional
73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
74 | member of the public at large and to the detriment of Affirmer's heirs and
75 | successors, fully intending that such Waiver shall not be subject to
76 | revocation, rescission, cancellation, termination, or any other legal or
77 | equitable action to disrupt the quiet enjoyment of the Work by the public
78 | as contemplated by Affirmer's express Statement of Purpose.
79 |
80 | 3. Public License Fallback. Should any part of the Waiver for any reason
81 | be judged legally invalid or ineffective under applicable law, then the
82 | Waiver shall be preserved to the maximum extent permitted taking into
83 | account Affirmer's express Statement of Purpose. In addition, to the
84 | extent the Waiver is so judged Affirmer hereby grants to each affected
85 | person a royalty-free, non transferable, non sublicensable, non exclusive,
86 | irrevocable and unconditional license to exercise Affirmer's Copyright and
87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the
88 | maximum duration provided by applicable law or treaty (including future
89 | time extensions), (iii) in any current or future medium and for any number
90 | of copies, and (iv) for any purpose whatsoever, including without
91 | limitation commercial, advertising or promotional purposes (the
92 | "License"). The License shall be deemed effective as of the date CC0 was
93 | applied by Affirmer to the Work. Should any part of the License for any
94 | reason be judged legally invalid or ineffective under applicable law, such
95 | partial invalidity or ineffectiveness shall not invalidate the remainder
96 | of the License, and in such case Affirmer hereby affirms that he or she
97 | will not (i) exercise any of his or her remaining Copyright and Related
98 | Rights in the Work or (ii) assert any associated claims and causes of
99 | action with respect to the Work, in either case contrary to Affirmer's
100 | express Statement of Purpose.
101 |
102 | 4. Limitations and Disclaimers.
103 |
104 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
105 | surrendered, licensed or otherwise affected by this document.
106 | b. Affirmer offers the Work as-is and makes no representations or
107 | warranties of any kind concerning the Work, express, implied,
108 | statutory or otherwise, including without limitation warranties of
109 | title, merchantability, fitness for a particular purpose, non
110 | infringement, or the absence of latent or other defects, accuracy, or
111 | the present or absence of errors, whether or not discoverable, all to
112 | the greatest extent permissible under applicable law.
113 | c. Affirmer disclaims responsibility for clearing rights of other persons
114 | that may apply to the Work or any use thereof, including without
115 | limitation any person's Copyright and Related Rights in the Work.
116 | Further, Affirmer disclaims responsibility for obtaining any necessary
117 | consents, permissions or other rights required for any use of the
118 | Work.
119 | d. Affirmer understands and acknowledges that Creative Commons is not a
120 | party to this document and has no duty or obligation with respect to
121 | this CC0 or use of the Work.
122 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Activator Template by Typesafe
2 |
3 | Licensed under Public Domain (CC0)
4 |
5 | To the extent possible under law, the person who associated CC0 with
6 | this Activator Template has waived all copyright and related or neighboring
7 | rights to this Activator Template.
8 |
9 | You should have received a copy of the CC0 legalcode along with this
10 | work. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
11 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Akka Distributed Workers Quickstart
2 |
3 | ## Project has moved
4 |
5 | This example has moved to [akka-samples](https://github.com/akka/akka-samples/tree/2.6/akka-sample-distributed-workers-scala)
6 | and is not maintained here anymore.
7 |
8 | ## Template license
9 |
10 | Written in 2017 by Lightbend, Inc.
11 |
12 | To the extent possible under law, the author(s) have dedicated all copyright and related and
13 | neighboring rights to this template to the public domain worldwide. This template is distributed
14 | without any warranty. See http://creativecommons.org/publicdomain/zero/1.0/.
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
1 | // This build is for this Giter8 template.
2 | // To test the template run `g8` or `g8Test` from the sbt session.
3 | // See http://www.foundweekends.org/giter8/testing.html#Using+the+Giter8Plugin for more details.
4 | lazy val root = (project in file("."))
5 | .settings(
6 | name := "akka-scala-seed",
7 | test in Test := {
8 | val _ = (g8Test in Test).toTask("").value
9 | },
10 | scriptedLaunchOpts ++= List("-Xms1024m", "-Xmx1024m", "-XX:ReservedCodeCacheSize=128m", "-XX:MaxPermSize=256m", "-Xss2m", "-Dfile.encoding=UTF-8")
11 | )
12 |
13 | // Documentation for this project:
14 | // sbt "project docs" "~ paradox"
15 | // open docs/target/paradox/site/main/index.html
16 | lazy val docs = (project in file("docs"))
17 | .enablePlugins(ParadoxPlugin)
18 |
19 | resolvers in ThisBuild += Resolver.url("typesafe", url("http://repo.typesafe.com/typesafe/ivy-releases/"))(Resolver.ivyStylePatterns)
20 |
--------------------------------------------------------------------------------
/docs/build.sbt:
--------------------------------------------------------------------------------
1 | // Uses the out of the box generic theme.
2 | paradoxTheme := Some(builtinParadoxTheme("generic"))
3 |
4 | scalaVersion := "2.12.6"
5 |
6 | paradoxProperties in Compile ++= Map(
7 | "snip.g8root.base_dir" -> "../../../../src/main/g8",
8 | "snip.g8src.base_dir" -> "../../../../src/main/g8/src/main/",
9 | "snip.g8srctest.base_dir" -> "../../../../src/main/g8/src/test/"
10 | )
--------------------------------------------------------------------------------
/docs/src/main/paradox/back-end.md:
--------------------------------------------------------------------------------
1 | # The Back-End Nodes
2 |
3 | The back-end nodes host the `Master` actor, which manages work, keeps track of available workers, and notifies registered workers when new work is available. The single `Master` actor is the heart of the solution, with built-in resilience provided by the [Akka Cluster Singleton](http://doc.akka.io/docs/akka/current/scala/guide/modules.html#cluster-singleton).
4 |
5 | ## The Master singleton
6 |
7 | The [Cluster Singleton](http://doc.akka.io/docs/akka/current/scala/guide/modules.html#cluster-singleton) tool in Akka makes sure the actor runs on at most one node within the subset of nodes marked with the role `back-end` at any given time. It runs on the oldest back-end node. If the node on which the `Master` is running is removed from the cluster, Akka starts a new `Master` on the next oldest node. Other nodes in the cluster interact with the `Master` through the `ClusterSingletonProxy` without knowing its explicit location. You can see this interaction in the `FrontEnd` and `Worker` actors.
8 |
9 | If the node hosting the master crashes and is removed from the cluster, another `Master` actor is automatically started on the new oldest node.
10 |
11 | 
12 |
13 | You can see how the master singleton is started in the method `startSingleton`
14 | in `MasterSingleton`:
15 |
16 | @@snip [MasterSingleton.scala]($g8src$/scala/worker/MasterSingleton.scala) { #singleton }
17 |
18 | The singleton manager accepts the `Props` of the actual singleton actor, as well as settings that restrict the singleton to run only on nodes with the role `back-end`.
19 |
20 | The proxy is similarly configured, with the role where the singleton will be running, and also a path where the singleton manager actor can be found:
21 |
22 | @@snip [MasterSingleton.scala]($g8src$/scala/worker/MasterSingleton.scala) { #proxy }
23 |
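For reference, here is a sketch of what `MasterSingleton` can look like with the Akka Cluster Singleton API. The authoritative version is the file included by the snippets above; the object name and the `workTimeout` value shown here are illustrative placeholders:

```scala
import akka.actor.{ActorRef, ActorSystem, PoisonPill, Props}
import akka.cluster.singleton.{ClusterSingletonManager, ClusterSingletonManagerSettings, ClusterSingletonProxy, ClusterSingletonProxySettings}

import scala.concurrent.duration._

object MasterSingletonSketch {

  private val singletonName = "master"
  private val singletonRole = "back-end"

  // Start the ClusterSingletonManager that hosts the Master on the oldest back-end node
  def startSingleton(system: ActorSystem): ActorRef =
    system.actorOf(
      ClusterSingletonManager.props(
        Master.props(workTimeout = 10.seconds), // illustrative timeout
        terminationMessage = PoisonPill,
        settings = ClusterSingletonManagerSettings(system).withRole(singletonRole)
      ),
      singletonName)

  // A proxy that routes messages to wherever the singleton currently runs
  def proxyProps(system: ActorSystem): Props =
    ClusterSingletonProxy.props(
      singletonManagerPath = s"/user/$singletonName",
      settings = ClusterSingletonProxySettings(system).withRole(singletonRole))
}
```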
24 |
25 | Through event sourcing, the state of the master is recovered on the standby node if the node hosting it is lost.
26 |
27 | An alternative to event sourcing and the singleton master would be to keep track of all jobs in a central database, but that is more complicated and not as scalable. At the end of the tutorial we will describe how multiple masters can be supported with a small adjustment.
28 |
29 | Let's now explore the implementation of the `Master` actor in depth.
30 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/experimenting.md:
--------------------------------------------------------------------------------
1 | ## Experimenting with the example
2 |
3 | When the application is run without parameters, it starts a six-node cluster within the same JVM along with a Cassandra database. It can be more interesting to run the nodes in separate processes. Open four terminal windows.
4 |
5 | In the first terminal window, start the Cassandra database with the following command:
6 |
7 | ```bash
8 | sbt "runMain worker.Main cassandra"
9 | ```
10 |
11 | The Cassandra database stays alive as long as you do not kill this process; when you want to stop it, use `Ctrl + C`. Without the database the back-end nodes will not be able to start up.
12 |
13 | You could also run your own local installation of Cassandra, provided that it runs on the default port on localhost and does not require a password.
14 |
15 |
16 | With the database running, go to the second terminal window and start the first seed node with the following command:
17 |
18 | ```bash
19 | sbt "runMain worker.Main 2551"
20 | ```
21 |
22 | 2551 corresponds to the port of the first seed-nodes element in the configuration. In the log output you see that the cluster node has been started and changed status to 'Up'.
23 |
24 | In the third terminal window, start the front-end node with the following command:
25 |
26 | ```bash
27 | sbt "runMain worker.Main 3001"
28 | ```
29 |
30 | 3001 is the port of the node. In the log output you see that the cluster node has been started, joins the 2551 node, and becomes a member of the cluster. Its status changes to 'Up'.
31 |
32 | Switch over to the second terminal window and see in the log output that the member joined. So far, no `Worker` has been started, i.e. jobs are produced and accepted but not processed.
33 |
34 | In the fourth terminal window, start a worker node with the following command:
35 |
36 | ```bash
37 | sbt "runMain worker.Main 5001 3"
38 | ```
39 |
40 | 5001 means the node will be a worker node, and the second parameter `3` means that it will host three worker actors.
41 |
42 | Look at the log output in the different terminal windows. In the third window (front-end) you should see that the produced jobs are processed and logged as `"Consumed result"`.
43 |
44 | Take a look at the logging that is done in `FrontEnd`, `Master` and `Worker`. Identify the corresponding log entries in the three terminal windows with Akka nodes.
45 |
46 | Shut down the worker node (fourth terminal window) with `Ctrl + C`. Observe how the `"Consumed result"` logging in the front-end node (third terminal window) stops. Start the worker node again.
47 |
48 | ```bash
49 | sbt "runMain worker.Main 5001 3"
50 | ```
51 |
52 | You can also start more such worker nodes in new terminal windows.
53 |
54 | You can start more cluster back-end nodes using port numbers between 2000 and 2999:
55 |
56 | ```bash
57 | sbt "runMain worker.Main 2552"
58 | ```
59 |
60 | The nodes with ports 2551 to 2554 are configured to be used as "seed nodes" in this sample; if you shut down all of them, or start none of them, the other nodes will not know how to join the cluster. If all four are shut down and 2551 is then started, it will join itself and form a new cluster.
61 |
62 | As long as one of the four nodes is alive the cluster will keep working. You can read more about this in the [Akka documentation section on seed nodes](http://doc.akka.io/docs/akka/current/scala/cluster-usage.html).
63 |
64 | You can start more cluster front-end nodes using port numbers between 3000-3999:
65 |
66 | ```bash
67 | sbt "runMain worker.Main 3002"
68 | ```
69 |
70 | Any port outside these ranges creates a worker node, for which you can also play around with the number of worker actors using the second parameter.
71 |
72 | ```bash
73 | sbt "runMain worker.Main 5009 4"
74 | ```
75 |
76 | ## The journal
77 |
78 | The files of the Cassandra database are saved in the target directory, and when you restart the application the state is recovered. You can clean the state with:
79 |
80 | ```bash
81 | sbt clean
82 | ```
--------------------------------------------------------------------------------
/docs/src/main/paradox/front-end.md:
--------------------------------------------------------------------------------
1 | # Front-End Nodes
2 |
3 | Typically in systems built with Akka, clients submit requests using a RESTful API. Both [Akka HTTP](http://doc.akka.io/docs/akka-http/current/scala/http/introduction.html) and [Play Framework](https://www.playframework.com) are great choices for implementing an HTTP API for the front-end. To limit the scope of this example, we have chosen to emulate client activity with two ordinary actors:
4 |
5 | * The `FrontEnd` actor generates payloads at random intervals and sends them to the `Master` actor.
6 | * The `WorkResultConsumerActor` consumes results and logs them.
7 |
8 |
9 | The `FrontEnd` actor only concerns itself with posting workloads, and does not care when the work has been completed. When a workload has been processed successfully and reported back to the `Master` actor, the `Master` publishes the result to all interested cluster nodes through Distributed Pub-Sub.
10 |
11 | The `WorkResultConsumerActor` subscribes to the completion events and logs when a workload has completed.
12 |
13 | Now, let's take a look at the code that accomplishes this front-end behavior.
14 |
15 | ## The Front-end Actor
16 |
17 | @@snip [FrontEnd.scala]($g8src$/scala/worker/FrontEnd.scala) { #front-end }
18 |
19 | Note in the source code that as the `FrontEnd` actor starts up, it:
20 |
21 | 1. Schedules `Tick` messages to itself.
22 | 1. Each `Tick` message:
23 |     1. Triggers creation of a new `Work` message.
24 |     1. Sends the `Work` message to the `Master` actor on a `back-end` node.
25 |     1. Switches to a new `busy` behavior.
26 |
27 | In short: when starting up, the `FrontEnd` actor schedules `Tick` messages to itself. Each `Tick` triggers creation of a new `Work` message, sends the work to the `Master` actor on a `back-end` node, and switches to a new `busy` behavior.
28 |
29 | The cluster contains one `Master` actor. The `FrontEnd` actor does not need to know the exact location because it sends work to the master using the `ClusterSingletonProxy`.
30 |
31 | The `Master` actor can accept or deny a work request, and we need to deal with unexpected errors:
32 |
33 | * If the `Master` accepts the request, the actor schedules a new tick to itself and toggles back to the idle behavior.
34 | * To deal with failures, the request uses the [ask pattern](http://doc.akka.io/docs/akka/current/scala/actors.html#ask-send-and-receive-future), which adds a timeout on the reply. If the timeout expires before the master responds, the returned `Future` fails with an `akka.pattern.AskTimeoutException`.
35 | * We transform timeouts or denials from the `Master` into a `NotOk` value. The `Future` is piped to the `FrontEnd` actor as a message with the completed value, either the successful result or `NotOk`. If the work is not accepted or there is no response, for example if the message or response got lost, the `FrontEnd` actor backs off a bit and then sends the work again.
36 |
37 |
38 | The future is then `pipe`d to the actor itself, meaning that when the future completes, the value it completed with is sent to the actor as a message.
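
A minimal sketch of that ask-and-pipe interaction, assuming the `Work` message from this example and a `NotOk` marker that mirrors the private message the real `FrontEnd` uses:

```scala
import akka.actor.{Actor, ActorRef}
import akka.pattern.{ask, pipe}
import akka.util.Timeout

import scala.concurrent.duration._

object AskPipeSketch {
  case object NotOk // stands in for the private NotOk message in FrontEnd

  trait SendWork { this: Actor =>
    import context.dispatcher
    def masterProxy: ActorRef

    def sendWork(work: Work): Unit = {
      implicit val timeout: Timeout = Timeout(5.seconds)
      (masterProxy ? work).recover {
        case _ => NotOk // an ask timeout or failure becomes a NotOk message
      }.pipeTo(self)    // the completed value is delivered to this actor as a message
    }
  }
}
```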
39 |
40 | When a workload has been acknowledged by the master, the actor schedules a new tick to itself and toggles back to the `idle` behavior.
41 |
42 | If the work is not accepted or there is no response, for example if the message or response got lost, the `FrontEnd` actor backs off a bit and then sends the work again.
43 |
44 | You can see how the actors on a front-end node are started in the method `Main.startFrontEnd`:
45 |
46 | @@snip [Main.scala]($g8src$/scala/worker/Main.scala) { #front-end }
47 |
48 | ## The Work Result Consumer Actor
49 | As mentioned in the introduction, results are published using Distributed Pub-Sub. The `WorkResultConsumerActor` subscribes to completion events and logs when a workload has completed:
50 |
51 | @@snip [WorkResultConsumer.scala]($g8src$/scala/worker/WorkResultConsumer.scala) { #work-result-consumer }
52 |
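A sketch of what such a subscriber can look like, consistent with the `WorkResult` message that `Master` publishes to `Master.ResultsTopic` (the class name here is hypothetical; the real implementation is in `WorkResultConsumer.scala`):

```scala
import akka.actor.{Actor, ActorLogging, Props}
import akka.cluster.pubsub.{DistributedPubSub, DistributedPubSubMediator}

object WorkResultConsumerSketch {
  def props: Props = Props(new WorkResultConsumerSketch)
}

class WorkResultConsumerSketch extends Actor with ActorLogging {

  // subscribe to the topic the Master publishes results to
  private val mediator = DistributedPubSub(context.system).mediator
  mediator ! DistributedPubSubMediator.Subscribe(Master.ResultsTopic, self)

  def receive: Receive = {
    case _: DistributedPubSubMediator.SubscribeAck =>
      log.info("Subscribed to {}", Master.ResultsTopic)
    case WorkResult(workId, result) =>
      log.info("Consumed result: {} -> {}", workId, result)
  }
}
```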
53 | In an actual application you would probably want a way for clients to poll or stream the status changes of the submitted work.
54 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/images/cluster-nodes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akka/akka-distributed-workers-scala.g8/55fb750cfafdfec68444c967d047f1f72a5ac231/docs/src/main/paradox/images/cluster-nodes.png
--------------------------------------------------------------------------------
/docs/src/main/paradox/images/cluster-nodes.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/images/master-worker-message-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akka/akka-distributed-workers-scala.g8/55fb750cfafdfec68444c967d047f1f72a5ac231/docs/src/main/paradox/images/master-worker-message-flow.png
--------------------------------------------------------------------------------
/docs/src/main/paradox/images/master-worker-message-flow.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/images/singleton-manager.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akka/akka-distributed-workers-scala.g8/55fb750cfafdfec68444c967d047f1f72a5ac231/docs/src/main/paradox/images/singleton-manager.png
--------------------------------------------------------------------------------
/docs/src/main/paradox/images/singleton-manager.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/index.md:
--------------------------------------------------------------------------------
1 | # Akka Distributed Workers with Scala Guide
2 |
3 | Akka is a toolkit and runtime for building highly concurrent, distributed, and fault-tolerant event-driven applications on the JVM. Akka can be used with both Java and Scala.
4 |
5 | This guide introduces Akka clusters by describing the Scala version of a distributed workers example.
6 |
7 | A Java version of the guide is not yet available but will be soon, so check back in a while!
8 |
9 | The guide contains advanced usage of Akka and requires familiarity with Akka and Actors. If you have no previous experience with Actors you should start with the [Akka Quickstart with Scala](http://developer.lightbend.com/guides/akka-quickstart-scala/), which goes through the basics.
10 |
11 | ## Example overview
12 |
13 | To be reactive, distributed applications must deal gracefully with temporary and prolonged outages as well as have the ability to scale up and down to make the best use of resources. Akka clustering provides these capabilities so that you don't have to implement them yourself. The distributed workers example demonstrates the following Akka clustering capabilities:
14 |
15 | * elastic addition and removal of the front-end actors that accept client requests
16 | * elastic addition and removal of the back-end actors that distribute and perform the work across different nodes
17 | * how jobs are re-tried in the face of failures
18 |
19 | But before we dive into how the example accomplishes these goals, download the example and try it out!
20 |
21 | The design is based on Derek Wyatt's blog post [Balancing Workload Across Nodes with Akka 2](http://letitcrash.com/post/29044669086/balancing-workload-across-nodes-with-akka-2) from 2012, which is a bit old, but still a good description of the advantages of letting the workers pull work from the master instead of pushing work to the workers.
22 |
23 | ## Downloading the example
24 |
25 | The Akka Distributed Workers example for Scala is a zipped project that includes a distribution of sbt (build tool). You can run it on Linux, MacOS, or Windows. The only prerequisite is Java 8.
26 |
27 | Download and unzip the example:
28 |
29 | 1. Download the zip file from [Lightbend Tech Hub](https://developer.lightbend.com/start/?group=akka&project=akka-distributed-workers-scala) by clicking `CREATE A PROJECT FOR ME`.
30 | 1. Extract the zip file to a convenient location:
31 | - On Linux and MacOS systems, open a terminal and use the command `unzip akka-distributed-workers-scala.zip`. Note: On MacOS, if you unzip using Archiver, you also have to make the sbt files executable:
32 | ```
33 | $ chmod u+x ./sbt
34 | $ chmod u+x ./sbt-dist/bin/sbt
35 | ```
36 | - On Windows, use a tool such as File Explorer to extract the project.
37 |
38 | ## Running the example
39 |
40 | To run the sample application, which starts a small cluster inside of the same JVM instance:
41 |
42 | 1. In a console, change directories to the top level of the unzipped project.
43 |
44 | For example, if you used the default project name, akka-distributed-workers-scala, and extracted the project to your root directory,
45 | from the root directory, enter: `cd akka-distributed-workers-scala`
46 |
47 | 1. Enter `./sbt` on MacOS/Linux or `sbt.bat` on Windows to start sbt.
48 |
49 | sbt downloads project dependencies. The `>` prompt indicates sbt has started in interactive mode.
50 |
51 | 1. At the sbt prompt, enter `run`.
52 |
53 | sbt builds the project and runs the `Main` of the project:
54 |
55 | After waiting a few seconds for the cluster to form, the output should start to look _something_ like this (scroll all the way to the right to see the Actor output):
56 |
57 | ```
58 | [INFO] [07/21/2017 17:41:53.320] [ClusterSystem-akka.actor.default-dispatcher-16] [akka.tcp://ClusterSystem@127.0.0.1:51983/user/producer] Produced work: 3
59 | [INFO] [07/21/2017 17:41:53.322] [ClusterSystem-akka.actor.default-dispatcher-3] [akka.tcp://ClusterSystem@127.0.0.1:2551/user/master/singleton] Accepted work: 3bce4d6d-eaae-4da6-b316-0c6f566f2399
60 | [INFO] [07/21/2017 17:41:53.328] [ClusterSystem-akka.actor.default-dispatcher-3] [akka.tcp://ClusterSystem@127.0.0.1:2551/user/master/singleton] Giving worker 2b646020-6273-437c-aa0d-4aad6f12fb47 some work 3bce4d6d-eaae-4da6-b316-0c6f566f2399
61 | [INFO] [07/21/2017 17:41:53.328] [ClusterSystem-akka.actor.default-dispatcher-2] [akka.tcp://ClusterSystem@127.0.0.1:51980/user/worker] Got work: 3
62 | [INFO] [07/21/2017 17:41:53.328] [ClusterSystem-akka.actor.default-dispatcher-16] [akka.tcp://ClusterSystem@127.0.0.1:51980/user/worker] Work is complete. Result 3 * 3 = 9.
63 | [INFO] [07/21/2017 17:41:53.329] [ClusterSystem-akka.actor.default-dispatcher-19] [akka.tcp://ClusterSystem@127.0.0.1:2551/user/master/singleton] Work 3bce4d6d-eaae-4da6-b316-0c6f566f2399 is done by worker 2b646020-6273-437c-aa0d-4aad6f12fb47
64 | ```
65 |
66 | Congratulations, you just ran your first Akka Cluster app. Now take a look at what happened under the covers.
67 |
68 | ## What happens when you run it
69 |
70 | When `Main` is run without any parameters, it starts six `ActorSystem`s in the same JVM. These six `ActorSystem`s form a single cluster. The six nodes include two each that perform front-end, back-end, and worker tasks:
71 |
72 | * The front-end nodes simulate an external interface, such as a REST API, that accepts workloads from clients.
73 | * The worker nodes have worker actors that accept and process workloads.
74 | * The back-end nodes contain a Master actor that coordinates workloads, keeps track of the workers, and delegates work to available workers. One of the nodes is active and one is on standby. If the active Master goes down, the standby takes over.
75 |
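The corresponding startup code, abridged from `Main.scala` later in this project, looks like this:

```scala
def startClusterInSameJvm(): Unit = {
  startCassandraDatabase()

  // two back-end nodes, one of which will host the active Master singleton
  startBackEnd(2551)
  startBackEnd(2552)
  // two front-end nodes
  startFrontEnd(3000)
  startFrontEnd(3001)
  // two worker nodes with two worker actors each
  startWorker(5001, 2)
  startWorker(5002, 2)
}
```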
76 | A bird's eye perspective of the architecture looks like this:
77 |
78 | 
79 |
80 | Let's look at the details of each part of the application, starting with the front-end.
81 |
82 | @@@index
83 |
84 | * [The Front-end Nodes](front-end.md)
85 | * [The Back-end Nodes](back-end.md)
86 | * [The Master Actor in Detail](master-in-detail.md)
87 | * [The Worker Nodes](worker.md)
88 | * [Experimenting with the example](experimenting.md)
89 | * [Next Steps](next-steps.md)
90 |
91 | @@@
92 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/master-in-detail.md:
--------------------------------------------------------------------------------
1 | # The Master Actor in Detail
2 |
3 | The `Master` actor is without question the most involved component in this example. This is because it is designed to deal with failures. While the Akka cluster takes care of restarting the `Master` in case of a failure, we also want to make sure that the new `Master` can arrive at the same state as the failed `Master`. We use event sourcing and Akka Persistence to achieve this.
4 |
5 |
6 |
7 | If the `back-end` node hosting the `Master` actor crashes, the Akka Cluster Singleton makes sure a new `Master` starts up on a different node, but we also want that new instance to reach the exact same state as the `Master` on the crashed node. This is achieved through event sourcing and [Akka Persistence](http://doc.akka.io/docs/akka/current/scala/persistence.html).
8 |
9 |
10 |
11 | ## Tracking current work items
12 |
13 | The current set of work items is modelled in the `WorkState` class. It keeps track of the work that is pending, has been accepted by a worker, has been completed, and so on. Every change to the `WorkState` is modelled as a domain event:
14 |
15 | @@snip [WorkState.scala]($g8src$/scala/worker/WorkState.scala) { #events }
16 |
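A sketch of what those events can look like, consistent with how `Master.scala` uses them (the authoritative definitions are in `WorkState.scala`, included by the snippet above; the wrapping object name is hypothetical):

```scala
object WorkStateEventsSketch {
  sealed trait WorkDomainEvent
  case class WorkAccepted(work: Work)                   extends WorkDomainEvent
  case class WorkStarted(workId: String)                extends WorkDomainEvent
  case class WorkCompleted(workId: String, result: Any) extends WorkDomainEvent
  case class WorkerFailed(workId: String)               extends WorkDomainEvent
}
```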
17 | This allows us to capture and store each such event as it happens; we can later replay all of them on an empty model and arrive at the exact same state. This is how event sourcing and [Akka Persistence](http://doc.akka.io/docs/akka/current/scala/persistence.html) allow the actor to start on any node and reach the same state as a previous instance.
18 |
19 | If the `Master` fails and is restarted, the replacement `Master` replays events from the log to retrieve the current state. This means that when the WorkState is modified, the `Master` must persist the event before acting on it. When the event is successfully stored, we can modify the state. Otherwise, if a failure occurs before the event is persisted, the replacement `Master` will not be able to attain the same state as the failed `Master`.
20 |
21 | Let's look at how a command to process a work item from the front-end comes in:
22 |
23 | @@snip [Master.scala]($g8src$/scala/worker/Master.scala) { #persisting }
24 |
25 | The first thing you might notice is the comment saying _idempotent_. This means that the same work message may arrive multiple times, but regardless of how many times the same work arrives, it should only be executed once. This is needed since the `FrontEnd` actor re-sends work in case the `Work` or `Ack` messages get lost (Akka does not provide any guarantee of delivery, [see details in the docs](http://doc.akka.io/docs/akka/current/scala/general/message-delivery-reliability.html#discussion-why-no-guaranteed-delivery-)).
26 |
27 | To make the logic idempotent we simply check if the work id is already known; if it is, we `Ack` it without further logic. If the work is previously unknown, we start by transforming it into a `WorkAccepted` event, which we persist, and only in the `handler` function passed to `persist` do we actually update the `workState`, send an `Ack` back to the `FrontEnd`, and trigger a search for available workers.
28 |
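Abridged, this is what that region of `Master.scala` looks like:

```scala
case work: Work =>
  // idempotent: a duplicate of already accepted work is just acknowledged again
  if (workState.isAccepted(work.workId)) {
    sender() ! Master.Ack(work.workId)
  } else {
    log.info("Accepted work: {}", work.workId)
    persist(WorkAccepted(work)) { event =>
      // only after the event is safely persisted do we ack, update the state
      // and look for an available worker
      sender() ! Master.Ack(work.workId)
      workState = workState.updated(event)
      notifyWorkers()
    }
  }
```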
29 |
30 | ## Implementation items required for Akka Persistence
31 |
32 | In a "normal" Actor the only thing we have to do is to implement `receive`, which is then invoked for each incoming message. In a `PersistentActor` there are three things that needs to be implemented:
33 |
34 | 1. `persistenceId` is a global identifier for the actor, we must make sure that there is never more than one Actor instance with the same `persistenceId` running globally, or else we would possibly mess up its journal.
35 | 1. `receiveCommand` corresponds to the `receive` method of regular actors. Messages sent to the actor end up here.
36 | 1. `receiveRecover` is invoked with the recorded events of the actor when it starts up.
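
A minimal skeleton (hypothetical class name) showing those three members; the real `Master` fills them in with the `WorkState` events:

```scala
import akka.actor.ActorLogging
import akka.persistence.{PersistentActor, RecoveryCompleted}

class PersistentSkeleton extends PersistentActor with ActorLogging {

  // must be globally unique: never run two instances with the same id
  override val persistenceId: String = "master"

  // regular messages arrive here, like `receive` in a plain actor
  override def receiveCommand: Receive = {
    case msg => log.info("Got command {}", msg)
  }

  // previously persisted events are replayed here on start-up
  override def receiveRecover: Receive = {
    case RecoveryCompleted => log.info("Recovery completed")
    case event             => log.info("Replayed {}", event)
  }
}
```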
37 |
38 | ## Tracking workers
39 |
40 | Unlike the `Master` actor, the example system contains multiple workers that can be stopped and restarted frequently. We do not need to save their state since the `Master` is tracking work and will simply send work to another worker if the original fails to respond. So, rather than persisting a list of available workers, the example uses the following strategy:
41 |
42 | * Running workers periodically register with the master using a `RegisterWorker` message. If a `back-end` node fails and the `Master` is started on a new node, the registrations go automatically to the new node.
43 | * Any type of failure -- whether from the network, the worker actor, or its node -- that prevents a `RegisterWorker` message from arriving within the `consider-worker-dead-after` timeout causes the `Master` actor to remove the worker from its list (the relevant settings are shown below).
44 |
45 | @@snip [Master.scala]($g8src$/scala/worker/Master.scala) { #pruning }
46 |
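The intervals and timeouts involved are configured in `application.conf`:

```
distributed-workers {
  # Each worker pings the master with this interval
  # to let it know that it is alive
  worker-registration-interval = 10s
  # If a worker hasn't gotten in touch in this long
  # it is removed from the set of workers
  consider-worker-dead-after = 60s

  # If a workload hasn't finished in this long it
  # is considered failed and is retried
  work-timeout = 10s
}
```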
47 | When stopping, a `Worker` actor still tries to gracefully remove itself using the `DeRegisterWorker` message, but in case of a crash it has no chance to communicate this to the master node.
48 |
49 | @@snip [Master.scala]($g8src$/scala/worker/Master.scala) { #graceful-remove }
50 |
51 | Now let's move on to the last piece of the puzzle, the worker nodes.
--------------------------------------------------------------------------------
/docs/src/main/paradox/next-steps.md:
--------------------------------------------------------------------------------
1 | # Next Steps
2 |
3 | The following are some ideas where to take this sample next. Implementation of each idea is left up to you.
4 |
5 | ## Using a different serializer
6 |
7 | To simplify things, this sample uses the default Java serializer, but it should not be used in a real application. For messages passed across the network, Java serialization has serious security implications (in the worst case, the ability to remotely execute code) and does not have good performance characteristics. For storing the domain events in a persistent journal, there is also the problem of how to deal with versioning, which is hard if not impossible with Java serialization.
8 |
9 | A few options to look into are listed in the [Akka Docs section on Serialization](http://doc.akka.io/docs/akka/current/scala/serialization.html#external-akka-serializers).
10 |
11 | ## An HTTP-Based API
12 |
13 | The `FrontEnd` in this sample is a dummy that automatically generates work. A real application could, for example, use [Akka HTTP](http://doc.akka.io/docs/akka-http/current/scala/http/introduction.html) to provide an HTTP REST (or other) API for external clients.
14 |
15 | ## Scaling better with many masters
16 |
17 | If the singleton master becomes a bottleneck, we could start several master actors and shard the jobs among them. This could be achieved by using [Akka Cluster Sharding](http://doc.akka.io/docs/akka/current/scala/cluster-sharding.html) with many `Master` actors as entities and a hash of some sort on the payload deciding which master it should go to.
18 |
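A hypothetical sketch of what that could look like with Cluster Sharding; note that `Master` itself would also need changes (for example a per-entity `persistenceId`) before this would work, and all names and numbers here are illustrative:

```scala
import akka.actor.{ActorRef, ActorSystem}
import akka.cluster.sharding.{ClusterSharding, ClusterShardingSettings, ShardRegion}

import scala.concurrent.duration._

object ShardedMastersSketch {

  private val numberOfMasters = 10
  private val numberOfShards = 100

  // route each work item to a master entity based on a hash of its id
  val extractEntityId: ShardRegion.ExtractEntityId = {
    case work: Work => (math.abs(work.workId.hashCode % numberOfMasters).toString, work)
  }

  val extractShardId: ShardRegion.ExtractShardId = {
    case work: Work => math.abs(work.workId.hashCode % numberOfShards).toString
  }

  def start(system: ActorSystem): ActorRef =
    ClusterSharding(system).start(
      typeName = "master",
      entityProps = Master.props(workTimeout = 10.seconds),
      settings = ClusterShardingSettings(system).withRole("back-end"),
      extractEntityId = extractEntityId,
      extractShardId = extractShardId)
}
```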
19 | ## More tools for building distributed systems
20 |
21 | In this example we have used
22 | [Cluster Singleton](http://doc.akka.io/docs/akka/current/scala/cluster-singleton.html#cluster-singleton)
23 | and
24 | [Distributed Publish Subscribe](http://doc.akka.io/docs/akka/current/scala/distributed-pub-sub.html)
25 | but those are not the only tools in Akka Cluster.
26 |
27 | You can also find a good overview of the various modules that make up Akka in
28 | [this section of the official documentation](http://doc.akka.io/docs/akka/current/scala/guide/modules.html#cluster-singleton).
--------------------------------------------------------------------------------
/docs/src/main/paradox/worker.md:
--------------------------------------------------------------------------------
1 | # The Worker Nodes
2 |
3 | `Worker` actors and the `Master` actor interact as follows:
4 |
5 | 1. `Worker` actors register with the `Master` so the master knows they are available and ready to take on work.
6 | 1. When the `Master` actor has work, it sends a `WorkIsReady` message to all workers it thinks are not busy.
7 |    `Worker` actors that are `idle` will reply to the `Master` with a `WorkerRequestsWork` message.
8 | 1. The `Master` picks the first reply and assigns the work to that worker. This achieves back pressure because the `Master` does not push work onto workers that are already busy, which would overwhelm their mailboxes.
9 | 1. When the worker receives work from the master, it delegates the actual processing to a child actor, `WorkExecutor`. This allows the worker to remain responsive while its child executes the work. The messages involved in this exchange are sketched below.
10 |
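A sketch of those messages, consistent with how `Master` uses them; the real definitions live in `MasterWorkerProtocol.scala` and the object name here is hypothetical:

```scala
object MasterWorkerProtocolSketch {

  // messages from workers to the master
  case class RegisterWorker(workerId: String)
  case class DeRegisterWorker(workerId: String)
  case class WorkerRequestsWork(workerId: String)
  case class WorkIsDone(workerId: String, workId: String, result: Any)
  case class WorkFailed(workerId: String, workId: String)

  // messages to workers from the master
  case object WorkIsReady
  case class Ack(workId: String)
}
```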
11 | 
12 |
13 | You can see how a worker node and a number of worker actors are started in the method `Main.startWorker`:
14 |
15 | @@snip [Main.scala]($g8src$/scala/worker/Main.scala) { #worker }
16 |
17 | Now that we have covered all the details, we can experiment with different sets of nodes for the cluster.
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.13.16
2 |
--------------------------------------------------------------------------------
/project/default.properties:
--------------------------------------------------------------------------------
1 | name=akka-distributed-workers-scala
2 | description=Akka is a toolkit and runtime for building highly concurrent, distributed, and fault tolerant event-driven apps. This application will get you started building distributed systems with Scala.
--------------------------------------------------------------------------------
/project/giter8.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.foundweekends.giter8" %% "sbt-giter8" % "0.11.0")
2 |
--------------------------------------------------------------------------------
/project/paradox.sbt:
--------------------------------------------------------------------------------
1 | // sbt-paradox, used for documentation
2 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox" % "0.2.11")
--------------------------------------------------------------------------------
/src/main/g8/build.sbt:
--------------------------------------------------------------------------------
1 | name := "akka-distributed-workers"
2 |
3 | version := "1.0"
4 |
5 | scalaVersion := "$scala_version$"
6 | lazy val akkaVersion = "$akka_version$"
7 | lazy val cassandraPluginVersion = "$akka_persistence_cassandra_version$"
8 |
9 | fork in Test := true
10 |
11 | libraryDependencies ++= Seq(
12 | "com.typesafe.akka" %% "akka-cluster" % akkaVersion,
13 | "com.typesafe.akka" %% "akka-cluster-tools" % akkaVersion,
14 | "com.typesafe.akka" %% "akka-persistence" % akkaVersion,
15 | "com.typesafe.akka" %% "akka-persistence-cassandra" % cassandraPluginVersion,
16 | // this allows us to start cassandra from the sample
17 | "com.typesafe.akka" %% "akka-persistence-cassandra-launcher" % cassandraPluginVersion,
18 |
19 | "com.typesafe.akka" %% "akka-slf4j" % akkaVersion,
20 | "ch.qos.logback" % "logback-classic" % "1.2.3",
21 |
22 | // test dependencies
23 | "com.typesafe.akka" %% "akka-testkit" % akkaVersion % "test",
24 | "org.scalatest" %% "scalatest" % "3.0.1" % "test",
25 | "commons-io" % "commons-io" % "2.4" % "test")
26 |
--------------------------------------------------------------------------------
/src/main/g8/default.properties:
--------------------------------------------------------------------------------
1 | name=akka-distributed-workers-scala
2 | description=Akka is a toolkit and runtime for building highly concurrent, distributed, and fault tolerant event-driven apps. This application will get you started building distributed systems with Scala.
3 | verbatim=*.scala
4 | scala_version=2.12.7
5 | akka_version=2.5.26
6 | akka_persistence_cassandra_version=maven(com.typesafe.akka, akka-persistence-cassandra_2.12)
7 |
--------------------------------------------------------------------------------
/src/main/g8/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.13.16
2 |
--------------------------------------------------------------------------------
/src/main/g8/src/main/resources/application.conf:
--------------------------------------------------------------------------------
1 | # This is the main configuration file for our application, it provides overrides to the default values
2 | # provided in the reference.conf of the modules from Akka
3 | akka {
4 | actor {
5 | # Must be set like this to use Akka Cluster
6 | provider = cluster
7 |
8 | # Only for convenience in the quickstart, Java serialization should not be used for actual applications
9 | warn-about-java-serializer-usage = off
10 | }
11 |
12 | # Use slf4j (backed by logback) for logging, additional configuration
13 | # can be done in logback.xml
14 | loggers = ["akka.event.slf4j.Slf4jLogger"]
15 | logging-filter = "akka.event.slf4j.Slf4jLoggingFilter"
16 | loglevel = INFO
17 |
18 | # For the sample, just bind to loopback and do not allow access from the network
19 | remote.netty.tcp.hostname=127.0.0.1
20 | # the port is overridden by the logic in Main.scala
21 | remote.netty.tcp.port=0
22 |
23 | cluster {
24 | # Seed nodes are a way to have a node join the cluster (or form a new cluster) from configuration.
25 | seed-nodes = [
26 | "akka.tcp://ClusterSystem@127.0.0.1:2551",
27 | "akka.tcp://ClusterSystem@127.0.0.1:2552",
28 | "akka.tcp://ClusterSystem@127.0.0.1:2553",
29 | "akka.tcp://ClusterSystem@127.0.0.1:2554"]
30 |
31 | # Only for convenience in the quickstart, auto-downing should not be used for actual applications.
32 | # Read more here: http://doc.akka.io/docs/akka/current/scala/cluster-usage.html#auto-downing-do-not-use-
33 | auto-down-unreachable-after = 10s
34 |
35 | # Needed when running many actor systems in the same JVM
36 | jmx.multi-mbeans-in-same-jvm = on
37 | }
38 |
39 | # use Cassandra to store both snapshots and the events of the persistent actors
40 | persistence {
41 | journal.plugin = "cassandra-journal"
42 | snapshot-store.plugin = "cassandra-snapshot-store"
43 | }
44 |
45 | # Run the pubsub mediator on all nodes, without any code starting it up
46 | extensions = ["akka.cluster.pubsub.DistributedPubSub"]
47 | }
48 |
49 | # Configuration related to the app is in its own namespace
50 | distributed-workers {
51 | # Each worker pings the master with this interval
52 | # to let it know that it is alive
53 | worker-registration-interval = 10s
54 | # If a worker hasn't gotten in touch in this long
55 | # it is removed from the set of workers
56 | consider-worker-dead-after = 60s
57 |
58 | # If a workload hasn't finished in this long it
59 | # is considered failed and is retried
60 | work-timeout = 10s
61 |
62 | }
--------------------------------------------------------------------------------
/src/main/g8/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <configuration>
3 |
4 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
5 |         <target>System.out</target>
6 |         <encoder>
7 |             <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
8 |         </encoder>
9 |     </appender>
10 |
11 |     <root level="INFO">
12 |         <appender-ref ref="STDOUT"/>
13 |     </root>
14 |
15 | </configuration>
16 |
--------------------------------------------------------------------------------
/src/main/g8/src/main/scala/worker/FrontEnd.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import java.util.UUID
4 | import java.util.concurrent.ThreadLocalRandom
5 |
6 | import akka.actor.{Actor, ActorLogging, Cancellable, Props, Timers}
7 | import akka.pattern._
8 | import akka.util.Timeout
9 |
10 | import scala.concurrent.duration._
11 |
12 | /**
13 | * Dummy front-end that periodically sends a workload to the master.
14 | */
15 | object FrontEnd {
16 |
17 | def props: Props = Props(new FrontEnd)
18 |
19 | private case object NotOk
20 | private case object Tick
21 | private case object Retry
22 | }
23 |
24 | // #front-end
25 | class FrontEnd extends Actor with ActorLogging with Timers {
26 | import FrontEnd._
27 | import context.dispatcher
28 |
29 | val masterProxy = context.actorOf(
30 | MasterSingleton.proxyProps(context.system),
31 | name = "masterProxy")
32 |
33 | var workCounter = 0
34 |
35 | def nextWorkId(): String = UUID.randomUUID().toString
36 |
37 | override def preStart(): Unit = {
38 | timers.startSingleTimer("tick", Tick, 5.seconds)
39 | }
40 |
41 | def receive = idle
42 |
43 | def idle: Receive = {
44 | case Tick =>
45 | workCounter += 1
46 | log.info("Produced work: {}", workCounter)
47 | val work = Work(nextWorkId(), workCounter)
48 | context.become(busy(work))
49 | }
50 |
51 | def busy(workInProgress: Work): Receive = {
52 | sendWork(workInProgress)
53 |
54 | {
55 | case Master.Ack(workId) =>
56 | log.info("Got ack for workId {}", workId)
57 | val nextTick = ThreadLocalRandom.current.nextInt(3, 10).seconds
58 | timers.startSingleTimer(s"tick", Tick, nextTick)
59 | context.become(idle)
60 |
61 | case NotOk =>
62 | log.info("Work {} not accepted, retry after a while", workInProgress.workId)
63 | timers.startSingleTimer("retry", Retry, 3.seconds)
64 |
65 | case Retry =>
66 | log.info("Retrying work {}", workInProgress.workId)
67 | sendWork(workInProgress)
68 | }
69 | }
70 |
71 | def sendWork(work: Work): Unit = {
72 | implicit val timeout = Timeout(5.seconds)
73 | (masterProxy ? work).recover {
74 | case _ => NotOk
75 | } pipeTo self
76 | }
77 |
78 | }
79 | // #front-end
--------------------------------------------------------------------------------
/src/main/g8/src/main/scala/worker/Main.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import java.io.File
4 | import java.util.concurrent.CountDownLatch
5 |
6 | import akka.actor.ActorSystem
7 | import akka.persistence.cassandra.testkit.CassandraLauncher
8 | import com.typesafe.config.{Config, ConfigFactory}
9 |
10 | object Main {
11 |
12 | // note that 2551 and 2552 are expected to be seed nodes though, even if
13 | // the back-end starts at 2000
14 | val backEndPortRange = 2000 to 2999
15 |
16 | val frontEndPortRange = 3000 to 3999
17 |
18 | def main(args: Array[String]): Unit = {
19 | args.headOption match {
20 |
21 | case None =>
22 | startClusterInSameJvm()
23 |
24 | case Some(portString) if portString.matches("""\d+""") =>
25 | val port = portString.toInt
26 | if (backEndPortRange.contains(port)) startBackEnd(port)
27 | else if (frontEndPortRange.contains(port)) startFrontEnd(port)
28 | else startWorker(port, args.lift(1).map(_.toInt).getOrElse(1))
29 |
30 | case Some("cassandra") =>
31 | startCassandraDatabase()
32 | println("Started Cassandra, press Ctrl + C to kill")
33 | new CountDownLatch(1).await()
34 |
35 | }
36 | }
37 |
38 | def startClusterInSameJvm(): Unit = {
39 | startCassandraDatabase()
40 |
41 | // two backend nodes
42 | startBackEnd(2551)
43 | startBackEnd(2552)
44 | // two front-end nodes
45 | startFrontEnd(3000)
46 | startFrontEnd(3001)
47 | // two worker nodes with two worker actors each
48 | startWorker(5001, 2)
49 | startWorker(5002, 2)
50 | }
51 |
52 | /**
53 | * Start a node with the role backend on the given port. (This may also
54 | * start the shared journal, see below for details)
55 | */
56 | def startBackEnd(port: Int): Unit = {
57 | val system = ActorSystem("ClusterSystem", config(port, "back-end"))
58 | MasterSingleton.startSingleton(system)
59 | }
60 |
61 | /**
62 | * Start a front end node that will submit work to the backend nodes
63 | */
64 | // #front-end
65 | def startFrontEnd(port: Int): Unit = {
66 | val system = ActorSystem("ClusterSystem", config(port, "front-end"))
67 | system.actorOf(FrontEnd.props, "front-end")
68 | system.actorOf(WorkResultConsumer.props, "consumer")
69 | }
70 | // #front-end
71 |
72 | /**
73 | * Start a worker node, with n actual workers that will accept and process workloads
74 | */
75 | // #worker
76 | def startWorker(port: Int, workers: Int): Unit = {
77 | val system = ActorSystem("ClusterSystem", config(port, "worker"))
78 | val masterProxy = system.actorOf(
79 | MasterSingleton.proxyProps(system),
80 | name = "masterProxy")
81 |
82 | (1 to workers).foreach(n =>
83 | system.actorOf(Worker.props(masterProxy), s"worker-$n")
84 | )
85 | }
86 | // #worker
87 |
88 | def config(port: Int, role: String): Config =
89 | ConfigFactory.parseString(s"""
90 | akka.remote.netty.tcp.port=$port
91 | akka.cluster.roles=[$role]
92 | """).withFallback(ConfigFactory.load())
93 |
94 | /**
95 | * To make the sample easier to run we kickstart a Cassandra instance to
96 | * act as the journal. Cassandra is a great choice of backend for Akka Persistence but
97 | * in a real application a pre-existing Cassandra cluster should be used.
98 | */
99 | def startCassandraDatabase(): Unit = {
100 | val databaseDirectory = new File("target/cassandra-db")
101 | CassandraLauncher.start(
102 | databaseDirectory,
103 | CassandraLauncher.DefaultTestConfigResource,
104 | clean = false,
105 | port = 9042
106 | )
107 |
108 | // shut the cassandra instance down when the JVM stops
109 | sys.addShutdownHook {
110 | CassandraLauncher.stop()
111 | }
112 | }
113 |
114 | }
115 |
--------------------------------------------------------------------------------
/src/main/g8/src/main/scala/worker/Master.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import akka.actor.{ActorLogging, ActorRef, Cancellable, Props, Timers}
4 | import akka.cluster.pubsub.{DistributedPubSub, DistributedPubSubMediator}
5 | import akka.persistence.{PersistentActor, RecoveryCompleted, SnapshotOffer}
6 |
7 | import scala.concurrent.duration.{Deadline, FiniteDuration, _}
8 |
9 | /**
10 | * The master actor keep tracks of all available workers, and all scheduled and ongoing work items
11 | */
12 | object Master {
13 |
14 | val ResultsTopic = "results"
15 |
16 | def props(workTimeout: FiniteDuration): Props =
17 | Props(new Master(workTimeout))
18 |
19 | case class Ack(workId: String)
20 |
21 | private sealed trait WorkerStatus
22 | private case object Idle extends WorkerStatus
23 | private case class Busy(workId: String, deadline: Deadline) extends WorkerStatus
24 | private case class WorkerState(ref: ActorRef, status: WorkerStatus, staleWorkerDeadline: Deadline)
25 |
26 | private case object CleanupTick
27 |
28 | }
29 |
30 | class Master(workTimeout: FiniteDuration) extends Timers with PersistentActor with ActorLogging {
31 | import Master._
32 | import WorkState._
33 | import context.dispatcher
34 |
35 | override val persistenceId: String = "master"
36 |
37 | val considerWorkerDeadAfter: FiniteDuration =
38 | context.system.settings.config.getDuration("distributed-workers.consider-worker-dead-after").getSeconds.seconds
39 | def newStaleWorkerDeadline(): Deadline = considerWorkerDeadAfter.fromNow
40 |
41 | timers.startPeriodicTimer("cleanup", CleanupTick, workTimeout / 2)
42 |
43 | val mediator: ActorRef = DistributedPubSub(context.system).mediator
44 |
45 | // the set of available workers is not event sourced as it depends on the current set of workers
46 | private var workers = Map[String, WorkerState]()
47 |
48 | // workState is event sourced to be able to make sure work is processed even in case of crash
49 | private var workState = WorkState.empty
50 |
51 |
52 | override def receiveRecover: Receive = {
53 |
54 | case SnapshotOffer(_, workStateSnapshot: WorkState) =>
55 | // If we would have logic triggering snapshots in the actor
56 | // we would start from the latest snapshot here when recovering
57 | log.info("Got snapshot work state")
58 | workState = workStateSnapshot
59 |
60 | case event: WorkDomainEvent =>
61 | // only update current state by applying the event, no side effects
62 | workState = workState.updated(event)
63 | log.info("Replayed {}", event.getClass.getSimpleName)
64 |
65 | case RecoveryCompleted =>
66 | log.info("Recovery completed")
67 |
68 | }
69 |
70 | override def receiveCommand: Receive = {
71 | case MasterWorkerProtocol.RegisterWorker(workerId) =>
72 | if (workers.contains(workerId)) {
73 | workers += (workerId -> workers(workerId).copy(ref = sender(), staleWorkerDeadline = newStaleWorkerDeadline()))
74 | } else {
75 | log.info("Worker registered: {}", workerId)
76 | val initialWorkerState = WorkerState(
77 | ref = sender(),
78 | status = Idle,
79 | staleWorkerDeadline = newStaleWorkerDeadline())
80 | workers += (workerId -> initialWorkerState)
81 |
82 | if (workState.hasWork)
83 | sender() ! MasterWorkerProtocol.WorkIsReady
84 | }
85 |
86 | // #graceful-remove
87 | case MasterWorkerProtocol.DeRegisterWorker(workerId) =>
88 | workers.get(workerId) match {
89 | case Some(WorkerState(_, Busy(workId, _), _)) =>
90 | // there was a workload assigned to the worker when it left
91 | log.info("Busy worker de-registered: {}", workerId)
92 |           persist(WorkerFailed(workId)) { event =>
93 | workState = workState.updated(event)
94 | notifyWorkers()
95 | }
96 | case Some(_) =>
97 | log.info("Worker de-registered: {}", workerId)
98 | case _ =>
99 | }
100 | workers -= workerId
101 | // #graceful-remove
102 |
103 | case MasterWorkerProtocol.WorkerRequestsWork(workerId) =>
104 | if (workState.hasWork) {
105 | workers.get(workerId) match {
106 | case Some(workerState @ WorkerState(_, Idle, _)) =>
107 | val work = workState.nextWork
108 | persist(WorkStarted(work.workId)) { event =>
109 | workState = workState.updated(event)
110 | log.info("Giving worker {} some work {}", workerId, work.workId)
111 | val newWorkerState = workerState.copy(
112 | status = Busy(work.workId, Deadline.now + workTimeout),
113 | staleWorkerDeadline = newStaleWorkerDeadline())
114 | workers += (workerId -> newWorkerState)
115 | sender() ! work
116 | }
117 | case _ =>
118 | }
119 | }
120 |
121 | case MasterWorkerProtocol.WorkIsDone(workerId, workId, result) =>
122 |       // idempotent - redelivery from the worker may cause duplicates, so the handling needs to be idempotent
123 | if (workState.isDone(workId)) {
124 | // previous Ack was lost, confirm again that this is done
125 | sender() ! MasterWorkerProtocol.Ack(workId)
126 | } else if (!workState.isInProgress(workId)) {
127 | log.info("Work {} not in progress, reported as done by worker {}", workId, workerId)
128 | } else {
129 | log.info("Work {} is done by worker {}", workId, workerId)
130 | changeWorkerToIdle(workerId, workId)
131 |         persist(WorkCompleted(workId, result)) { event =>
132 |           workState = workState.updated(event)
133 |           mediator ! DistributedPubSubMediator.Publish(ResultsTopic, WorkResult(workId, result))
134 |           // Ack back to original sender
135 |           sender() ! MasterWorkerProtocol.Ack(workId)
136 | }
137 | }
138 |
139 | case MasterWorkerProtocol.WorkFailed(workerId, workId) =>
140 | if (workState.isInProgress(workId)) {
141 | log.info("Work {} failed by worker {}", workId, workerId)
142 | changeWorkerToIdle(workerId, workId)
143 |         persist(WorkerFailed(workId)) { event =>
144 | workState = workState.updated(event)
145 | notifyWorkers()
146 | }
147 | }
148 |
149 | // #persisting
150 | case work: Work =>
151 | // idempotent
152 | if (workState.isAccepted(work.workId)) {
153 | sender() ! Master.Ack(work.workId)
154 | } else {
155 | log.info("Accepted work: {}", work.workId)
156 |         persist(WorkAccepted(work)) { event =>
157 | // Ack back to original sender
158 | sender() ! Master.Ack(work.workId)
159 | workState = workState.updated(event)
160 | notifyWorkers()
161 | }
162 | }
163 | // #persisting
164 |
165 | // #pruning
166 | case CleanupTick =>
167 | workers.foreach {
168 | case (workerId, WorkerState(_, Busy(workId, timeout), _)) if timeout.isOverdue() =>
169 | log.info("Work timed out: {}", workId)
170 | workers -= workerId
171 |           persist(WorkerTimedOut(workId)) { event =>
172 | workState = workState.updated(event)
173 | notifyWorkers()
174 | }
175 |
176 |
177 |         case (workerId, WorkerState(_, Idle, staleWorkerDeadline)) if staleWorkerDeadline.isOverdue() =>
178 | log.info("Too long since heard from worker {}, pruning", workerId)
179 | workers -= workerId
180 |
181 | case _ => // this one is a keeper!
182 | }
183 | // #pruning
184 | }
185 |
186 | def notifyWorkers(): Unit =
187 | if (workState.hasWork) {
188 | workers.foreach {
189 | case (_, WorkerState(ref, Idle, _)) => ref ! MasterWorkerProtocol.WorkIsReady
190 | case _ => // busy
191 | }
192 | }
193 |
194 | def changeWorkerToIdle(workerId: String, workId: String): Unit =
195 | workers.get(workerId) match {
196 |       case Some(workerState @ WorkerState(_, Busy(`workId`, _), _)) =>
197 |         val newWorkerState = workerState.copy(status = Idle, staleWorkerDeadline = newStaleWorkerDeadline())
198 |         workers += (workerId -> newWorkerState)
199 |       case _ =>
200 |         // ok, this can happen after fail-over of the singleton master, since worker state is not persisted
201 | }
202 |
203 | def tooLongSinceHeardFrom(lastHeardFrom: Long) =
204 | System.currentTimeMillis() - lastHeardFrom > considerWorkerDeadAfter.toMillis
205 |
206 | }
--------------------------------------------------------------------------------
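
The timeout bookkeeping in Master.scala above is built on scala.concurrent.duration.Deadline: a Busy worker carries Deadline.now + workTimeout, every worker carries a stale-worker deadline refreshed each time the master hears from it, and the CleanupTick timer fires every workTimeout / 2, so an overdue work item is noticed within about workTimeout / 2 of its deadline passing. A minimal illustration of the Deadline semantics the pruning relies on:

    import scala.concurrent.duration._

    val deadline = Deadline.now + 10.seconds
    deadline.isOverdue()  // false immediately after creation
    // once more than 10 seconds have elapsed, deadline.isOverdue() returns true
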
/src/main/g8/src/main/scala/worker/MasterSingleton.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import akka.actor.{ActorSystem, PoisonPill}
4 | import akka.cluster.singleton._
5 |
6 | import scala.concurrent.duration._
7 |
8 | object MasterSingleton {
9 |
10 | private val singletonName = "master"
11 | private val singletonRole = "back-end"
12 |
13 | // #singleton
14 | def startSingleton(system: ActorSystem) = {
15 | val workTimeout = system.settings.config.getDuration("distributed-workers.work-timeout").getSeconds.seconds
16 |
17 | system.actorOf(
18 | ClusterSingletonManager.props(
19 | Master.props(workTimeout),
20 | PoisonPill,
21 | ClusterSingletonManagerSettings(system).withRole(singletonRole)
22 | ),
23 | singletonName)
24 | }
25 | // #singleton
26 |
27 | // #proxy
28 | def proxyProps(system: ActorSystem) = ClusterSingletonProxy.props(
29 | settings = ClusterSingletonProxySettings(system).withRole(singletonRole),
30 | singletonManagerPath = s"/user/$singletonName")
31 | // #proxy
32 | }
33 |
--------------------------------------------------------------------------------
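
Usage sketch for MasterSingleton above: a node that needs to reach the master starts a ClusterSingletonProxy from proxyProps and sends messages to that proxy, which forwards them to wherever the singleton currently runs. This mirrors what Main.scala and the test below do:

    // on any node in the cluster
    val masterProxy = system.actorOf(MasterSingleton.proxyProps(system), name = "masterProxy")

    // the proxy can then be handed to workers (or a front-end)
    system.actorOf(Worker.props(masterProxy), "worker-1")
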
/src/main/g8/src/main/scala/worker/MasterWorkerProtocol.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | object MasterWorkerProtocol {
4 | // Messages from Workers
5 | case class RegisterWorker(workerId: String)
6 | case class DeRegisterWorker(workerId: String)
7 | case class WorkerRequestsWork(workerId: String)
8 | case class WorkIsDone(workerId: String, workId: String, result: Any)
9 | case class WorkFailed(workerId: String, workId: String)
10 |
11 | // Messages to Workers
12 | case object WorkIsReady
13 | case class Ack(id: String)
14 | }
--------------------------------------------------------------------------------
/src/main/g8/src/main/scala/worker/Work.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | case class Work(workId: String, job: Any)
4 |
5 | case class WorkResult(workId: String, result: Any)
--------------------------------------------------------------------------------
/src/main/g8/src/main/scala/worker/WorkExecutor.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import java.util.concurrent.ThreadLocalRandom
4 |
5 | import akka.actor.{Actor, Props}
6 |
7 | import scala.concurrent.duration._
8 |
9 | /**
10 | * Work executor is the actor actually performing the work.
11 | */
12 | object WorkExecutor {
13 | def props = Props(new WorkExecutor)
14 |
15 | case class DoWork(n: Int)
16 | case class WorkComplete(result: String)
17 | }
18 |
19 | class WorkExecutor extends Actor {
20 | import WorkExecutor._
21 | import context.dispatcher
22 |
23 | def receive = {
24 | case DoWork(n: Int) =>
25 | val n2 = n * n
26 | val result = s"$n * $n = $n2"
27 |
28 | // simulate that the processing time varies
29 | val randomProcessingTime = ThreadLocalRandom.current.nextInt(1, 3).seconds
30 | context.system.scheduler.scheduleOnce(randomProcessingTime, sender(), WorkComplete(result))
31 | }
32 |
33 | }
--------------------------------------------------------------------------------
/src/main/g8/src/main/scala/worker/WorkResultConsumer.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import akka.actor.{Actor, ActorLogging, Props}
4 | import akka.cluster.pubsub.DistributedPubSub
5 | import akka.cluster.pubsub.DistributedPubSubMediator
6 |
7 | object WorkResultConsumer {
8 | def props: Props = Props(new WorkResultConsumer)
9 | }
10 |
11 | // #work-result-consumer
12 | class WorkResultConsumer extends Actor with ActorLogging {
13 |
14 | val mediator = DistributedPubSub(context.system).mediator
15 | mediator ! DistributedPubSubMediator.Subscribe(Master.ResultsTopic, self)
16 |
17 | def receive = {
18 |     case _: DistributedPubSubMediator.SubscribeAck => // subscription to the results topic confirmed
19 | case WorkResult(workId, result) =>
20 | log.info("Consumed result: {}", result)
21 | }
22 |
23 | }
24 | // #work-result-consumer
--------------------------------------------------------------------------------
/src/main/g8/src/main/scala/worker/WorkState.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import scala.collection.immutable.Queue
4 |
5 | object WorkState {
6 |
7 | def empty: WorkState = WorkState(
8 | pendingWork = Queue.empty,
9 | workInProgress = Map.empty,
10 | acceptedWorkIds = Set.empty,
11 | doneWorkIds = Set.empty)
12 |
13 | trait WorkDomainEvent
14 | // #events
15 | case class WorkAccepted(work: Work) extends WorkDomainEvent
16 | case class WorkStarted(workId: String) extends WorkDomainEvent
17 | case class WorkCompleted(workId: String, result: Any) extends WorkDomainEvent
18 | case class WorkerFailed(workId: String) extends WorkDomainEvent
19 | case class WorkerTimedOut(workId: String) extends WorkDomainEvent
20 | // #events
21 | }
22 |
23 | case class WorkState private (
24 | private val pendingWork: Queue[Work],
25 | private val workInProgress: Map[String, Work],
26 | private val acceptedWorkIds: Set[String],
27 | private val doneWorkIds: Set[String]) {
28 |
29 | import WorkState._
30 |
31 | def hasWork: Boolean = pendingWork.nonEmpty
32 | def nextWork: Work = pendingWork.head
33 | def isAccepted(workId: String): Boolean = acceptedWorkIds.contains(workId)
34 | def isInProgress(workId: String): Boolean = workInProgress.contains(workId)
35 | def isDone(workId: String): Boolean = doneWorkIds.contains(workId)
36 |
37 | def updated(event: WorkDomainEvent): WorkState = event match {
38 |     case WorkAccepted(work) =>
39 | copy(
40 | pendingWork = pendingWork enqueue work,
41 | acceptedWorkIds = acceptedWorkIds + work.workId)
42 |
43 |     case WorkStarted(workId) =>
44 | val (work, rest) = pendingWork.dequeue
45 | require(workId == work.workId, s"WorkStarted expected workId $workId == ${work.workId}")
46 | copy(
47 | pendingWork = rest,
48 | workInProgress = workInProgress + (workId -> work))
49 |
50 |     case WorkCompleted(workId, result) =>
51 | copy(
52 | workInProgress = workInProgress - workId,
53 | doneWorkIds = doneWorkIds + workId)
54 |
55 |     case WorkerFailed(workId) =>
56 | copy(
57 | pendingWork = pendingWork enqueue workInProgress(workId),
58 | workInProgress = workInProgress - workId)
59 |
60 |     case WorkerTimedOut(workId) =>
61 | copy(
62 | pendingWork = pendingWork enqueue workInProgress(workId),
63 | workInProgress = workInProgress - workId)
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
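
WorkState above is the event-sourced half of the Master: live updates and recovery both go through updated, so replaying the persisted events over WorkState.empty rebuilds the same state. A minimal sketch (not part of the template) of that replay as a fold:

    import worker.{Work, WorkState}
    import WorkState._

    val events: Seq[WorkDomainEvent] = Seq(
      WorkAccepted(Work("work-1", 42)),
      WorkStarted("work-1"),
      WorkCompleted("work-1", "42 * 42 = 1764"))

    // recovery is effectively a left fold of the events over the empty state
    val recovered = events.foldLeft(WorkState.empty)(_ updated _)
    assert(recovered.isDone("work-1"))
    assert(!recovered.hasWork)
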
/src/main/g8/src/main/scala/worker/Worker.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import java.util.UUID
4 |
5 | import akka.actor.SupervisorStrategy.{Restart, Stop}
6 | import akka.actor._
7 |
8 | import scala.concurrent.duration._
9 |
10 | /**
11 | * The worker is actually more of a middle manager, delegating the actual work
12 | * to the WorkExecutor, supervising it and keeping itself available to interact with the work master.
13 | */
14 | object Worker {
15 |
16 | def props(masterProxy: ActorRef): Props = Props(new Worker(masterProxy))
17 |
18 | }
19 |
20 | class Worker(masterProxy: ActorRef)
21 | extends Actor with Timers with ActorLogging {
22 | import MasterWorkerProtocol._
23 | import context.dispatcher
24 |
25 |
26 | val workerId = UUID.randomUUID().toString
27 | val registerInterval = context.system.settings.config.getDuration("distributed-workers.worker-registration-interval").getSeconds.seconds
28 |
29 | val registerTask = context.system.scheduler.schedule(0.seconds, registerInterval, masterProxy, RegisterWorker(workerId))
30 |
31 | val workExecutor = createWorkExecutor()
32 |
33 | var currentWorkId: Option[String] = None
34 | def workId: String = currentWorkId match {
35 | case Some(workId) => workId
36 | case None => throw new IllegalStateException("Not working")
37 | }
38 |
39 | def receive = idle
40 |
41 | def idle: Receive = {
42 | case WorkIsReady =>
43 | // this is the only state where we reply to WorkIsReady
44 | masterProxy ! WorkerRequestsWork(workerId)
45 |
46 | case Work(workId, job: Int) =>
47 | log.info("Got work: {}", job)
48 | currentWorkId = Some(workId)
49 | workExecutor ! WorkExecutor.DoWork(job)
50 | context.become(working)
51 |
52 | }
53 |
54 | def working: Receive = {
55 | case WorkExecutor.WorkComplete(result) =>
56 | log.info("Work is complete. Result {}.", result)
57 | masterProxy ! WorkIsDone(workerId, workId, result)
58 | context.setReceiveTimeout(5.seconds)
59 | context.become(waitForWorkIsDoneAck(result))
60 |
61 | case _: Work =>
62 | log.warning("Yikes. Master told me to do work, while I'm already working.")
63 |
64 | }
65 |
66 | def waitForWorkIsDoneAck(result: Any): Receive = {
67 | case Ack(id) if id == workId =>
68 | masterProxy ! WorkerRequestsWork(workerId)
69 | context.setReceiveTimeout(Duration.Undefined)
70 | context.become(idle)
71 |
72 | case ReceiveTimeout =>
73 | log.info("No ack from master, resending work result")
74 | masterProxy ! WorkIsDone(workerId, workId, result)
75 |
76 | }
77 |
78 | def createWorkExecutor(): ActorRef =
79 | // in addition to starting the actor we also watch it, so that
80 | // if it stops this worker will also be stopped
81 | context.watch(context.actorOf(WorkExecutor.props, "work-executor"))
82 |
83 | override def supervisorStrategy = OneForOneStrategy() {
84 | case _: ActorInitializationException => Stop
85 | case _: Exception =>
86 | currentWorkId foreach { workId => masterProxy ! WorkFailed(workerId, workId) }
87 | context.become(idle)
88 | Restart
89 | }
90 |
91 | override def postStop(): Unit = {
92 | registerTask.cancel()
93 | masterProxy ! DeRegisterWorker(workerId)
94 | }
95 |
96 | }
--------------------------------------------------------------------------------
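
The createWorkExecutor() factory in Worker.scala above is the seam for plugging in a different executor, which is exactly how the test below substitutes its fast and flaky executors. The same technique, sketched for a custom executor (MyExecutor is a hypothetical actor and masterProxy an already created proxy ActorRef):

    import akka.actor.{ActorRef, Props}

    val customWorkerProps = Props(new Worker(masterProxy) {
      override def createWorkExecutor(): ActorRef =
        context.watch(context.actorOf(Props(new MyExecutor), "my-executor"))
    })
    system.actorOf(customWorkerProps, "custom-worker")
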
/src/main/g8/src/test/scala/worker/DistributedWorkerSpec.scala:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import java.io.File
4 |
5 | import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, PoisonPill, Props}
6 | import akka.cluster.Cluster
7 | import akka.cluster.ClusterEvent.{CurrentClusterState, MemberUp}
8 | import akka.cluster.pubsub.DistributedPubSub
9 | import akka.cluster.pubsub.DistributedPubSubMediator.{CurrentTopics, GetTopics, Subscribe, SubscribeAck}
10 | import akka.cluster.singleton.{ClusterSingletonManager, ClusterSingletonManagerSettings}
11 | import akka.testkit.{ImplicitSender, TestKit, TestProbe}
12 | import com.typesafe.config.ConfigFactory
13 | import org.apache.commons.io.FileUtils
14 | import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}
15 |
16 | import scala.concurrent.duration._
17 | import scala.concurrent.{Await, Future}
18 |
19 | object DistributedWorkerSpec {
20 |
21 | val clusterConfig = ConfigFactory.parseString("""
22 | akka {
23 | persistence {
24 | journal.plugin = "akka.persistence.journal.inmem"
25 | snapshot-store {
26 | plugin = "akka.persistence.snapshot-store.local"
27 | local.dir = "target/test-snapshots"
28 | }
29 | }
30 | extensions = ["akka.cluster.pubsub.DistributedPubSub"]
31 | }
32 | distributed-workers.consider-worker-dead-after = 10s
33 | distributed-workers.worker-registration-interval = 1s
34 | """).withFallback(ConfigFactory.load())
35 |
36 | class FlakyWorkExecutor extends Actor with ActorLogging {
37 | var i = 0
38 |
39 | override def postRestart(reason: Throwable): Unit = {
40 | i = 3
41 | super.postRestart(reason)
42 | }
43 |
44 | def receive = {
45 | case WorkExecutor.DoWork(n: Int) =>
46 | i += 1
47 | if (i == 3) {
48 | log.info("Cannot be trusted, crashing")
49 | throw new RuntimeException("Flaky worker")
50 | } else if (i == 5) {
51 | log.info("Cannot be trusted, stopping myself")
52 | context.stop(self)
53 | } else {
54 | val n2 = n * n
55 | val result = s"$n * $n = $n2"
56 | log.info("Cannot be trusted, but did complete work: {}", result)
57 | sender() ! WorkExecutor.WorkComplete(result)
58 | }
59 | }
60 | }
61 |
62 | class FastWorkExecutor extends Actor with ActorLogging {
63 | def receive = {
64 | case WorkExecutor.DoWork(n: Int) =>
65 | val n2 = n * n
66 | val result = s"$n * $n = $n2"
67 | sender() ! WorkExecutor.WorkComplete(result)
68 | }
69 | }
70 |
71 | class RemoteControllableFrontend extends FrontEnd {
72 |
73 | var currentWorkIdAndSender: Option[(String, ActorRef)] = None
74 |
75 | override def idle: Receive = {
76 | // just to be able to send one message at a time from the test
77 | currentWorkIdAndSender match {
78 | case Some((workId, originalSender)) => originalSender ! "ok-" + workId
79 | case None =>
80 | }
81 | currentWorkIdAndSender = None
82 |
83 | {
84 | case work: Work =>
85 | log.debug("Forwarding some work: {}", work)
86 | sendWork(work)
87 | currentWorkIdAndSender = Some((work.workId, sender()))
88 | context.become(busy(work))
89 | }
90 | }
91 | }
92 | }
93 |
94 | class DistributedWorkerSpec(_system: ActorSystem)
95 | extends TestKit(_system)
96 | with Matchers
97 | with FlatSpecLike
98 | with BeforeAndAfterAll
99 | with ImplicitSender {
100 |
101 | import DistributedWorkerSpec._
102 |
103 | val workTimeout = 3.seconds
104 |
105 | def this() = this(ActorSystem("DistributedWorkerSpec", DistributedWorkerSpec.clusterConfig))
106 |
107 | val backendSystem: ActorSystem = {
108 | val config = ConfigFactory.parseString("akka.cluster.roles=[back-end]").withFallback(clusterConfig)
109 | ActorSystem("DistributedWorkerSpec", config)
110 | }
111 |
112 | val workerSystem: ActorSystem = ActorSystem("DistributedWorkerSpec", clusterConfig)
113 |
114 | val storageLocations = List(
115 | "akka.persistence.journal.leveldb.dir",
116 | "akka.persistence.snapshot-store.local.dir").map(s => new File(system.settings.config.getString(s)))
117 |
118 | override def beforeAll(): Unit = {
119 | // make sure we do not use persisted data from a previous run
120 | storageLocations.foreach(dir => FileUtils.deleteDirectory(dir))
121 | }
122 |
123 | "Distributed workers" should "perform work and publish results" in {
124 | val clusterAddress = Cluster(backendSystem).selfAddress
125 | val clusterProbe = TestProbe()
126 | Cluster(backendSystem).subscribe(clusterProbe.ref, classOf[MemberUp])
127 | clusterProbe.expectMsgType[CurrentClusterState]
128 | Cluster(backendSystem).join(clusterAddress)
129 | clusterProbe.expectMsgType[MemberUp]
130 |
131 | backendSystem.actorOf(
132 | ClusterSingletonManager.props(
133 | Master.props(workTimeout),
134 | PoisonPill,
135 | ClusterSingletonManagerSettings(system).withRole("back-end")),
136 | "master")
137 |
138 | Cluster(workerSystem).join(clusterAddress)
139 |
140 | val masterProxy = workerSystem.actorOf(
141 | MasterSingleton.proxyProps(workerSystem),
142 | name = "masterProxy")
143 | val fastWorkerProps = Props(new Worker(masterProxy) {
144 | override def createWorkExecutor(): ActorRef = context.actorOf(Props(new FastWorkExecutor), "fast-executor")
145 | })
146 |
147 | for (n <- 1 to 3)
148 | workerSystem.actorOf(fastWorkerProps, "worker-" + n)
149 |
150 | val flakyWorkerProps = Props(new Worker(masterProxy) {
151 | override def createWorkExecutor(): ActorRef = {
152 | context.actorOf(Props(new FlakyWorkExecutor), "flaky-executor")
153 | }
154 | })
155 | val flakyWorker = workerSystem.actorOf(flakyWorkerProps, "flaky-worker")
156 |
157 | Cluster(system).join(clusterAddress)
158 | clusterProbe.expectMsgType[MemberUp]
159 |
160 | // allow posting work from the outside
161 |
162 | val frontend = system.actorOf(Props[RemoteControllableFrontend], "front-end")
163 |
164 | val results = TestProbe()
165 | DistributedPubSub(system).mediator ! Subscribe(Master.ResultsTopic, results.ref)
166 | expectMsgType[SubscribeAck]
167 |
168 | // make sure pub sub topics are replicated over to the back-end system before triggering any work
169 | within(10.seconds) {
170 | awaitAssert {
171 | DistributedPubSub(backendSystem).mediator ! GetTopics
172 | expectMsgType[CurrentTopics].getTopics() should contain(Master.ResultsTopic)
173 | }
174 | }
175 |
176 |     // make sure we can get one piece of work through, so that we fail fast if we cannot
177 | frontend ! Work("1", 1)
178 | expectMsg("ok-1")
179 | within(10.seconds) {
180 | awaitAssert {
181 | results.expectMsgType[WorkResult].workId should be("1")
182 | }
183 | }
184 |
185 |
186 | // and then send in some actual work
187 | for (n <- 2 to 100) {
188 | frontend ! Work(n.toString, n)
189 | expectMsg(s"ok-$n")
190 | }
191 | system.log.info("99 work items sent")
192 |
193 | results.within(20.seconds) {
194 | val ids = results.receiveN(99).map { case WorkResult(workId, _) => workId }
195 | // nothing lost, and no duplicates
196 | ids.toVector.map(_.toInt).sorted should be((2 to 100).toVector)
197 | }
198 |
199 | }
200 |
201 | override def afterAll(): Unit = {
202 | import scala.concurrent.ExecutionContext.Implicits.global
203 | val allTerminated = Future.sequence(Seq(
204 | system.terminate(),
205 | backendSystem.terminate(),
206 | workerSystem.terminate()
207 | ))
208 |
209 | Await.ready(allTerminated, Duration.Inf)
210 |
211 | storageLocations.foreach(dir => FileUtils.deleteDirectory(dir))
212 | }
213 |
214 | }
--------------------------------------------------------------------------------