├── .gitignore
├── LICENSE
├── README.md
├── autoscale.py
├── deployment
└── to-go
│ ├── README.md
│ ├── Vagrantfile
│ └── provision.sh
├── doc
├── architecture and flow.graffle
│ ├── data.plist
│ ├── image1.tiff
│ ├── image2.tiff
│ ├── image4.tiff
│ └── image5.tiff
├── architecture.png
├── elsa-marathon-deploy.png
├── flow.png
└── sa-logo.jpg
├── elsa.conf.example
├── launch-elsa.sh
├── pom.xml
└── src
└── main
└── scala
└── spark
└── elsa
├── ElsaHelper.scala
└── OnlineSA.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | # Vagrant
2 | .vagrant/
3 |
4 |
5 | *.conf
6 | *.class
7 | *.log
8 |
9 | # sbt specific
10 | .cache/
11 | .history/
12 | .lib/
13 | dist/*
14 | target/
15 | lib_managed/
16 | src_managed/
17 | project/boot/
18 | project/plugins/project/
19 |
20 | # Scala-IDE specific
21 | .scala_dependencies
22 | .worksheet
23 | .idea/
24 | *.iml
25 |
26 | # Python
27 | __pycache__/
28 | *.py[cod]
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Elastic Sentiment Analysis (ElSA)
2 |
3 | The *Elastic Sentiment Analysis* (ElSA) is a Spark Streaming-based application written for the [DCOS](http://mesosphere.com/product/). It derives public opinions/sentiments on specified Twitter topics and is able to elastically scale its processing capacity, based on the volume of the topics' traffic, leveraging Apache Mesos and Marathon.
4 |
5 | ElSA works as follows:
6 |
7 | * It takes a list of words (called topics in the following), such as *Mesos*, *Docker*, *DCOS*, etc., as input and—using the Twitter firehose—pulls tweets containing these topics for processing.
8 | * Based on the tweet content it performs a simple sentiment analysis in an ongoing fashion. This operation is implemented via [Spark Streaming](https://spark.apache.org/docs/latest/streaming-programming-guide.html).
9 | * Last but not least, based on the activity in a certain topic the app scales elastically through leveraging the [Marathon REST API](https://mesosphere.github.io/marathon/docs/rest-api.html). This means that if, for example, a rapid increase of mentions of the topic *DCOS* is detected (tweets per time unit), then more instances are launched.
10 |
11 | See below for the architecture and data flow as well as [deployment](#deployment) and [usage](#usage) instructions.
12 |
13 | ## Architecture
14 |
15 | | components | flow |
16 | | ------------------------------------- | -------------------- |
17 | | ![ElSA architecture](doc/architecture.png) | ![ElSA flow](doc/flow.png) |
18 |
19 | Description TBD.
20 |
21 | ## Deployment
22 |
23 | In the following, an Ubuntu 14.04 environment is assumed and in addition, ElSA depends on:
24 |
25 | * Apache [Mesos 0.21.x](http://archive.apache.org/dist/mesos/0.21.0/) with [Marathon 0.7.6](https://github.com/mesosphere/marathon/releases/tag/v0.7.6)
26 | * [marathon-python](https://github.com/thefactory/marathon-python)
27 | * Apache [Spark 1.2.x](https://spark.apache.org/downloads.html)
28 | * A Twitter account and an [app](https://apps.twitter.com/) that can be used for accessing the Twitter firehose
29 |
30 | ### ElSA to-go: Vagrant deployment
31 |
32 | **ElSA to-go** is a single-node Vagrant deployment based on the ingenious [Playa Mesos](https://github.com/mesosphere/playa-mesos). See details in [deployment/to-go](deployment/to-go) …
33 |
34 |
35 | ### ElSA Docker
36 |
37 |
38 | ### Digital Ocean deployment
39 |
40 | IaaS deployment on [DO](https://cloud.digitalocean.com/)
41 |
42 | ### Google Compute deployment
43 |
44 | IaaS deployment on [GCE](https://cloud.google.com/)
45 |
46 | ### AWS EC2 deployment
47 |
48 | IaaS deployment on [EC2](https://console.aws.amazon.com/)
49 |
50 |
51 | ### Manual single node deployment
52 |
53 | For Python packages we need `pip` so before anything else do:
54 |
55 | $ sudo apt-get install python-pip
56 |
57 | Now we can start with the setup.
58 |
59 | **Install Mesos**: simply use [Playa Mesos](https://github.com/mesosphere/playa-mesos), which contains a Marathon installation, or follow the [step-by-step instructions](http://mesos.apache.org/gettingstarted/) from the Apache Mesos site and install Marathon on top of it.
60 |
61 | Further, as a preparation for the ElSA app, we need a [Python package](https://github.com/thefactory/marathon-python) wrapping the Marathon [REST API](https://mesosphere.github.io/marathon/docs/rest-api.html) so let's do that right away:
62 |
63 | $ pip install marathon
64 |
65 | **Install Spark**:
66 |
67 | First we download the Spark source and make sure Java env is set up correctly:
68 |
69 | $ cd
70 | $ wget http://d3kbcqa49mib13.cloudfront.net/spark-1.2.0.tgz
71 | $ tar xzf spark-1.2.0.tgz && cd spark-1.2.0/
72 | $ sudo apt-get install default-jdk
73 | $ export JAVA_HOME=$(readlink -f /usr/bin/javac | sed "s:bin/javac::")
74 |
75 | Now make sure the correct version of Maven (3.0.4 or higher) is available:
76 |
77 | $ sudo apt-get update
78 | $ sudo apt-get install maven
79 | $ mvn -version
80 | Apache Maven 3.0.5
81 | Maven home: /usr/share/maven
82 | Java version: 1.7.0_65, vendor: Oracle Corporation
83 | Java home: /usr/lib/jvm/java-7-openjdk-amd64/jre
84 |
85 | OK, ready to build Spark. Note: right now is a good time to get a cup of tea or coffee, whatever floats your boat. As usual, Maven is downloading half of the Internet for the following and that might take, um, a while:
86 |
87 | $ export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
88 | $ sudo mvn -DskipTests clean package
89 |
90 | So, here we are. Next we package our newly built Spark distribution for the Mesos slaves to use:
91 |
92 | $ ./make-distribution.sh
93 | $ mv dist spark-1.2.0
94 | $ tar czf spark-1.2.0.tgz spark-1.2.0/
95 | $ cd conf/
96 | $ cp spark-env.sh.template spark-env.sh
97 |
98 | Now open `../spark-1.2.0/conf/spark-env.sh` in your favorite editor and add the following at the end of the file:
99 |
100 | export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so
101 | export SPARK_EXECUTOR_URI=file:///home/vagrant/spark-1.2.0/spark-1.2.0.tgz
102 | export MASTER=mesos://127.0.1.1:5050
103 |
104 | Note that if you've built Spark in a different directory (I did it in `/home/vagrant/`) then you'll have to change the setting for the `SPARK_EXECUTOR_URI` to point to the resulting `tgz` file from the previous step.
105 |
106 | Then, finally, we're ready to launch Spark:
107 |
108 | $ cd ..
109 | $ bin/spark-shell
110 |
111 | **Install ElSA**:
112 |
113 | $ cd
114 | $ git clone https://github.com/mhausenblas/elsa.git
115 | $ cd elsa
116 | $ mvn clean package
117 |
118 | ## Usage
119 |
120 | Assuming you've installed ElSA using one of the options above, you should now be in the position to launch it as described below.
121 |
122 | Note: in order for ElSA to run you'll need to supply your Twitter credentials, that is, you `cp elsa.conf.example elsa.conf` and replace the `YOUR STUFF HERE` sections with the details you obtain from creating a Twitter application and generating the access token via the [app](https://apps.twitter.com/) interface.
123 |
124 | Before you start, you might want to quickly check out this 3min walkthrough of ElSA op:
125 |
126 |
127 |
128 |
129 | ### Launching ElSA manually
130 |
131 | To launch ElSA manually (without elasticity, directly on Mesos), do the following:
132 |
133 | $ cd elsa
134 | $ ./launch-elsa.sh
135 | Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
136 | 15/03/06 19:02:32 INFO ElsaHelper: Setting log level to [ERROR].
137 |
138 | In the past 5 seconds I found 0 tweet(s) containing your topics: Datacenter DCOS Docker Mesos Mesosphere devop microservice
139 |
140 | In the past 5 seconds I found 1 tweet(s) containing your topics: Datacenter DCOS Docker Mesos Mesosphere devop microservice
141 | ===
142 | RT @SoftLayer: Let’s talk software, specifically how to create a private @Docker registry on SoftLayer. ≡ http://t.co/UVpX1Anl4s http://t.c…
143 | ===
144 |
145 | ### Launching ElSA through Marathon
146 |
147 | To launch the ElSA app (through Marathon) and automatically scale the number of instances used, depending on the increase/decrease of traffic detected for the specified topics, do the following (hint: stop app by hitting `CTRL+C`):
148 |
149 | $ cd elsa
150 | $ ./autoscale.py http://localhost:8080 elsa.conf
151 | Using /tmp/elsa/stats to monitor topic traffic
152 | Using traffic increase threshold of 10 and scale factor 5
153 | ElSA is deployed and running, waiting now 5 sec before starting auto-scale ...
154 | Difference in traffic in the past 10 seconds: 9
155 | Difference in traffic in the past 10 seconds: -9
156 | Resetting number of instances to 1
157 | Difference in traffic in the past 10 seconds: 11
158 | Increasing number of instances to 2
159 | Difference in traffic in the past 10 seconds: -14
160 | Resetting number of instances to 1
161 | ^CElSA has been stopped by user, halting app and rolling back deployment. Thanks and bye!
162 |
163 | You should then see something like the following in [Marathon](http://10.141.141.10:8080/):
164 |
165 | 
166 |
167 | ## To do
168 |
169 | - [x] Core business logic
170 | - [x] Single node deployment and launch
171 | - [x] Single node elastic
172 | - [x] Make all auto-scale parameter configurable via config
173 | - [x] Improve SA (positive negative)
174 | - [x] Video walkthrough
175 | - [x] Vagrant file
176 | - [ ] Docker image
177 | - [ ] Cluster deployment DO
178 | - [ ] Cluster deployment GCE
179 | - [ ] Cluster deployment EC2
180 | - [ ] Architecture and flow explanation
181 |
182 | ## Notes
183 |
184 | Kudos to the Spark team for providing [the basis](https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/TwitterPopularTags.scala) for the SA part and to Alexandre Rodrigues for helping me out concerning the initial development.
185 |
186 | If you want to learn how to run Spark on Mesos, I suggest you try out the great [step-by-step tutorial](https://mesosphere.com/docs/tutorials/run-spark-on-mesos/) provided by the Mesosphere folks.
187 |
188 | Lastly, apologies to all [Frozen](http://www.imdb.com/title/tt2294629/) fans, especially our kids, for hijacking the Elsa label in this context. I thought it's funny …
189 |
--------------------------------------------------------------------------------
/autoscale.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Launches ElSA app using Marathon and scales it depending on topic traffic.
4 |
5 | Usage:
6 |
7 | ./autoscale.py $MARATHON_URL $ELSA_CONFIG_FILE
8 |
9 | Example:
10 |
11 | ./autoscale.py http://localhost:8080 ./elsa.conf
12 |
13 |
14 | @author: Michael Hausenblas, http://mhausenblas.info/#i
15 | @since: 2015-03-06
16 | @status: init
17 | """
18 |
19 | import logging
20 | import os
21 | import sys
22 | import time
23 |
24 | from marathon import MarathonClient
25 | from marathon.models import MarathonApp
26 |
27 | ################################################################################
28 | # Defaults
29 | #
30 |
# Toggles verbose logging: DEBUG level plus the emitting line number.
DEBUG = True

# Timestamp layout for log records. %H (24-hour clock) is used on purpose:
# %I (12-hour clock) without an AM/PM marker makes a morning entry and an
# afternoon entry look identical in the log.
DATE_FORMAT = '%Y-%m-%dT%H:%M:%S'

if DEBUG:
    FORMAT = '%(asctime)-0s %(levelname)s %(message)s [at line %(lineno)d]'
    LOG_LEVEL = logging.DEBUG
else:
    FORMAT = '%(asctime)-0s %(message)s'
    LOG_LEVEL = logging.INFO

logging.basicConfig(level=LOG_LEVEL, format=FORMAT, datefmt=DATE_FORMAT)

# Module-level scaling defaults; the main script rebinds both when the config
# file supplies non-zero values.
TRAFFIC_INCREASE_THRESHOLD = 6 # traffic increase between two polls that must be exceeded to scale up
SCALE_FACTOR = 2 # divisor applied to the traffic increase to obtain the instance multiplier
42 |
43 | ################################################################################
44 | # Scaling example:
45 | #
46 | # If TRAFFIC_INCREASE_THRESHOLD == 10 and SCALE_FACTOR == 10 and there has been
47 | # a traffic increase of 25, then this means that (because 25 > 10) the number of
48 | # instances will be increased by a factor of int(25/10) == 2, that is doubled.
49 |
50 |
51 | ################################################################################
52 | # Helper
53 | #
54 |
def get_config_params(elsa_config):
    """
    Reads the auto-scaling settings from the ElSA config file.

    The file is scanned line by line for `key = value` entries. Blank lines,
    lines starting with `#`, lines without `=` and keys other than
    `stats-file`, `batch-window`, `traffic-increase-threshold` and
    `scale-factor` are ignored.

    Args:
        elsa_config: path to the ElSA config file.

    Returns:
        The tuple `(stats_file_path, batch_window, traffic_increase_threshold,
        scale_factor)`. Settings that are absent from the file (or all of them,
        if the file does not exist) keep their defaults: `''`, `10`, `6`, `2`.

    Raises:
        ValueError: if one of the numeric settings is not a valid integer.
    """
    stats_file_path = ''
    # Fallback so that a missing file or a config without `batch-window` no
    # longer fails with UnboundLocalError at the return statement.
    # NOTE(review): 10 seconds is an assumed default - confirm against elsa.conf.example.
    batch_window = 10
    traffic_increase_threshold = 6
    scale_factor = 2
    if os.path.exists(elsa_config):
        logging.info('Using %s as config file' %(elsa_config))
        # `with` closes the handle; the original left it to the garbage collector.
        with open(elsa_config, 'r') as config_file:
            for line in config_file:
                entry = line.strip()
                if not entry or entry.startswith('#'): # empty or comment line
                    continue
                # Split on the first '=' only, so values may themselves contain '='.
                key, separator, value = entry.partition('=')
                if not separator: # not a key/value entry
                    continue
                # Key and value come from the stripped entry, so indented
                # settings are recognised as well.
                key = key.strip()
                value = value.strip()
                if key == 'stats-file':
                    # drop the quotes around the path; works on Python 2 and 3
                    stats_file_path = value.replace('"', '')
                elif key == 'batch-window':
                    batch_window = int(value)
                elif key == 'traffic-increase-threshold':
                    traffic_increase_threshold = int(value)
                elif key == 'scale-factor':
                    scale_factor = int(value)
    else:
        logging.info('No config file provided.')
    logging.debug('[%s]' %(stats_file_path))
    return (stats_file_path, batch_window, traffic_increase_threshold, scale_factor)
78 |
79 |
def launch_elsa(marathon, stats_file, scale_window):
    """
    Deploys the ElSA app through Marathon and auto-scales it on topic traffic.

    After creating the `elsa` app, the function polls `stats_file` every
    `scale_window` seconds, compares the traffic count with the previous one
    and adjusts the number of app instances:

    * increase above TRAFFIC_INCREASE_THRESHOLD: the instance count is
      multiplied by `int(increase / SCALE_FACTOR)` (never less than 1);
    * decrease: the instance count is halved, but never drops below 1;
    * otherwise: nothing changes.

    The loop runs until the user interrupts it (CTRL+C), at which point the
    app is deleted from Marathon.

    Args:
        marathon: URL of the Marathon instance to deploy to.
        stats_file: path of the file holding the current traffic count as a
            single integer; created with the value 0 if it does not exist.
        scale_window: number of seconds to wait between two polls.
    """
    logging.info('Start monitoring the inbound traffic on topics using %s' %(stats_file))
    # make sure the stats file is properly initialized:
    if not os.path.exists(stats_file):
        # create the parent directory as well, otherwise open() below fails
        stats_dir = os.path.dirname(stats_file)
        if stats_dir and not os.path.isdir(stats_dir):
            os.makedirs(stats_dir)
        with open(stats_file, 'w') as f:
            f.write('0')

    # launch the Elsa app via Marathon
    c = MarathonClient(marathon)
    c.create_app('elsa', MarathonApp(cmd='/home/vagrant/elsa/launch-elsa.sh', mem=200, cpus=1, user='vagrant'))

    print('ElSA is deployed and running, waiting now 5 sec before starting auto-scale ...')
    time.sleep(5) # allow time to deploy before autoscaling sets in

    # kick off traffic monitoring and trigger autoscaling:
    previous_topic_traffic = 0
    try:
        while True:
            try:
                with open(stats_file, 'r') as elsa_file:
                    topic_traffic = int(elsa_file.read())
            except ValueError:
                # The file held no parsable integer (e.g. it was empty -
                # presumably while its producer was rewriting it; the writer
                # is not visible here). Skip this cycle instead of aborting
                # the loop and leaving the app deployed.
                logging.warning('Could not read a traffic count from %s, skipping this cycle' %(stats_file))
                time.sleep(scale_window)
                continue

            topic_traffic_diff = topic_traffic - previous_topic_traffic
            print('Difference in traffic in the past %d seconds: %d' %(scale_window, topic_traffic_diff))
            previous_topic_traffic = topic_traffic

            current_instance_num = c.get_app('elsa').instances

            if topic_traffic_diff > TRAFFIC_INCREASE_THRESHOLD: # we see a surge of traffic above threshold ...
                # Clamp to 1: when SCALE_FACTOR exceeds the increase the plain
                # division yields 0, which would scale the app down to zero
                # instances - and keep it there, since 0 * n == 0.
                instance_multiplier = max(1, int(topic_traffic_diff / SCALE_FACTOR))
                target_instance_num = current_instance_num * instance_multiplier
                if target_instance_num > current_instance_num: # ... increase number of instances
                    c.scale_app('elsa', target_instance_num)
                    print('Increasing number of instances to %d' %(target_instance_num))
            elif topic_traffic_diff < 0: # negative, back off exponentially
                target_instance_num = int(current_instance_num/2)
                if target_instance_num > 1:
                    c.scale_app('elsa', target_instance_num)
                    print('Decreasing number of instances to %d' %(target_instance_num))
                else:
                    c.scale_app('elsa', 1)
                    print('Resetting number of instances to 1')
            time.sleep(scale_window)
    except KeyboardInterrupt:
        print('ElSA has been stopped by user, halting app and rolling back deployment. Thanks and bye!')
        c.delete_app('elsa', force=True)
124 |
125 | ################################################################################
126 | # Main script
127 | #
# Entry point: expects the Marathon URL and the ElSA config file as arguments.
# Any failure (including missing arguments) prints the error followed by the
# usage text from the module docstring and exits with status 2.
if __name__ == '__main__':
    try:
        marathon = sys.argv[1] # Marathon URL to use
        (stats_file_path, batch_window, traffic_increase_threshold, scale_factor) = get_config_params(sys.argv[2])
        print('Using %s to monitor topic traffic' %(stats_file_path))
        # Rebind the module-level defaults read by launch_elsa; a value of 0
        # is treated as "not set" and keeps the default.
        if traffic_increase_threshold:
            TRAFFIC_INCREASE_THRESHOLD = traffic_increase_threshold
        if scale_factor:
            SCALE_FACTOR = scale_factor
        print('Using traffic increase threshold of %d and scale factor %d' %(TRAFFIC_INCREASE_THRESHOLD, SCALE_FACTOR))
        launch_elsa(marathon, stats_file_path, batch_window)
    # `as` is valid on Python 2.6+ and 3; the former `except Exception, e`
    # form is a SyntaxError on Python 3 and prevented the file from loading.
    except Exception as e:
        print(e)
        print(__doc__)
        sys.exit(2)
--------------------------------------------------------------------------------
/deployment/to-go/README.md:
--------------------------------------------------------------------------------
1 | # ElSA to-go
2 |
3 | ElSA to-go is derived from [Playa Mesos][1] which in turn relies on [VirtualBox][2] and [Vagrant][3], and an Ubuntu box image.
4 |
5 | ## Prerequisites
6 |
7 | * [VirtualBox][2] 4.2+
8 | * [Vagrant][3] 1.3+
9 |
10 | ## Quick Start
11 |
12 | Preparation:
13 |
14 | 1. [Install VirtualBox](https://www.virtualbox.org/wiki/Downloads)
15 |
16 | 1. [Install Vagrant](http://www.vagrantup.com/downloads.html)
17 |
18 | 1. Clone this repository
19 |
20 | ```bash
21 | git clone https://github.com/mhausenblas/elsa.git
22 | cd elsa/deployment/to-go
23 | ```
24 | 1. Start the VM
25 |
26 | ```bash
27 | vagrant up
28 | ```
29 |
30 | 1. Connect to the Mesos Web UI on [10.141.141.10:5050](http://10.141.141.10:5050) and the Marathon Web UI on [10.141.141.10:8080](http://10.141.141.10:8080)
31 |
32 | 1. SSH to the VM
33 |
34 | ```bash
35 | vagrant ssh
36 | ls -al
37 | exit
38 | ```
39 |
40 | At this point in time you can [launch ElSA via the autoscale](https://github.com/mhausenblas/elsa#launching-elsa-through-marathon) script.
41 |
42 | Once you're done experimenting, you can shut down the VM like so:
43 |
44 | ```bash
45 | vagrant halt
46 | ```
47 |
48 | … or, for faster start-up but larger disk footprint:
49 |
50 | ```bash
51 | vagrant suspend
52 | ```
53 |
54 | When you want to get rid of the VM, do the following:
55 |
56 | ```bash
57 | vagrant destroy
58 | ```
59 |
60 |
61 | ## Kudos
62 |
63 | Kudos to the original [Playa Mesos][1] authors:
64 |
65 | * [Jeremy Lingmann](https://github.com/lingmann) ([@lingmann](https://twitter.com/lingmann))
66 | * [Jason Dusek](https://github.com/solidsnack) ([@solidsnack](https://twitter.com/solidsnack))
67 |
68 | [1]: https://github.com/mesosphere/playa-mesos "Playa Mesos"
69 | [2]: http://www.virtualbox.org/ "VirtualBox"
70 | [3]: http://www.vagrantup.com/ "Vagrant"
71 |
--------------------------------------------------------------------------------
/deployment/to-go/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 |
4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing
VAGRANTFILE_API_VERSION = '2'

# Directory this Vagrantfile lives in (not referenced further down in this file).
PM_ROOT = File.dirname(__FILE__)

# Download location of the Playa Mesos base box.
box_url = 'http://downloads.mesosphere.io/playa-mesos/playa_mesos_ubuntu_14.04-virtualbox.box'


# #############################################################################
# Vagrant VM Definitions
# #############################################################################


Vagrant.configure(VAGRANTFILE_API_VERSION) do |elsa|

  # Base box: the hardcoded Playa Mesos image, fetched from box_url.
  elsa.vm.box = 'playa_mesos_ubuntu_14.04'
  elsa.vm.box_url = box_url

  # Private network with a fixed IP, giving host-only access to the machine.
  elsa.vm.network :private_network, ip: '10.141.141.10'

  # Enable agent forwarding for SSH connections (Vagrant's default is false).
  elsa.ssh.forward_agent = true

  # VirtualBox is the only provider configured.
  elsa.vm.provider :virtualbox do |vbox|
    vbox.name = 'elsa'
    # One `modifyvm` call per setting: 2048 MB of memory and 2 CPUs.
    [['--memory', '2048'], ['--cpus', '2']].each do |flag, value|
      vbox.customize ['modifyvm', :id, flag, value]
    end
  end

  # Make the project root available to the guest VM.
  # elsa.vm.synced_folder '.', '/vagrant'

  # Provision the ElSA app with the shell script next to this Vagrantfile.
  elsa.vm.provision :shell, path: 'provision.sh'

end
49 |
--------------------------------------------------------------------------------
/deployment/to-go/provision.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | ###############################################################################
4 | # Provisioning script for deploying ElSA on Ubuntu 14.04
5 | #
6 | # Usage:
7 | #
8 | # ./provision.sh [INSTALL_DIR]
9 | #
10 | # Examples:
11 | #
12 | # ... without INSTALL_DIR provided installs it in `/home/vagrant`:
13 | # $ ./provision.sh
14 | #
15 | # ... install it in `/home/mhausenblas`:
16 | # $ ./provision.sh /home/mhausenblas
17 | #
18 | # Author: Michael Hausenblas
19 | # Init: 2015-03-16
20 |
21 |
set -e # exit on error immediately, just to keep things sane


###############################################################################
# Global variables

# Directory this script was invoked from; "$0" is quoted so that a script path
# containing spaces is not split into several arguments.
SCRIPT_PATH=$(dirname "$0")

# Installation target: first command-line argument, or /home/vagrant if absent.
BASE_INSTALL=${1:-"/home/vagrant"}
31 |
32 | SPARK_CONF_TEMPLATE=$(cat <> spark-env.sh
104 |
# Remove the downloaded Spark archive from the base install dir.
# "$BASE_INSTALL" is quoted everywhere below so that an install path containing
# spaces or glob characters is passed through unchanged.
cd "$BASE_INSTALL"
rm -f spark-1.2.0.tgz

###############################################################################
# Building ElSA

echo "Phase 3: Building ElSA"

cd "$BASE_INSTALL" # back to the base install dir to set up ElSA
git clone https://github.com/mhausenblas/elsa.git
cd elsa
mvn clean package

echo "Done provisioning ElSA into $BASE_INSTALL"

popd # restore and change back to where we started

exit 0
--------------------------------------------------------------------------------
/doc/architecture and flow.graffle/data.plist:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhausenblas/elsa/6c11ea3d4f03cbefbca9f83021645786d6f252a1/doc/architecture and flow.graffle/data.plist
--------------------------------------------------------------------------------
/doc/architecture and flow.graffle/image1.tiff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhausenblas/elsa/6c11ea3d4f03cbefbca9f83021645786d6f252a1/doc/architecture and flow.graffle/image1.tiff
--------------------------------------------------------------------------------
/doc/architecture and flow.graffle/image2.tiff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhausenblas/elsa/6c11ea3d4f03cbefbca9f83021645786d6f252a1/doc/architecture and flow.graffle/image2.tiff
--------------------------------------------------------------------------------
/doc/architecture and flow.graffle/image4.tiff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhausenblas/elsa/6c11ea3d4f03cbefbca9f83021645786d6f252a1/doc/architecture and flow.graffle/image4.tiff
--------------------------------------------------------------------------------
/doc/architecture and flow.graffle/image5.tiff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhausenblas/elsa/6c11ea3d4f03cbefbca9f83021645786d6f252a1/doc/architecture and flow.graffle/image5.tiff
--------------------------------------------------------------------------------
/doc/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhausenblas/elsa/6c11ea3d4f03cbefbca9f83021645786d6f252a1/doc/architecture.png
--------------------------------------------------------------------------------
/doc/elsa-marathon-deploy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhausenblas/elsa/6c11ea3d4f03cbefbca9f83021645786d6f252a1/doc/elsa-marathon-deploy.png
--------------------------------------------------------------------------------
/doc/flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhausenblas/elsa/6c11ea3d4f03cbefbca9f83021645786d6f252a1/doc/flow.png
--------------------------------------------------------------------------------
/doc/sa-logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mhausenblas/elsa/6c11ea3d4f03cbefbca9f83021645786d6f252a1/doc/sa-logo.jpg
--------------------------------------------------------------------------------
/elsa.conf.example:
--------------------------------------------------------------------------------
1 | deployment = "production"
2 | master = "local[*]"
3 | checkpoint-dir = "file:///tmp/elsa/checkpoints"
4 | stats-file = "/tmp/elsa/stats"
5 | batch-window = 5
6 | traffic-increase-threshold = 6
7 | scale-factor = 2
8 | topics = "Datacenter,DCOS,Docker,Mesos,Mesosphere,devop,microservice"
9 | consumer-key = "YOUR STUFF HERE"
10 | consumer-secret = "YOUR STUFF HERE"
11 | access-token = "YOUR STUFF HERE"
12 | access-token-secret = "YOUR STUFF HERE"
--------------------------------------------------------------------------------
/launch-elsa.sh:
--------------------------------------------------------------------------------
# Submits the ElSA online sentiment analysis job to Spark.
# The last argument is the ElSA configuration file handed to spark.elsa.OnlineSA.
SPARK_SUBMIT=/home/vagrant/spark-1.2.0/bin/spark-submit
ELSA_HOME=/home/vagrant/elsa

"$SPARK_SUBMIT" \
  --class spark.elsa.OnlineSA \
  --master mesos://127.0.1.1:5050 \
  "$ELSA_HOME/target/elsa-1.0-SNAPSHOT-jar-with-dependencies.jar" \
  "$ELSA_HOME/elsa.conf"
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 4.0.0
4 | info.mhausenblas.dev
5 | elsa
6 | Elastic Sentiment Analysis (ElSA)
7 | https://github.com/mhausenblas/elsa
8 | 1.0-SNAPSHOT
9 |
10 |
11 | org.apache.spark
12 | spark-core_2.10
13 | 1.2.0
14 |
15 |
16 | org.apache.spark
17 | spark-streaming_2.10
18 | 1.2.0
19 |
20 |
21 | org.apache.spark
22 | spark-streaming-twitter_2.10
23 | 1.2.0
24 |
25 |
26 | org.streum
27 | configrity-core_2.10
28 | 1.0.0
29 |
30 |
31 | org.scala-lang
32 | scala-library
33 | 2.10.4
34 |
35 |
36 |
37 |
38 |
39 | maven-assembly-plugin
40 |
41 |
42 | jar-with-dependencies
43 |
44 |
45 |
46 | spark.elsa.OnlineSA
47 |
48 |
49 |
50 |
51 |
52 | make-assembly
53 | package
54 |
55 | single
56 |
57 |
58 |
59 |
60 |
61 | org.scala-tools
62 | maven-scala-plugin
63 |
64 |
65 |
66 | compile
67 |
68 |
69 |
70 |
71 |
72 | -Xms64m
73 | -Xmx1024m
74 |
75 |
76 |
77 |
78 | org.apache.maven.plugins
79 | maven-compiler-plugin
80 | 3.1
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/src/main/scala/spark/elsa/ElsaHelper.scala:
--------------------------------------------------------------------------------
1 | package spark.elsa
2 |
3 | import org.apache.spark.Logging
4 |
5 | import org.apache.log4j.{Level, Logger}
6 |
/**
 * Small logging helper for ElSA.
 */
object ElsaHelper extends Logging {

  /**
   * Restricts the log4j root logger to `ERROR`.
   *
   * The level is only changed when the root logger has no appenders attached
   * at the time of the call; otherwise the existing setup is left untouched.
   */
  def setLogLevel(): Unit = {
    val rootLogger = Logger.getRootLogger
    val alreadyConfigured = rootLogger.getAllAppenders.hasMoreElements
    if (!alreadyConfigured) {
      logInfo("Setting log level to [ERROR].")
      rootLogger.setLevel(Level.ERROR)
    }
  }
}
--------------------------------------------------------------------------------
/src/main/scala/spark/elsa/OnlineSA.scala:
--------------------------------------------------------------------------------
1 | package spark.elsa
2 |
3 | import java.nio.file.{Paths, Files}
4 | import java.nio.charset.StandardCharsets
5 |
6 | import org.apache.spark.SparkConf
7 | import org.apache.spark.streaming.{Seconds, StreamingContext}
8 | import org.apache.spark.streaming.twitter._
9 |
10 | import org.streum.configrity._
11 |
/**
 * ElSA online sentiment analysis.
 *
 * A Spark Streaming job that follows a list of topics on Twitter, prints a very
 * simple sentiment verdict for every tweet and, once per batch, writes the
 * number of tweets seen to a stats file.
 */
object OnlineSA {

  /**
   * Tells whether any word of `text` starts with one of the `triggers`.
   *
   * Matching is done per word rather than on the raw text so that a trigger
   * buried inside another word does not fire: "dislike" must not count as
   * "like", nor "glove" as "love". Matching on the word start still accepts
   * inflected forms such as "likes" or "loved".
   *
   * @param text     lower-cased text to inspect
   * @param triggers lower-cased trigger words
   * @return `true` if at least one word of `text` starts with a trigger
   */
  private def mentionsAny(text: String, triggers: Array[String]): Boolean =
    text.split("\\W+").exists(word => triggers.exists(trigger => word.startsWith(trigger)))

  /**
   * Sets up the streaming pipeline and blocks until it terminates.
   *
   * Reads the following keys from `elsaConf`: `master`, `stats-file`, `topics`
   * (comma-separated), `batch-window` (seconds) and the four Twitter OAuth
   * settings `consumer-key`, `consumer-secret`, `access-token` and
   * `access-token-secret`.
   *
   * @param elsaConf the loaded ElSA configuration
   */
  def runAnalysis(elsaConf: Configuration): Unit = {

    // setting up the Spark configuration:
    val conf = new SparkConf().setAppName("ElSA Online").setMaster(elsaConf[String]("master"))
    // setting up the filename where to log the stats to:
    val stats = elsaConf[String]("stats-file")
    // setting up list of topics to be monitored by ElSA
    // (tolerates blanks around the commas and empty entries):
    val topics: Array[String] =
      elsaConf[String]("topics").split(",").map(_.trim).filter(_.nonEmpty).distinct
    // length of one batch in seconds, read once instead of on every batch:
    val batchWindow = elsaConf[Int]("batch-window")
    // setting up the Spark Streaming context:
    val ssc = new StreamingContext(conf, Seconds(batchWindow))

    // setting up system properties for Twitter4j lib OAuth credentials:
    System.setProperty("twitter4j.oauth.consumerKey", elsaConf[String]("consumer-key"))
    System.setProperty("twitter4j.oauth.consumerSecret", elsaConf[String]("consumer-secret"))
    System.setProperty("twitter4j.oauth.accessToken", elsaConf[String]("access-token"))
    System.setProperty("twitter4j.oauth.accessTokenSecret", elsaConf[String]("access-token-secret"))

    // sentiment triggers:
    val posSen: Array[String] = Array("like", "cool", "awesome", "nice", "good", "love")
    val negSen: Array[String] = Array("dislike", "meh", "bad", "sad", "hate", "mad")

    // make sure the directory of the stats file exists, otherwise the write at
    // the end of every batch would fail (a bare file name has no parent):
    Option(Paths.get(stats).getParent).foreach(dir => Files.createDirectories(dir))

    // hook into the Twitter firehose and get tweets with the topics of interest:
    val twitterFirehose = TwitterUtils.createStream(ssc, None, topics)

    twitterFirehose.foreachRDD(rdd => {
      val tweetCount = rdd.count()

      // overall stats:
      print(s"\n\nIn the past $batchWindow seconds I found $tweetCount tweet(s) " +
        "containing your topics: ")
      print(topics.map(_ + " ").mkString)
      println("\n**********************")

      // display tweet details and determine sentiment
      // NOTE: rdd.foreach is executed by Spark on the executors, so with a
      // non-local master this output ends up in the executors' stdout.
      rdd.foreach { tweet =>
        val tweetText = tweet.getText.toLowerCase // normalize for comparison with sentiments

        println("\n===\n" + tweetText + "\n===")

        // here comes the *very* simplistic sentiment analysis
        // (just check if a word starting with a trigger is present):
        if (mentionsAny(tweetText, posSen)) { print("SA: positive sentiment") }
        if (mentionsAny(tweetText, negSen)) { print("SA: negative sentiment") }
      }

      // write out the tweet count as primary input for the auto-scale process:
      Files.write(Paths.get(stats), tweetCount.toString.getBytes(StandardCharsets.UTF_8))
    })

    // kick off the ongoing stream processing:
    //ssc.checkpoint(elsaConf[String]("checkpoint-dir"))
    ssc.start()
    ssc.awaitTermination()
  }

  /**
   * Entry point. Expects the path of the ElSA configuration file as the first
   * argument; exits with status 1 if it is missing.
   *
   * @param args command-line arguments
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 1) {
      System.err.println("Usage: OnlineSA <path-to-elsa-conf>")
      System.exit(1)
    }
    // setting up configuration:
    val elsaConf = Configuration.load(args(0))

    // makes sure that if and only if we're in production we don't show too verbose logs info:
    if (elsaConf[String]("deployment") == "production") {
      ElsaHelper.setLogLevel()
    }

    runAnalysis(elsaConf)
    System.exit(0)
  }
}
--------------------------------------------------------------------------------