├── .gitignore
├── MANIFEST.in
├── README.md
├── cloud
    ├── __init__.py
    ├── cluster.py
    ├── decorators.py
    ├── exception.py
    ├── plugin.py
    ├── providers
    │   ├── __init__.py
    │   └── ec2.py
    ├── service.py
    ├── settings.py
    ├── storage.py
    └── util.py
├── example_scripts
    ├── cassandra-ec2-init-remote.sh
    ├── hadoop-cassandra-hybrid-ec2-init-remote.sh
    ├── hadoop-ec2-init-remote.sh
    ├── hbase-ec2-init-remote.sh
    └── zookeeper-ec2-init-remote.sh
├── plugins
    ├── __init__.py
    ├── cassandra
    │   ├── __init__.py
    │   ├── cli.plugin
    │   ├── cli.py
    │   ├── service.plugin
    │   └── service.py
    ├── hadoop
    │   ├── __init__.py
    │   ├── cli.plugin
    │   ├── cli.py
    │   ├── service.plugin
    │   └── service.py
    ├── hadoop_cassandra_hybrid
    │   ├── __init__.py
    │   ├── cli.plugin
    │   ├── cli.py
    │   ├── service.plugin
    │   └── service.py
    └── simple
    │   ├── __init__.py
    │   ├── cli.plugin
    │   ├── cli.py
    │   ├── service.plugin
    │   └── service.py
├── setup.py
└── stratus


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.iml
3 | *.pyc
4 | 
5 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include example_scripts/ *
2 | recursive-include plugins/ *
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This work was originally derived from the Cloudera CDH Cloud Scripts for managing Hadoop in 
 2 | Amazon EC2 (https://wiki.cloudera.com/display/DOC/CDH+Cloud+Scripts). We needed a way to 
 3 | manage Hadoop, Cassandra, and other distributed services, thus PyStratus was born. Thanks 
 4 | to Cloudera for providing a great starting point for us!Currently only Amazon EC2 is supported, 
 5 | but we hope to add new cloud providers very soon.
 6 | 
 7 | To get up and running quickly, use virtualenv and install PyStratus with these instructions: 
 8 | ```code
 9 | $ mkvirtualenv stratus
10 | (stratus)$ pip install https://github.com/digitalreasoning/PyStratus/archive/master.zip
11 | ...
12 | # issue commands like: 
13 | (stratus)$ stratus list
14 | (stratus)$ stratus exec HADOOP_CLUSTER launch-cluster 3
15 | (stratus)$ stratus exec HADOOP_CLUSTER terminate-cluster
16 | ...
17 | (stratus)$ deactivate # to leave virtualenv
18 | ```
19 | 
20 | 
21 | Additionally, the following script is sufficient (assumes that you have a ~/bin directory and it is on your PATH):
22 | 
23 | ```code
24 | INSTALL_DIR=~/Tools/pystratus
25 | virtualenv $INSTALL_DIR --no-site-packages
26 | $INSTALL_DIR/bin/pip install https://github.com/digitalreasoning/PyStratus/archive/master.zip
27 | ln -snf $INSTALL_DIR/bin/stratus ~/bin/stratus
28 | ```
29 | 
30 | PyStratus uses the following dependencies:
31 | 
32 | * Python 2.5+
33 | * boto 
34 | * simplejson
35 | * prettytable
36 | * setuptools
37 | * dateutil
38 | * PyYAML
39 | * cElementTree or elementree
40 | * Fabric
41 | 
42 | You may also check out the project and run "python setup.py install" and the command "stratus" will now available 
43 | and an egg file will be located in your site-packages directory. You may want to run the command with 
44 | sudo to install it for all users.
45 | 
46 | See the full documentation at http://github.com/digitalreasoning/PyStratus/wiki/Documentation
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/cloud/__init__.py:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | VERSION = "0.8.30"
16 | 


--------------------------------------------------------------------------------
/cloud/cluster.py:
--------------------------------------------------------------------------------
  1 | # Licensed to the Apache Software Foundation (ASF) under one or more
  2 | # contributor license agreements.  See the NOTICE file distributed with
  3 | # this work for additional information regarding copyright ownership.
  4 | # The ASF licenses this file to You under the Apache License, Version 2.0
  5 | # (the "License"); you may not use this file except in compliance with
  6 | # the License.  You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | """
 17 | Classes for controlling a cluster of cloud instances.
 18 | """
 19 | 
 20 | from __future__ import with_statement
 21 | 
 22 | import gzip
 23 | import StringIO
 24 | import urllib
 25 | import providers
 26 | 
 27 | from cloud.storage import Storage
 28 | 
 29 | CLUSTER_PROVIDER_MAP = {}
 30 | 
 31 | def _build_provider_map() :
 32 |     from pkgutil import iter_modules
 33 |     it = iter_modules(providers.__path__, 'providers.')
 34 |     for module in it :
 35 |         try :
 36 |             provider = __import__(module[1], globals(), locals(), ['CLOUD_PROVIDER']).CLOUD_PROVIDER
 37 |         except :
 38 |             pass
 39 |         else :
 40 |             CLUSTER_PROVIDER_MAP[provider[0]] = provider[1]
 41 | 
 42 | def get_cluster(provider):
 43 |   """
 44 |   Retrieve the Cluster class for a provider.
 45 |   """
 46 |   if not len(CLUSTER_PROVIDER_MAP):
 47 |     _build_provider_map()
 48 |   mod_name, driver_name = CLUSTER_PROVIDER_MAP[provider]
 49 |   _mod = __import__(mod_name, globals(), locals(), [driver_name])
 50 |   return getattr(_mod, driver_name)
 51 | 
 52 | class Cluster(object):
 53 |   """
 54 |   A cluster of server instances. A cluster has a unique name.
 55 |   One may launch instances which run in a certain role.
 56 |   """
 57 | 
 58 |   def __init__(self, name, config_dir, region):
 59 |     self.name = name
 60 |     self.config_dir = config_dir
 61 |     self.region = region
 62 | 
 63 |   def get_provider_code(self):
 64 |     """
 65 |     The code that uniquely identifies the cloud provider.
 66 |     """
 67 |     raise Exception("Unimplemented")
 68 | 
 69 |   def authorize_role(self, role, from_port, to_port, cidr_ip):
 70 |     """
 71 |     Authorize access to machines in a given role from a given network.
 72 |     """
 73 |     pass
 74 | 
 75 |   def get_instances_in_role(self, role, state_filter=None):
 76 |     """
 77 |     Get all the instances in a role, filtered by state.
 78 | 
 79 |     @param role: the name of the role
 80 |     @param state_filter: the state that the instance should be in
 81 |     (e.g. "running"), or None for all states
 82 |     """
 83 |     raise Exception("Unimplemented")
 84 | 
 85 |   def print_status(self, roles=None, state_filter="running"):
 86 |     """
 87 |     Print the status of instances in the given roles, filtered by state.
 88 |     """
 89 |     pass
 90 | 
 91 |   def check_running(self, role, number):
 92 |     """
 93 |     Check that a certain number of instances in a role are running.
 94 |     """
 95 |     instances = self.get_instances_in_role(role, "running")
 96 |     if len(instances) != number:
 97 |       print "Expected %s instances in role %s, but was %s %s" % \
 98 |         (number, role, len(instances), instances)
 99 |       return False
100 |     else:
101 |       return instances
102 | 
103 |   def launch_instances(self, roles, number, image_id, size_id,
104 |                        instance_user_data, **kwargs):
105 |     """
106 |     Launch instances (having the given roles) in the cluster.
107 |     Returns a list of IDs for the instances started.
108 |     """
109 |     pass
110 | 
111 |   def wait_for_instances(self, instance_ids, timeout=600):
112 |     """
113 |     Wait for instances to start.
114 |     Raise TimeoutException if the timeout is exceeded.
115 |     """
116 |     pass
117 | 
118 |   def terminate(self):
119 |     """
120 |     Terminate all instances in the cluster.
121 |     """
122 |     pass
123 | 
124 |   def delete(self):
125 |     """
126 |     Delete the cluster permanently. This operation is only permitted if no
127 |     instances are running.
128 |     """
129 |     pass
130 | 
131 |   def get_storage(self):
132 |     """
133 |     Return the external storage for the cluster.
134 |     """
135 |     return Storage(self)
136 | 
137 | class InstanceUserData(object):
138 |   """
139 |   The data passed to an instance on start up.
140 |   """
141 | 
142 |   def __init__(self, filename, replacements={}):
143 |     self.filename = filename
144 |     self.replacements = replacements
145 | 
146 |   def _read_file(self, filename):
147 |     """
148 |     Read the user data.
149 |     """
150 |     return urllib.urlopen(filename).read()
151 | 
152 |   def read(self):
153 |     """
154 |     Read the user data, making replacements.
155 |     """
156 |     contents = self._read_file(self.filename)
157 |     for (match, replacement) in self.replacements.iteritems():
158 |       if replacement == None:
159 |         replacement = ''
160 |       contents = contents.replace(match, replacement)
161 |     return contents
162 | 
163 |   def read_as_gzip_stream(self):
164 |     """
165 |     Read and compress the data.
166 |     """
167 |     output = StringIO.StringIO()
168 |     compressed = gzip.GzipFile(mode='wb', fileobj=output)
169 |     compressed.write(self.read())
170 |     compressed.close()
171 |     return output.getvalue()
172 | 
173 | class Instance(object):
174 |   """
175 |   A server instance.
176 |   """
177 |   def __init__(self, id, role, public_ip, private_ip, launch_time, instance_type, zone):
178 |     self.id = id
179 |     self.role = role
180 |     self.public_ip = public_ip
181 |     self.private_ip = private_ip
182 |     self.launch_time = launch_time
183 |     self.instance_type = instance_type
184 |     self.zone = zone 
185 | 
186 | class RoleSyntaxException(Exception):
187 |   """
188 |   Raised when a role name is invalid. Role names may consist of a sequence
189 |   of alphanumeric characters and underscores. Dashes are not permitted in role
190 |   names.
191 |   """
192 |   def __init__(self, message):
193 |     super(RoleSyntaxException, self).__init__()
194 |     self.message = message
195 |   def __str__(self):
196 |     return repr(self.message)
197 | 
198 | class TimeoutException(Exception):
199 |   """
200 |   Raised when a timeout is exceeded.
201 |   """
202 |   pass
203 | 
204 | class InstanceTerminatedException(Exception):
205 |     """
206 |     Raised when an instance that should start goes to a terminated state.
207 |     """
208 |     pass
209 | 


--------------------------------------------------------------------------------
/cloud/decorators.py:
--------------------------------------------------------------------------------
 1 | import signal
 2 | from cloud.cluster import TimeoutException
 3 | 
 4 | def timeout(seconds_before_timeout):
 5 |     """
 6 |     Borrowed from http://www.saltycrane.com/blog/2010/04/using-python-timeout-decorator-uploading-s3/
 7 |     """
 8 |     def decorate(f):
 9 |         def handler(signum, frame):
10 |             raise TimeoutException()
11 |         def new_f(*args, **kwargs):
12 |             old = signal.signal(signal.SIGALRM, handler)
13 |             signal.alarm(seconds_before_timeout)
14 |             try:
15 |                 result = f(*args, **kwargs)
16 |             finally:
17 |                 signal.signal(signal.SIGALRM, old)
18 |             signal.alarm(0)
19 |             return result
20 |         new_f.func_name = f.func_name
21 |         return new_f
22 |     return decorate
23 | 
24 | 


--------------------------------------------------------------------------------
/cloud/exception.py:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | class VolumesStillInUseException(Exception):
17 |     pass
18 | 
19 | class InvalidSpotConfigurationException(Exception):
20 |     pass
21 | 


--------------------------------------------------------------------------------
/cloud/plugin.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | import os
  3 | import subprocess
  4 | import sys
  5 | import logging
  6 | import time
  7 | 
  8 | from optparse import OptionParser
  9 | from optparse import make_option
 10 | from yapsy.IPlugin import IPlugin
 11 | from prettytable import PrettyTable
 12 | 
 13 | from cloud.cluster import InstanceUserData
 14 | from cloud.util import xstr
 15 | from cloud.util import build_env_string
 16 | from cloud.exception import VolumesStillInUseException
 17 | 
 18 | from cloud import VERSION
 19 | 
 20 | CONFIG_DIR_OPTION = \
 21 |   make_option("--config-dir", metavar="CONFIG-DIR",
 22 |     help="The configuration directory.")
 23 | 
 24 | PROVIDER_OPTION = \
 25 |   make_option("--cloud-provider", metavar="PROVIDER",
 26 |     help="The cloud provider, e.g. 'ec2' for Amazon EC2.")
 27 | 
 28 | AVAILABILITY_ZONE_OPTION = \
 29 |   make_option("-z", "--availability-zone", metavar="ZONE",
 30 |     help="The availability zone to run the instances in.")
 31 | 
 32 | REGION_OPTION = \
 33 |   make_option("-r", "--region", metavar="REGION",
 34 |     help="The region run the instances in.")
 35 | 
 36 | FORCE_OPTION = \
 37 |   make_option("--force", metavar="FORCE", 
 38 |               action="store_true", default=False,
 39 |               help="Force the command without prompting.")
 40 | 
 41 | BASIC_OPTIONS = [
 42 |   CONFIG_DIR_OPTION,
 43 |   PROVIDER_OPTION,
 44 |   AVAILABILITY_ZONE_OPTION,
 45 |   REGION_OPTION,
 46 | ]
 47 | 
 48 | class CLIPlugin(IPlugin):
 49 |     """
 50 |     """
 51 |     USAGE = None
 52 | 
 53 |     def __init__(self, service=None):
 54 |         self.service = service
 55 |         self.logger = logging #logging.getLogger(self.__class__.__name__)
 56 | 
 57 |     def print_help(self, exitCode=1):
 58 |         if self.USAGE is None:
 59 |             raise RuntimeError("USAGE has not been defined.")
 60 | 
 61 |         print self.USAGE
 62 |         sys.exit(exitCode)
 63 | 
 64 |     def parse_options(self, command, argv, option_list=[], expected_arguments=[],
 65 |                       unbounded_args=False):
 66 |         """
 67 |         Parse the arguments to command using the given option list.
 68 | 
 69 |         If unbounded_args is true then there must be at least as many extra arguments
 70 |         as specified by extra_arguments (the first argument is always CLUSTER).
 71 |         Otherwise there must be exactly the same number of arguments as
 72 |         extra_arguments.
 73 |         """
 74 | 
 75 |         usage = "%%prog CLUSTER [options] %s" % \
 76 |             (" ".join([command] + expected_arguments[:]),)
 77 | 
 78 |         parser = OptionParser(usage=usage, version="%%prog %s" % VERSION,
 79 |                             option_list=option_list)
 80 | 
 81 |         parser.disable_interspersed_args()
 82 |         (options, args) = parser.parse_args(argv)
 83 |         if unbounded_args:
 84 |             if len(args) < len(expected_arguments):
 85 |                 parser.error("incorrect number of arguments")
 86 |         elif len(args) != len(expected_arguments):
 87 |             parser.error("incorrect number of arguments")
 88 | 
 89 |         return (vars(options), args)
 90 | 
 91 |     def _prompt(self, prompt):
 92 |         """
 93 |         Returns true if user responds "yes" to prompt.
 94 |         """
 95 |         return raw_input("%s [yes or no]: " % prompt).lower() == "yes"
 96 |     
 97 |     def execute_command(self, argv, options_dict):
 98 |         """
 99 |         Should be overridden by the subclass to handle
100 |         command specific options.
101 |         """
102 |         raise RuntimeError("Not implemented.")
103 | 
104 |     def create_storage(self, argv, options_dict):
105 |         raise RuntimeError("Not implemented.")
106 | 
107 |     def terminate_cluster(self, argv, options_dict):
108 |         opt, args = self.parse_options(self._command_name, argv, [FORCE_OPTION])
109 | 
110 |         if not self.service.get_instances():
111 |             print "No running instances. Aborting."
112 |             return
113 | 
114 |         if opt.get("force"):
115 |             print "Terminating cluster..."
116 |             self.service.terminate_cluster()
117 |         else:
118 |             self.print_instances()
119 |             if not self._prompt("Terminate all instances?"):
120 |                 print "Not terminating cluster."
121 |             else:
122 |                 print "Terminating cluster..."
123 |                 self.service.terminate_cluster()
124 | 
125 |     def simple_print_instances(self, argv, options_dict):
126 |         opt, fields = self.parse_options(self._command_name, argv, expected_arguments=['FIELD*'], unbounded_args=True)
127 | 
128 |         for instance in self.service.get_instances():
129 |             print("|".join([instance.__getattribute__(field) for field in fields]))
130 | 
131 |     def print_instances(self):
132 |         if not self.service.get_instances():
133 |             print "No running instances. Aborting."
134 |             return
135 | 
136 |         table = PrettyTable()
137 |         table.set_field_names(("Role", "Instance Id", "Image Id", 
138 |                                "Public DNS", "Private DNS", "State", 
139 |                                "Key", "Instance Type", "Launch Time", 
140 |                                "Zone", "Region"))
141 |         
142 |         for i in self.service.get_instances():
143 |             table.add_row((
144 |                 i.role, i.id, i.image_id, i.public_dns_name, 
145 |                 i.private_dns_name, i.state, i.key_name, i.instance_type,
146 |                 i.launch_time, i.placement, i.region.name))
147 | 
148 |         table.printt()
149 | 
150 |     def print_storage(self):
151 |         storage = self.service.get_storage()
152 |         
153 |         table = PrettyTable()
154 |         table.set_field_names(("Role", "Instance ID", "Volume Id", 
155 |                                "Volume Size", "Snapshot Id", "Zone", 
156 |                                "Status", "Device", "Create Time", 
157 |                                "Attach Time"))
158 | 
159 |         for (r, v) in storage.get_volumes():
160 |             table.add_row((r, v.attach_data.instance_id, v.id, 
161 |                            str(v.size), v.snapshot_id, v.zone,
162 |                            "%s / %s" % (v.status, v.attach_data.status), 
163 |                            v.attach_data.device, str(v.create_time),
164 |                            str(v.attach_data.attach_time)))
165 |             
166 |         if len(table.rows) > 0:
167 |             s = 0
168 |             for r in table.rows:
169 |                 s += int(r[3])
170 | 
171 |             table.printt()
172 |             print "Total volumes: %d" % len(table.rows)
173 |             print "Total size:    %d" % s
174 |         else:
175 |             print "No volumes defined."
176 |     
177 |     def delete_storage(self, argv, options_dict):
178 |         opt, args = self.parse_options(self._command_name, argv, [FORCE_OPTION])
179 | 
180 |         storage = self.service.get_storage()
181 |         volumes = storage.get_volumes()
182 | 
183 |         if not volumes:
184 |             print "No volumes defined."
185 |             sys.exit()
186 | 
187 |         if opt.get('force'):
188 |             print "Deleting storage..."
189 |             try:
190 |                 storage.delete(storage.get_roles())
191 |             except VolumesStillInUseException, e:
192 |                 print e.message
193 |                 sys.exit(1)
194 |         else:
195 |             self.print_storage()
196 |             if not self._prompt("Delete all storage volumes? THIS WILL PERMANENTLY DELETE ALL DATA"):
197 |                 print "Not deleting storage."
198 |             else:
199 |                 print "Deleting storage..."
200 |                 try:
201 |                     storage.delete(storage.get_roles())
202 |                 except VolumesStillInUseException, e:
203 |                     print e.message
204 |                     sys.exit(1)
205 | 
206 |     def login(self, argv, options_dict):
207 |         """
208 |         """
209 |         instances = self.service.get_instances()
210 |         if not instances:
211 |             print "No running instances. Aborting."
212 |             return
213 | 
214 |         table = PrettyTable()
215 |         table.set_field_names(("", "ROLE", "INSTANCE ID", "PUBLIC IP", "PRIVATE IP"))
216 | 
217 |         for instance in instances:
218 |             table.add_row((len(table.rows)+1, 
219 |                            instance.role,
220 |                            instance.id, 
221 |                            instance.public_dns_name, 
222 |                            instance.private_dns_name))
223 | 
224 |         table.printt()
225 | 
226 |         while True:
227 |             try:
228 |                 choice = raw_input("Instance to login to [Enter = quit]: ")
229 |                 if choice == "":
230 |                     sys.exit(0)
231 |                 choice = int(choice)
232 |                 if choice > 0 and choice <= len(table.rows):
233 |                     instance = instances[choice-1]
234 |                     self.service.login(instance, options_dict.get('ssh_options'))
235 |                     break
236 |                 else:
237 |                     print "Not a valid choice. Try again."
238 |             except ValueError:
239 |                 print "Not a valid choice. Try again."
240 | 
241 |     def transfer_files(self, argv, options_dict):
242 |         opt, args = self.parse_options(self._command_name, argv, expected_arguments=['FILE_NAME*'], unbounded_args=True)
243 |         result = self.service.transfer_files(args, options_dict.get('ssh_options'))
244 | 
245 |         table = PrettyTable()
246 |         table.set_field_names(("INSTANCE ID", "PUBLIC IP", "PRIVATE IP", "FILE NAME", "RESULT"))
247 |         for instance, file, retcode in result:
248 |             table.add_row((instance.id,
249 |                            instance.public_dns_name,
250 |                            instance.private_dns_name,
251 |                            file,
252 |                            retcode
253 |                            ))
254 |         table.printt()
255 | 
256 |     def run_command(self, argv, options_dict):
257 |         opt, args = self.parse_options(self._command_name, argv, expected_arguments=['COMMAND'])
258 |         result = self.service.run_command(args[0], options_dict.get('ssh_options'))
259 | 
260 |         table = PrettyTable()
261 |         table.set_field_names(("INSTANCE ID", "PUBLIC IP", "PRIVATE IP", "RESULT"))
262 |         for instance, retcode in result:
263 |             table.add_row((instance.id,
264 |                            instance.public_dns_name,
265 |                            instance.private_dns_name,
266 |                            retcode
267 |                            ))
268 |         table.printt()
269 | 
270 | 
271 | 
272 | class ServicePlugin(object):
273 |     def __init__(self, cluster=None):
274 |         self.cluster = cluster
275 |         self.logger = logging #logging.getLogger(self.__class__.__name__)
276 | 
277 |     def get_roles(self):
278 |         """
279 |         Returns a list of role identifiers for this service type.
280 |         """
281 |         raise RuntimeError("Not implemented.")
282 | 
283 |     def get_instances(self):
284 |         """
285 |         Returns a list of running Instance objects from the cluster
286 | 
287 |         self.cluster.get_instances_in_role(ROLE, "running")
288 |         """
289 |         raise RuntimeError("Not implemented.")
290 | 
291 |     def launch_cluster(self):
292 |         raise RuntimeError("Not implemented.")
293 |         
294 |     def terminate_cluster(self):
295 |         """
296 |         Terminates all instances in the cluster
297 |         """
298 |         # TODO: Clear all tags
299 |         self.logger.info("Terminating cluster")
300 |         self.cluster.terminate()
301 | 
302 |     def get_storage(self):
303 |         return self.cluster.get_storage()
304 |     
305 |     def print_storage_status(self):
306 |         storage = self.get_storage()
307 |         if not os.path.isfile(storage._get_storage_filename()):
308 |             storage.print_status(volumes=self._get_cluster_volumes(storage))
309 |         else:
310 |             storage.print_status()
311 | 
312 |     def _get_standard_ssh_command(self, instance, ssh_options, remote_command=None):
313 |         """
314 |         Returns the complete SSH command ready for execution on the instance.
315 |         """
316 |         cmd = "ssh %s %s" % (xstr(ssh_options), instance.public_dns_name)
317 |         
318 |         if remote_command is not None:
319 |             cmd += " '%s'" % remote_command
320 | 
321 |         return cmd
322 | 
323 |     def _attach_storage(self, roles):
324 |         storage = self.cluster.get_storage()
325 |         if storage.has_any_storage(roles):
326 |             print "Waiting 10 seconds before attaching storage"
327 |             time.sleep(10)
328 |             for role in roles:
329 |                 storage.attach(role, self.cluster.get_instances_in_role(role, 'running'))
330 |             storage.print_status(roles)
331 | 
332 |     def _launch_instances(self, instance_template, exclude_roles=[]):
333 |         it = instance_template
334 |         user_data_file_template = it.user_data_file_template
335 |         
336 |         if it.user_data_file_template == None:
337 |             user_data_file_template = self._get_default_user_data_file_template()
338 | 
339 |         ebs_mappings = []
340 |         storage = self.cluster.get_storage()
341 |         for role in it.roles:
342 |             if role in exclude_roles:
343 |                 continue 
344 |             if storage.has_any_storage((role,)):
345 |                 ebs_mappings.append(storage.get_mappings_string_for_role(role))
346 | 
347 |         replacements = {
348 |             "%ENV%": build_env_string(it.env_strings, {
349 |                 "ROLES": ",".join(it.roles),
350 |                 "USER_PACKAGES": it.user_packages,
351 |                 "AUTO_SHUTDOWN": it.auto_shutdown,
352 |                 "EBS_MAPPINGS": ";".join(ebs_mappings),
353 |             })
354 |         }
355 |         self.logger.debug("EBS Mappings: %s" % ";".join(ebs_mappings))
356 |         instance_user_data = InstanceUserData(user_data_file_template, replacements)
357 | 
358 |         self.logger.debug("InstanceUserData gzipped length: %d" % len(instance_user_data.read_as_gzip_stream()))
359 | 
360 |         instance_ids = self.cluster.launch_instances(it.roles, 
361 |                                                      it.number, 
362 |                                                      it.image_id,
363 |                                                      it.size_id,
364 |                                                      instance_user_data,
365 |                                                      key_name=it.key_name,
366 |                                                      public_key=it.public_key,
367 |                                                      placement=it.placement,
368 |                                                      security_groups=it.security_groups, 
369 |                                                      spot_config=it.spot_config)
370 | 
371 |         self.logger.debug("Instance ids reported to start: %s" % str(instance_ids))
372 |         return instance_ids
373 | 
374 |     def delete_storage(self, force=False):
375 |         storage = self.cluster.get_storage()
376 |         self._print_storage_status(storage)
377 |         if not force and not self._prompt("Delete all storage volumes? THIS WILL \
378 |     PERMANENTLY DELETE ALL DATA"):
379 |             print "Not deleting storage volumes."
380 |         else:
381 |             print "Deleting storage"
382 |             storage.delete(storage.get_roles())
383 |     
384 |     def create_storage(self, role, number_of_instances, availability_zone, spec_file):
385 |         storage = self.get_storage()
386 |         storage.create(role, number_of_instances, availability_zone, spec_file)
387 | 
388 |     def run_command(self, command, ssh_options):
389 |         instances = self.get_instances()
390 |         ssh_commands = [self._get_standard_ssh_command(instance, ssh_options=ssh_options, remote_command=command)
391 |                     for instance in instances]
392 |         procs = [subprocess.Popen(ssh_command, shell=True) for ssh_command in ssh_commands]
393 |         retcodes = [proc.wait() for proc in procs]
394 |         return zip(instances, retcodes)
395 | 
396 |     def _get_transfer_command(self, instance, file_name, ssh_options):
397 |         transfer_command = "scp %s %s %s:" % (xstr(ssh_options), file_name, instance.public_dns_name)
398 | #        transfer_command = self._get_standard_ssh_command(instance, ssh_options, "cat > %s" % file_name) + " < %s" % file_name
399 |         self.logger.debug("Transfer command: %s" % transfer_command)
400 |         return transfer_command
401 | 
402 |     def transfer_files(self, file_names, ssh_options):
403 |         instances = self.get_instances()
404 |         operations = list(itertools.product(instances, file_names))
405 |         ssh_commands = [self._get_transfer_command(instance, file_name, ssh_options) for instance, file_name in
406 |                         operations]
407 |         procs = [subprocess.Popen(ssh_command, shell=True) for ssh_command in ssh_commands]
408 |         retcodes = [proc.wait() for proc in procs]
409 |         return [(operation[0], operation[1], retcode) for operation, retcode in zip(operations, retcodes)]
410 | 
411 |     def login(self, instance, ssh_options):
412 |         ssh_command = self._get_standard_ssh_command(instance, ssh_options)
413 |         subprocess.call(ssh_command, shell=True)
414 |     
415 | 


--------------------------------------------------------------------------------
/cloud/providers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.


--------------------------------------------------------------------------------
/cloud/service.py:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """
17 | Classes for running services on a cluster.
18 | """
19 | 
20 | from __future__ import with_statement
21 | 
22 | from cloud.settings import SERVICE_PROVIDER_MAP
23 | from cloud.cluster import get_cluster
24 | from cloud.cluster import InstanceUserData
25 | from cloud.cluster import TimeoutException
26 | from cloud.providers.ec2 import Ec2Storage
27 | from cloud.util import build_env_string
28 | from cloud.util import url_get
29 | from cloud.util import xstr
30 | from prettytable import PrettyTable
31 | from datetime import datetime
32 | import logging
33 | import types
34 | import os
35 | import re
36 | import socket
37 | import subprocess
38 | import sys
39 | import time
40 | import tempfile
41 | import simplejson
42 | 
43 | logger = logging.getLogger(__name__) 
44 | 
45 | class InstanceTemplate(object):
46 |   """
47 |   A template for creating server instances in a cluster.
48 |   """
49 |   def __init__(self, roles, number, image_id, size_id,
50 |                      key_name, public_key,
51 |                      user_data_file_template=None, placement=None,
52 |                      user_packages=None, auto_shutdown=None, env_strings=[],
53 |                      security_groups=[], spot_config=None):
54 |     self.roles = roles
55 |     self.number = number
56 |     self.image_id = image_id
57 |     self.size_id = size_id
58 |     self.key_name = key_name
59 |     self.public_key = public_key
60 |     self.user_data_file_template = user_data_file_template
61 |     self.placement = placement
62 |     self.user_packages = user_packages
63 |     self.auto_shutdown = auto_shutdown
64 |     self.env_strings = env_strings
65 |     self.security_groups = security_groups
66 |     self.spot_config = spot_config
67 | 
68 |     t = type(self.security_groups)
69 |     if t is types.NoneType:
70 |         self.security_groups = []
71 |     elif t is types.StringType:
72 |         self.security_groups = [security_groups]
73 | 
74 |   def add_env_strings(self, env_strings):
75 |     new_env_strings = list(self.env_strings or [])
76 |     new_env_strings.extend(env_strings)
77 |     self.env_strings = new_env_strings
78 | 
79 | def get_service(service, provider):
80 |     """
81 |     Retrieve the Service class for a service and provider.
82 |     """
83 |     mod_name, service_classname = SERVICE_PROVIDER_MAP[service][provider]
84 |     _mod = __import__(mod_name, globals(), locals(), [service_classname])
85 |     return getattr(_mod, service_classname)
86 | 


--------------------------------------------------------------------------------
/cloud/settings.py:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | SERVICE_PROVIDER_MAP = {
17 |   "cassandra": {
18 |     "ec2": ('cassandra.service', 'CassandraService')
19 |   },
20 |   "hadoop": {
21 |     "ec2": ('hadoop.service', 'HadoopService'),
22 |     "ec2_spot": ('hadoop.service', 'HadoopService'),
23 |   },
24 |   "hadoop_cassandra_hybrid": {
25 |     "ec2": ('hadoop_cassandra_hybrid.service', 'HadoopCassandraHybridService')
26 |   },
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/cloud/storage.py:
--------------------------------------------------------------------------------
  1 | # Licensed to the Apache Software Foundation (ASF) under one or more
  2 | # contributor license agreements.  See the NOTICE file distributed with
  3 | # this work for additional information regarding copyright ownership.
  4 | # The ASF licenses this file to You under the Apache License, Version 2.0
  5 | # (the "License"); you may not use this file except in compliance with
  6 | # the License.  You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | """
 17 | Classes for controlling external cluster storage.
 18 | """
 19 | 
 20 | import logging
 21 | import sys
 22 | import simplejson as json
 23 | 
 24 | logger = logging.getLogger(__name__)
 25 | 
 26 | class VolumeSpec(object):
 27 |   """
 28 |   The specification for a storage volume, encapsulating all the information
 29 |   needed to create a volume and ultimately mount it on an instance.
 30 |   """
 31 |   def __init__(self, size, mount_point, device, snapshot_id):
 32 |     self.size = size
 33 |     self.mount_point = mount_point
 34 |     self.device = device
 35 |     self.snapshot_id = snapshot_id
 36 | 
 37 | 
 38 | class JsonVolumeSpecManager(object):
 39 |   """
 40 |   A container for VolumeSpecs. This object can read VolumeSpecs specified in
 41 |   JSON.
 42 |   """
 43 |   def __init__(self, spec_file):
 44 |     self.spec = json.load(spec_file)
 45 | 
 46 |   def volume_specs_for_role(self, role):
 47 |     return [VolumeSpec(d["size_gb"], d["mount_point"], d["device"],
 48 |                        d["snapshot_id"]) for d in self.spec[role]]
 49 | 
 50 |   def get_mappings_string_for_role(self, role):
 51 |     """
 52 |     Returns a short string of the form
 53 |     "role,mount_point1,device1;role,mount_point2,device2;..."
 54 |     which is useful for passing as an environment variable.
 55 |     """
 56 |     return ";".join(["%s,%s,%s" % (role, d["mount_point"], d["device"])
 57 |                      for d in self.spec[role]])
 58 | 
 59 | 
 60 | class MountableVolume(object):
 61 |   """
 62 |   A storage volume that has been created. It may or may not have been attached
 63 |   or mounted to an instance.
 64 |   """
 65 |   def __init__(self, volume_id, mount_point, device):
 66 |     self.volume_id = volume_id
 67 |     self.mount_point = mount_point
 68 |     self.device = device
 69 | 
 70 | 
 71 | class JsonVolumeManager(object):
 72 | 
 73 |   def __init__(self, filename):
 74 |     self.filename = filename
 75 | 
 76 |   def _load(self):
 77 |     try:
 78 |       return json.load(open(self.filename, "r"))
 79 |     except IOError:
 80 |       logger.debug("File %s does not exist.", self.filename)
 81 |       return {}
 82 | 
 83 |   def _store(self, obj):
 84 |     return json.dump(obj, open(self.filename, "w"), sort_keys=True, indent=2)
 85 |   
 86 |   def get_roles(self):
 87 |     json_dict = self._load()
 88 |     return json_dict.keys()
 89 | 
 90 |   def add_instance_storage_for_role(self, role, mountable_volumes):
 91 |     json_dict = self._load()
 92 |     mv_dicts = [mv.__dict__ for mv in mountable_volumes]
 93 |     json_dict.setdefault(role, []).append(mv_dicts)
 94 |     self._store(json_dict)
 95 | 
 96 |   def remove_instance_storage_for_role(self, role):
 97 |     json_dict = self._load()
 98 |     del json_dict[role]
 99 |     self._store(json_dict)
100 | 
101 |   def get_instance_storage_for_role(self, role):
102 |     """
103 |     Returns a list of lists of MountableVolume objects. Each nested list is
104 |     the storage for one instance.
105 |     """
106 |     try:
107 |       json_dict = self._load()
108 |       instance_storage = []
109 |       for instance in json_dict[role]:
110 |         vols = []
111 |         for vol in instance:
112 |           vols.append(MountableVolume(vol["volume_id"], vol["mount_point"],
113 |                                       vol["device"]))
114 |         instance_storage.append(vols)
115 |       return instance_storage
116 |     except KeyError:
117 |       return []
118 | 
119 | class Storage(object):
120 |   """
121 |   Storage volumes for a cluster. The storage is associated with a named
122 |   cluster. Many clusters just have local storage, in which case this is
123 |   not used.
124 |   """
125 | 
126 |   def __init__(self, cluster):
127 |     self.cluster = cluster
128 | 
129 |   def create(self, role, number_of_instances, availability_zone, spec_filename):
130 |     """
131 |     Create new storage volumes for instances with the given role, according to
132 |     the mapping defined in the spec file.
133 |     """
134 |     pass
135 | 
136 |   def get_mappings_string_for_role(self, role):
137 |     """
138 |     Returns a short string of the form
139 |     "mount_point1,device1;mount_point2,device2;..."
140 |     which is useful for passing as an environment variable.
141 |     """
142 |     raise Exception("Unimplemented")
143 | 
144 |   def has_any_storage(self, roles):
145 |     """
146 |     Return True if any of the given roles has associated storage
147 |     """
148 |     return False
149 | 
150 |   def get_roles(self):
151 |     """
152 |     Return a list of roles that have storage defined.
153 |     """
154 |     return []
155 | 
156 |   def print_status(self, roles=None):
157 |     """
158 |     Print the status of storage volumes for the given roles.
159 |     """
160 |     pass
161 | 
162 |   def attach(self, role, instances):
163 |     """
164 |     Attach volumes for a role to instances. Some volumes may already be
165 |     attached, in which case they are ignored, and we take care not to attach
166 |     multiple volumes to an instance.
167 |     """
168 |     pass
169 | 
170 |   def delete(self, roles=[]):
171 |     """
172 |     Permanently delete all the storage for the given roles.
173 |     """
174 |     pass
175 | 


--------------------------------------------------------------------------------
/cloud/util.py:
--------------------------------------------------------------------------------
  1 | # Licensed to the Apache Software Foundation (ASF) under one or more
  2 | # contributor license agreements.  See the NOTICE file distributed with
  3 | # this work for additional information regarding copyright ownership.
  4 | # The ASF licenses this file to You under the Apache License, Version 2.0
  5 | # (the "License"); you may not use this file except in compliance with
  6 | # the License.  You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | """
 17 | Utility functions.
 18 | """
 19 | 
 20 | import os
 21 | import csv
 22 | import time
 23 | import ConfigParser
 24 | import socket
 25 | import urllib2
 26 | import paramiko
 27 | import logging
 28 | 
 29 | from subprocess import Popen, PIPE, CalledProcessError
 30 | from boto.ec2 import regions as EC2Regions
 31 | from fabric.api import *
 32 | 
 33 | FULL_HIDE = hide("running", "stdout", "stderr", "warnings")
 34 | 
 35 | def get_ec2_connection(regionName):
 36 |     for region in EC2Regions():
 37 |         if region.name == regionName:
 38 |             return region.connect()
 39 | 
 40 |     raise RuntimeError("Unknown region name: %s" % regionName)
 41 | 
 42 | def bash_quote(text):
 43 |   """Quotes a string for bash, by using single quotes."""
 44 |   if text == None:
 45 |     return ""
 46 |   return "'%s'" % text.replace("'", "'\\''")
 47 | 
 48 | def bash_quote_env(env):
 49 |   """Quotes the value in an environment variable assignment."""
 50 |   if env.find("=") == -1:
 51 |     return env
 52 |   (var, value) = env.split("=", 1)
 53 |   return "%s=%s" % (var, bash_quote(value))
 54 | 
 55 | def build_env_string(env_strings=[], pairs={}):
 56 |   """Build a bash environment variable assignment"""
 57 |   env = ''
 58 |   if env_strings:
 59 |     for env_string in env_strings:
 60 |       env += "%s " % bash_quote_env(env_string)
 61 |   if pairs:
 62 |     for key, val in pairs.items():
 63 |       env += "%s=%s " % (key, bash_quote(val))
 64 |   return env[:-1]
 65 | 
 66 | def get_all_cluster_names_from_config_file(config):
 67 |   return config.sections()
 68 | 
 69 | def merge_config_with_options(section_name, config, options):
 70 |   """
 71 |   Merge configuration options with a dictionary of options.
 72 |   Keys in the options dictionary take precedence.
 73 |   """
 74 |   res = {}
 75 |   try:
 76 |     for (key, value) in config.items(section_name):
 77 |       if value.find("\n") != -1:
 78 |         res[key] = value.split("\n")
 79 |       else:
 80 |         res[key] = value
 81 |   except ConfigParser.NoSectionError:
 82 |     pass
 83 |   except ValueError, e:
 84 |     # incomplete format error usually means you forgot
 85 |     # to include the type for interpolation
 86 |     if "incomplete format" in e.message:
 87 |        msg = "Section '%s'. Double check that your formatting " \
 88 |              "contains the format type after the closing parantheses. " \
 89 |              "Example: %%(foo)s" % section_name
 90 |        raise ConfigParser.InterpolationError(options, section_name, msg)
 91 | 
 92 |   for key in options:
 93 |     if options[key] != None:
 94 |       res[key] = options[key]
 95 |   return res
 96 | 
 97 | def url_get(url, timeout=10, retries=0):
 98 |   """
 99 |   Retrieve content from the given URL.
100 |   """
101 |    # in Python 2.6 we can pass timeout to urllib2.urlopen
102 |   socket.setdefaulttimeout(timeout)
103 |   attempts = 0
104 |   while True:
105 |     try:
106 |       return urllib2.urlopen(url).read()
107 |     except urllib2.URLError:
108 |       attempts = attempts + 1
109 |       if attempts > retries:
110 |         raise
111 | 
112 | def xstr(string):
113 |   """Sane string conversion: return an empty string if string is None."""
114 |   return '' if string is None else str(string)
115 | 
116 | def check_output(*popenargs, **kwargs):
117 |   r"""Run command with arguments and return its output as a byte string.
118 | 
119 |   If the exit code was non-zero it raises a CalledProcessError.  The
120 |   CalledProcessError object will have the return code in the returncode
121 |   attribute and output in the output attribute.
122 | 
123 |   The arguments are the same as for the Popen constructor.  Example:
124 | 
125 |   >>> check_output(["ls", "-l", "/dev/null"])
126 |   'crw-rw-rw- 1 root root 1, 3 Oct 18  2007 /dev/null\n'
127 | 
128 |   The stdout argument is not allowed as it is used internally.
129 |   To capture standard error in the result, use stderr=STDOUT.
130 | 
131 |   >>> check_output(["/bin/sh", "-c",
132 |   ...               "ls -l non_existent_file ; exit 0"],
133 |   ...              stderr=STDOUT)
134 |   'ls: non_existent_file: No such file or directory\n'
135 | 
136 |   NOTE: copied from 2.7 standard library so that we maintain our compatibility with 2.5
137 |   """
138 |   if 'stdout' in kwargs:
139 |       raise ValueError('stdout argument not allowed, it will be overridden.')
140 |   process = Popen(stdout=PIPE, *popenargs, **kwargs)
141 |   output, unused_err = process.communicate()
142 |   retcode = process.poll()
143 |   if retcode:
144 |       cmd = kwargs.get("args")
145 |       if cmd is None:
146 |           cmd = popenargs[0]
147 |       raise CalledProcessError(retcode, cmd)
148 |   return output
149 | 
150 | def log_cluster_action(config_dir, cluster_name, command, number,
151 | instance_type=None, provider=None, plugin=None):
152 |     """Log details of cluster launching or termination to a csv file.
153 |     """
154 | 
155 |     csv_file = open(os.path.join(config_dir, "launch_log.csv"), "a+b")
156 |     csv_log = csv.writer(csv_file)
157 |     csv_log.writerow([cluster_name, command, number, instance_type, provider, plugin, time.strftime("%Y-%m-%d %H:%M:%S %Z")])
158 |     csv_file.close()
159 | 
160 | def ssh_available(user, private_key, host, port=22, timeout=10):
161 |     client = paramiko.SSHClient()
162 | 
163 |     # Load known host keys (e.g. ~/.ssh/known_hosts) unless user says not to.
164 |     if not env.disable_known_hosts:
165 |         client.load_system_host_keys()
166 |     # Unless user specified not to, accept/add new, unknown host keys
167 |     if not env.reject_unknown_hosts:
168 |         client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
169 | 
170 |     try:
171 |         client.connect(
172 |             hostname=host,
173 |             port=port,
174 |             username=user,
175 |             key_filename=private_key,
176 |             timeout=timeout,
177 |             allow_agent=not env.no_agent,
178 |             look_for_keys=not env.no_keys
179 |         )
180 |         return True
181 |     except Exception, e:
182 |         logging.warn(e)
183 |         return False
184 | 
185 | def exec_command(cmd, **kwargs):
186 |     c = sudo if use_sudo() else run
187 |     return c(cmd, **kwargs)
188 | 
189 | def use_sudo():
190 |     return env.user != "root"
191 | 


--------------------------------------------------------------------------------
/example_scripts/cassandra-ec2-init-remote.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash -x
  2 | 
  3 | # Licensed to the Apache Software Foundation (ASF) under one or more
  4 | # contributor license agreements.  See the NOTICE file distributed with
  5 | # this work for additional information regarding copyright ownership.
  6 | # The ASF licenses this file to You under the Apache License, Version 2.0
  7 | # (the "License"); you may not use this file except in compliance with
  8 | # the License.  You may obtain a copy of the License at
  9 | #
 10 | #     http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | ################################################################################
 19 | # Script that is run on each EC2 instance on boot. It is passed in the EC2 user
 20 | # data, so should not exceed 16K in size after gzip compression.
 21 | #
 22 | # This script is executed by /etc/init.d/ec2-run-user-data, and output is
 23 | # logged to /var/log/messages.
 24 | ################################################################################
 25 | 
 26 | set -e -x
 27 | 
 28 | ################################################################################
 29 | # Initialize variables
 30 | ################################################################################
 31 | 
 32 | # Substitute environment variables passed by the client
 33 | export %ENV%
 34 | 
 35 | # Write environment variables to /root/.bash_profile
 36 | echo "export %ENV%" >> ~root/.bash_profile
 37 | echo "export %ENV%" >> ~root/.bashrc
 38 | 
 39 | DEFAULT_CASSANDRA_URL="http://mirror.cloudera.com/apache/cassandra/0.6.4/apache-cassandra-0.6.4-bin.tar.gz"
 40 | PUBLIC_HOSTNAME=`wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname`
 41 | CASSANDRA_HOME_ALIAS=/usr/local/apache-cassandra
 42 | DEFAULT_JNA_URL="http://java.net/projects/jna/sources/svn/content/tags/3.2.7/jnalib/dist/jna.jar?rev=1182"
 43 | if [ -z INSTALL_JNA ]; then
 44 |     INSTALL_JNA=1
 45 | fi
 46 | if [ -z PUBLIC_JMX ]; then
 47 |     PUBLIC_JMX=0
 48 | fi
 49 | 
 50 | function install_jna() {
 51 |     if [ $INSTALL_JNA -eq 0 ]; then
 52 |         return
 53 |     fi
 54 |     curl="curl --retry 3 --silent --show-error --fail"
 55 |     if [ -z "$JNA_URL" ]; then
 56 |         JNA_URL=$DEFAULT_JNA_URL
 57 |     fi
 58 | 
 59 |     $curl -o "jna.jar" $DEFAULT_JNA_URL
 60 |     cp "jna.jar" $CASSANDRA_HOME_WITH_VERSION/lib
 61 |     rm -rf "jna.jar"
 62 | }
 63 | 
 64 | function install_cassandra() {
 65 | 
 66 |     curl="curl --retry 3 --silent --show-error --fail"
 67 |     if [ ! -z "$CASSANDRA_URL" ]; then
 68 |         DEFAULT_CASSANDRA_URL=$CASSANDRA_URL
 69 |     fi
 70 | 
 71 |     cassandra_tar_file=`basename $DEFAULT_CASSANDRA_URL`
 72 |     $curl -O $DEFAULT_CASSANDRA_URL
 73 |     
 74 |     tar zxf $cassandra_tar_file -C /usr/local
 75 |     rm -f $cassandra_tar_file
 76 | 
 77 |     CASSANDRA_HOME_WITH_VERSION=/usr/local/`ls -1 /usr/local | grep cassandra`
 78 | 
 79 |     echo "export CASSANDRA_HOME=$CASSANDRA_HOME_ALIAS" >> ~root/.bash_profile
 80 |     echo 'export PATH=$CASSANDRA_HOME/bin:$PATH' >> ~root/.bash_profile
 81 | 
 82 |     install_jna
 83 | }
 84 | 
 85 | function wait_for_mount {
 86 |   mount=$1
 87 |   device=$2
 88 | 
 89 |   mkdir -p $mount
 90 | 
 91 |   i=1
 92 |   echo "Attempting to mount $device"
 93 |   while true ; do
 94 |     sleep 10
 95 |     echo -n "$i "
 96 |     i=$[$i+1]
 97 |     mount -o defaults,noatime $device $mount || continue
 98 |     echo " Mounted."
 99 |     break;
100 |   done
101 | 
102 |   if [ -e $mount/lost+found ]; then
103 |     rm -rf $mount/lost+found
104 |   fi
105 | }
106 | 
107 | function configure_cassandra() {
108 |   if [ -n "$EBS_MAPPINGS" ]; then
109 |     # EBS_MAPPINGS is like "cn,/ebs1,/dev/sdj;cn,/ebs2,/dev/sdk"
110 |     # EBS_MAPPINGS is like "ROLE,MOUNT_POINT,DEVICE;ROLE,MOUNT_POINT,DEVICE"
111 |     for mapping in $(echo "$EBS_MAPPINGS" | tr ";" "\n"); do
112 |       role=`echo $mapping | cut -d, -f1`
113 |       mount=`echo $mapping | cut -d, -f2`
114 |       device=`echo $mapping | cut -d, -f3`
115 |       wait_for_mount $mount $device
116 |     done
117 |   fi
118 | 
119 |     if [ -f "$CASSANDRA_HOME_WITH_VERSION/conf/cassandra-env.sh" ]
120 |     then
121 |         # for cassandra 0.7.x we need to set the MAX_HEAP_SIZE env
122 |         # variable so that it can be used in cassandra-env.sh on
123 |         # startup
124 |         if [ -z "$MAX_HEAP_SIZE" ]
125 |         then
126 |             JVM_OPTS="-XX:+PrintGCApplicationStoppedTime -XX:HeapDumpPath=/mnt"
127 |             if [ $PUBLIC_JMX -gt 0 ]; then
128 |                 JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname="$PUBLIC_HOSTNAME
129 |             fi
130 |             INSTANCE_TYPE=`wget -q -O - http://169.254.169.254/latest/meta-data/instance-type`
131 |             case $INSTANCE_TYPE in
132 |             m1.xlarge|m2.xlarge)
133 |                 MAX_HEAP_SIZE="10G"
134 |                 ;;
135 |             m1.large|c1.xlarge)
136 |                 MAX_HEAP_SIZE="5G"
137 |                 ;;
138 |             *)
139 |                 # Don't set it and let cassandra-env figure it out
140 |                 ;;
141 |             esac
142 | 
143 |             # write it to the profile
144 |             echo "export MAX_HEAP_SIZE=$MAX_HEAP_SIZE" >> ~root/.bash_profile
145 |             echo "export MAX_HEAP_SIZE=$MAX_HEAP_SIZE" >> ~root/.bashrc
146 |             echo "export JVM_OPTS=\"$JVM_OPTS\"" >> ~root/.bash_profile
147 |             echo "export JVM_OPTS=\"$JVM_OPTS\"" >> ~root/.bashrc
148 |         fi
149 |     else
150 |         write_cassandra_in_sh_file
151 |     fi
152 | }
153 | 
154 | function write_cassandra_in_sh_file {
155 |   # for cassandra 0.6.x memory settings
156 | 
157 |   # configure the cassandra.in.sh script based on instance type
158 |   INSTANCE_TYPE=`wget -q -O - http://169.254.169.254/latest/meta-data/instance-type`
159 |   SETTINGS_FILE=$CASSANDRA_HOME_WITH_VERSION/bin/cassandra.in.sh
160 | 
161 |   cat > $SETTINGS_FILE <<EOF
162 | # Licensed to the Apache Software Foundation (ASF) under one
163 | # or more contributor license agreements.  See the NOTICE file
164 | # distributed with this work for additional information
165 | # regarding copyright ownership.  The ASF licenses this file
166 | # to you under the Apache License, Version 2.0 (the
167 | # "License"); you may not use this file except in compliance
168 | # with the License.  You may obtain a copy of the License at
169 | #
170 | #     http://www.apache.org/licenses/LICENSE-2.0
171 | #
172 | # Unless required by applicable law or agreed to in writing, software
173 | # distributed under the License is distributed on an "AS IS" BASIS,
174 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
175 | # See the License for the specific language governing permissions and
176 | # limitations under the License.
177 | 
178 | cassandra_home=$CASSANDRA_HOME_ALIAS
179 | 
180 | # The directory where Cassandra's configs live (required)
181 | CASSANDRA_CONF=\$cassandra_home/conf
182 | 
183 | # This can be the path to a jar file, or a directory containing the 
184 | # compiled classes. NOTE: This isn't needed by the startup script,
185 | # it's just used here in constructing the classpath.
186 | cassandra_bin=\$cassandra_home/build/classes
187 | #cassandra_bin=\$cassandra_home/build/cassandra.jar
188 | 
189 | # JAVA_HOME can optionally be set here
190 | #JAVA_HOME=/usr/local/jdk6
191 | 
192 | # The java classpath (required)
193 | CLASSPATH=\$CASSANDRA_CONF:\$cassandra_bin
194 | 
195 | for jar in \$cassandra_home/lib/*.jar; do
196 |     CLASSPATH=\$CLASSPATH:\$jar
197 | done
198 | 
199 | EOF
200 | 
201 |   case $INSTANCE_TYPE in
202 |   m1.xlarge|m2.xlarge)
203 |     cat >> $SETTINGS_FILE <<EOF
204 | # Arguments to pass to the JVM
205 | JVM_OPTS="-ea -Xms10G -Xmx10G -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=1 -XX:+HeapDumpOnOutOfMemoryError -Dcom.sun.management.jmxremote.port=8080 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Djava.rmi.server.hostname="$HOSTNAME
206 | EOF
207 |     ;;
208 |   m1.large|c1.xlarge)
209 |     cat >> $SETTINGS_FILE <<EOF
210 | # Arguments to pass to the JVM
211 | JVM_OPTS="-ea -Xms5G -Xmx5G -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=1 -XX:+HeapDumpOnOutOfMemoryError -Dcom.sun.management.jmxremote.port=8080 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Djava.rmi.server.hostname="$HOSTNAME
212 | EOF
213 |     ;;
214 |   *)
215 |     cat >> $SETTINGS_FILE <<EOF
216 | # Arguments to pass to the JVM
217 | JVM_OPTS="-ea -Xms256M -Xmx1G -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=1 -XX:+HeapDumpOnOutOfMemoryError -Dcom.sun.management.jmxremote.port=8080 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Djava.rmi.server.hostname="$HOSTNAME
218 | EOF
219 |     ;;
220 |   esac
221 | }
222 | 
223 | function finish_cassandra {
224 |     # symlink the actual cassandra directory to a more standard one (without version)
225 |     # this is very important so the cluster can be configured more easily after the 
226 |     # machines start and services started/stopped
227 |     #
228 |     # NOTE: Stratus also looks for this aliased directory to know when cassandra
229 |     # is ready to be started
230 |     ln -s $CASSANDRA_HOME_WITH_VERSION $CASSANDRA_HOME_ALIAS
231 | }
232 | 
233 | install_cassandra
234 | configure_cassandra
235 | finish_cassandra
236 | 


--------------------------------------------------------------------------------
/example_scripts/hadoop-cassandra-hybrid-ec2-init-remote.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash -x
  2 | 
  3 | # Licensed to the Apache Software Foundation (ASF) under one or more
  4 | # contributor license agreements.  See the NOTICE file distributed with
  5 | # this work for additional information regarding copyright ownership.
  6 | # The ASF licenses this file to You under the Apache License, Version 2.0
  7 | # (the "License"); you may not use this file except in compliance with
  8 | # the License.  You may obtain a copy of the License at
  9 | #
 10 | #     http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | ################################################################################
 19 | # Script that is run on each EC2 instance on boot. It is passed in the EC2 user
 20 | # data, so should not exceed 16K in size after gzip compression.
 21 | #
 22 | # This script is executed by /etc/init.d/ec2-run-user-data, and output is
 23 | # logged to /var/log/messages.
 24 | ################################################################################
 25 | 
 26 | ################################################################################
 27 | # Initialize variables
 28 | ################################################################################
 29 | 
 30 | # Substitute environment variables passed by the client
 31 | export %ENV%
 32 | 
 33 | echo "export %ENV%" >> ~root/.bash_profile
 34 | echo "export %ENV%" >> ~root/.bashrc
 35 | 
 36 | DEFAULT_CASSANDRA_URL="http://mirror.cloudera.com/apache/cassandra/0.6.4/apache-cassandra-0.6.4-bin.tar.gz"
 37 | CASSANDRA_HOME_ALIAS=/usr/local/apache-cassandra
 38 | 
 39 | HADOOP_VERSION=${HADOOP_VERSION:-0.20.1}
 40 | HADOOP_HOME=/usr/local/hadoop-$HADOOP_VERSION
 41 | HADOOP_CONF_DIR=$HADOOP_HOME/conf
 42 | 
 43 | PIG_VERSION=${PIG_VERSION:-0.7.0}
 44 | PIG_HOME=/usr/local/pig-$PIG_VERSION
 45 | PIG_CONF_DIR=$PIG_HOME/conf
 46 | 
 47 | SELF_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname`
 48 | for role in $(echo "$ROLES" | tr "," "\n"); do
 49 |   case $role in
 50 |   hybrid_nn)
 51 |     HYBRID_NN_HOST=$SELF_HOST
 52 |     ;;
 53 |   hybrid_jt)
 54 |     HYBRID_JT_HOST=$SELF_HOST
 55 |     ;;
 56 |   esac
 57 | done
 58 | 
 59 | function register_auto_shutdown() {
 60 |   if [ ! -z "$AUTO_SHUTDOWN" ]; then
 61 |     shutdown -h +$AUTO_SHUTDOWN >/dev/null &
 62 |   fi
 63 | }
 64 | 
 65 | # Install a list of packages on debian or redhat as appropriate
 66 | function install_packages() {
 67 |   if which dpkg &> /dev/null; then
 68 |     apt-get update
 69 |     apt-get -y install $@
 70 |   elif which rpm &> /dev/null; then
 71 |     yum install -y $@
 72 |   else
 73 |     echo "No package manager found."
 74 |   fi
 75 | }
 76 | 
 77 | # Install any user packages specified in the USER_PACKAGES environment variable
 78 | function install_user_packages() {
 79 |   if [ ! -z "$USER_PACKAGES" ]; then
 80 |     install_packages $USER_PACKAGES
 81 |   fi
 82 | }
 83 | 
 84 | function install_yourkit() {
 85 | 	mkdir /mnt/yjp
 86 | 	YOURKIT_URL="http://www.yourkit.com/download/yjp-9.0.7-linux.tar.bz2"
 87 | 	curl="curl --retry 3 --silent --show-error --fail"
 88 | 	$curl -O $YOURKIT_URL
 89 | 	yourkit_tar_file=`basename $YOURKIT_URL`
 90 |     tar xjf $yourkit_tar_file -C /mnt/yjp
 91 |     rm -f $yourkit_tar_file
 92 |     chown -R hadoop /mnt/yjp
 93 | 	chgrp -R hadoop /mnt/yjp
 94 | }
 95 | 
 96 | function install_hadoop() {
 97 |   useradd hadoop
 98 | 
 99 |   hadoop_tar_url=http://s3.amazonaws.com/hadoop-releases/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
100 |   hadoop_tar_file=`basename $hadoop_tar_url`
101 |   hadoop_tar_md5_file=`basename $hadoop_tar_url.md5`
102 | 
103 |   curl="curl --retry 3 --silent --show-error --fail"
104 |   for i in `seq 1 3`;
105 |   do
106 |     $curl -O $hadoop_tar_url
107 |     $curl -O $hadoop_tar_url.md5
108 |     if md5sum -c $hadoop_tar_md5_file; then
109 |       break;
110 |     else
111 |       rm -f $hadoop_tar_file $hadoop_tar_md5_file
112 |     fi
113 |   done
114 | 
115 |   if [ ! -e $hadoop_tar_file ]; then
116 |     echo "Failed to download $hadoop_tar_url. Aborting."
117 |     exit 1
118 |   fi
119 | 
120 |   tar zxf $hadoop_tar_file -C /usr/local
121 |   rm -f $hadoop_tar_file $hadoop_tar_md5_file
122 | 
123 |   echo "export HADOOP_HOME=$HADOOP_HOME" >> ~root/.bashrc
124 |   echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> ~root/.bashrc
125 | }
126 | 
127 | function install_pig()
128 | {
129 |   pig_tar_url=http://mirror.cloudera.com/apache/hadoop/pig/pig-$PIG_VERSION/pig-$PIG_VERSION.tar.gz
130 |   pig_tar_file=`basename $pig_tar_url`
131 | 
132 |   curl="curl --retry 3 --silent --show-error --fail"
133 |   for i in `seq 1 3`;
134 |   do
135 |     $curl -O $pig_tar_url
136 |   done
137 | 
138 |   if [ ! -e $pig_tar_file ]; then
139 |     echo "Failed to download $pig_tar_url. Pig will not be installed."
140 |   else
141 |     tar zxf $pig_tar_file -C /usr/local
142 |     rm -f $pig_tar_file
143 | 
144 |     if [ ! -e $HADOOP_CONF_DIR ]; then
145 |       echo "Hadoop must be installed.  Aborting."
146 |       exit 1
147 |     fi
148 | 
149 |     cp $HADOOP_CONF_DIR/*.xml $PIG_CONF_DIR/
150 | 
151 |     echo "export PIG_HOME=$PIG_HOME" >> ~root/.bashrc
152 |     echo 'export PATH=$JAVA_HOME/bin:$PIG_HOME/bin:$PATH' >> ~root/.bashrc
153 |  fi
154 | }
155 | 
156 | function prep_disk() {
157 |   mount=$1
158 |   device=$2
159 |   automount=${3:-false}
160 | 
161 |   echo "warning: ERASING CONTENTS OF $device"
162 |   mkfs.xfs -f $device
163 |   if [ ! -e $mount ]; then
164 |     mkdir $mount
165 |   fi
166 |   mount -o defaults,noatime $device $mount
167 |   if $automount ; then
168 |     echo "$device $mount xfs defaults,noatime 0 0" >> /etc/fstab
169 |   fi
170 | }
171 | 
172 | function wait_for_mount {
173 |   mount=$1
174 |   device=$2
175 | 
176 |   mkdir $mount
177 | 
178 |   i=1
179 |   echo "Attempting to mount $device"
180 |   while true ; do
181 |     sleep 10
182 |     echo -n "$i "
183 |     i=$[$i+1]
184 |     mount -o defaults,noatime $device $mount || continue
185 |     echo " Mounted."
186 |     break;
187 |   done
188 | }
189 | 
190 | function make_hadoop_dirs {
191 |   for mount in "$@"; do
192 |     if [ ! -e $mount/hadoop ]; then
193 |       mkdir -p $mount/hadoop
194 |       chown hadoop:hadoop $mount/hadoop
195 |     fi
196 |   done
197 | }
198 | 
199 | # Configure Hadoop by setting up disks and site file
200 | function configure_hadoop() {
201 | 
202 |   install_packages xfsprogs # needed for XFS
203 | 
204 |   INSTANCE_TYPE=`wget -q -O - http://169.254.169.254/latest/meta-data/instance-type`
205 | 
206 |   if [ -n "$EBS_MAPPINGS" ]; then
207 |     # EBS_MAPPINGS is like "hybrid_nn,/ebs1,/dev/sdj;hybrid_dn,/ebs2,/dev/sdk"
208 |     # EBS_MAPPINGS is like "ROLE,MOUNT_POINT,DEVICE;ROLE,MOUNT_POINT,DEVICE"
209 |     DFS_NAME_DIR=''
210 |     FS_CHECKPOINT_DIR=''
211 |     DFS_DATA_DIR=''
212 |     for mapping in $(echo "$EBS_MAPPINGS" | tr ";" "\n"); do
213 |       role=`echo $mapping | cut -d, -f1`
214 |       mount=`echo $mapping | cut -d, -f2`
215 |       device=`echo $mapping | cut -d, -f3`
216 |       wait_for_mount $mount $device
217 |       DFS_NAME_DIR=${DFS_NAME_DIR},"$mount/hadoop/hdfs/name"
218 |       FS_CHECKPOINT_DIR=${FS_CHECKPOINT_DIR},"$mount/hadoop/hdfs/secondary"
219 |       DFS_DATA_DIR=${DFS_DATA_DIR},"$mount/hadoop/hdfs/data"
220 |       FIRST_MOUNT=${FIRST_MOUNT-$mount}
221 |       make_hadoop_dirs $mount
222 |     done
223 |     # Remove leading commas
224 |     DFS_NAME_DIR=${DFS_NAME_DIR#?}
225 |     FS_CHECKPOINT_DIR=${FS_CHECKPOINT_DIR#?}
226 |     DFS_DATA_DIR=${DFS_DATA_DIR#?}
227 | 
228 |     DFS_REPLICATION=3 # EBS is internally replicated, but we also use HDFS replication for safety
229 |   else
230 |     case $INSTANCE_TYPE in
231 |     m1.xlarge|c1.xlarge)
232 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name,/mnt2/hadoop/hdfs/name
233 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary,/mnt2/hadoop/hdfs/secondary
234 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data,/mnt2/hadoop/hdfs/data,/mnt3/hadoop/hdfs/data,/mnt4/hadoop/hdfs/data
235 |       ;;
236 |     m1.large)
237 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name,/mnt2/hadoop/hdfs/name
238 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary,/mnt2/hadoop/hdfs/secondary
239 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data,/mnt2/hadoop/hdfs/data
240 |       ;;
241 |     *)
242 |       # "m1.small" or "c1.medium"
243 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name
244 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary
245 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data
246 |       ;;
247 |     esac
248 |     FIRST_MOUNT=/mnt
249 |     DFS_REPLICATION=3
250 |   fi
251 | 
252 |   case $INSTANCE_TYPE in
253 |   m1.xlarge|c1.xlarge)
254 |     prep_disk /mnt2 /dev/sdc true &
255 |     disk2_pid=$!
256 |     prep_disk /mnt3 /dev/sdd true &
257 |     disk3_pid=$!
258 |     prep_disk /mnt4 /dev/sde true &
259 |     disk4_pid=$!
260 |     wait $disk2_pid $disk3_pid $disk4_pid
261 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local,/mnt2/hadoop/mapred/local,/mnt3/hadoop/mapred/local,/mnt4/hadoop/mapred/local
262 |     MAX_MAP_TASKS=8
263 |     MAX_REDUCE_TASKS=4
264 |     CHILD_OPTS=-Xmx680m
265 |     CHILD_ULIMIT=1392640
266 |     ;;
267 |   m1.large)
268 |     prep_disk /mnt2 /dev/sdc true
269 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local,/mnt2/hadoop/mapred/local
270 |     MAX_MAP_TASKS=4
271 |     MAX_REDUCE_TASKS=2
272 |     CHILD_OPTS=-Xmx1024m
273 |     CHILD_ULIMIT=2097152
274 |     ;;
275 |   c1.medium)
276 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local
277 |     MAX_MAP_TASKS=4
278 |     MAX_REDUCE_TASKS=2
279 |     CHILD_OPTS=-Xmx550m
280 |     CHILD_ULIMIT=1126400
281 |     ;;
282 |   *)
283 |     # "m1.small"
284 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local
285 |     MAX_MAP_TASKS=2
286 |     MAX_REDUCE_TASKS=1
287 |     CHILD_OPTS=-Xmx550m
288 |     CHILD_ULIMIT=1126400
289 |     ;;
290 |   esac
291 | 
292 |   make_hadoop_dirs `ls -d /mnt*`
293 | 
294 |   # Create tmp directory
295 |   mkdir /mnt/tmp
296 |   chmod a+rwxt /mnt/tmp
297 |   
298 |   mkdir /etc/hadoop
299 |   ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
300 | 
301 |   ##############################################################################
302 |   # Modify this section to customize your Hadoop cluster.
303 |   ##############################################################################
304 |   cat > $HADOOP_CONF_DIR/hadoop-site.xml <<EOF
305 | <?xml version="1.0"?>
306 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
307 | <configuration>
308 | <property>
309 |   <name>dfs.block.size</name>
310 |   <value>134217728</value>
311 |   <final>true</final>
312 | </property>
313 | <property>
314 |   <name>dfs.data.dir</name>
315 |   <value>$DFS_DATA_DIR</value>
316 |   <final>true</final>
317 | </property>
318 | <property>
319 |   <name>dfs.datanode.du.reserved</name>
320 |   <value>1073741824</value>
321 |   <final>true</final>
322 | </property>
323 | <property>
324 |   <name>dfs.datanode.handler.count</name>
325 |   <value>3</value>
326 |   <final>true</final>
327 | </property>
328 | <!--property>
329 |   <name>dfs.hosts</name>
330 |   <value>$HADOOP_CONF_DIR/dfs.hosts</value>
331 |   <final>true</final>
332 | </property-->
333 | <!--property>
334 |   <name>dfs.hosts.exclude</name>
335 |   <value>$HADOOP_CONF_DIR/dfs.hosts.exclude</value>
336 |   <final>true</final>
337 | </property-->
338 | <property>
339 |   <name>dfs.name.dir</name>
340 |   <value>$DFS_NAME_DIR</value>
341 |   <final>true</final>
342 | </property>
343 | <property>
344 |   <name>dfs.namenode.handler.count</name>
345 |   <value>5</value>
346 |   <final>true</final>
347 | </property>
348 | <property>
349 |   <name>dfs.permissions</name>
350 |   <value>true</value>
351 |   <final>true</final>
352 | </property>
353 | <property>
354 |   <name>dfs.replication</name>
355 |   <value>$DFS_REPLICATION</value>
356 | </property>
357 | <property>
358 |   <name>fs.checkpoint.dir</name>
359 |   <value>$FS_CHECKPOINT_DIR</value>
360 |   <final>true</final>
361 | </property>
362 | <property>
363 |   <name>fs.default.name</name>
364 |   <value>hdfs://$HYBRID_NN_HOST:8020/</value>
365 | </property>
366 | <property>
367 |   <name>fs.trash.interval</name>
368 |   <value>1440</value>
369 |   <final>true</final>
370 | </property>
371 | <property>
372 |   <name>hadoop.tmp.dir</name>
373 |   <value>/mnt/tmp/hadoop-\${user.name}</value>
374 |   <final>true</final>
375 | </property>
376 | <property>
377 |   <name>io.file.buffer.size</name>
378 |   <value>65536</value>
379 | </property>
380 | <property>
381 |   <name>mapred.child.java.opts</name>
382 |   <value>$CHILD_OPTS</value>
383 | </property>
384 | <property>
385 |   <name>mapred.child.ulimit</name>
386 |   <value>$CHILD_ULIMIT</value>
387 |   <final>true</final>
388 | </property>
389 | <property>
390 |   <name>mapred.job.tracker</name>
391 |   <value>$HYBRID_JT_HOST:8021</value>
392 | </property>
393 | <property>
394 |   <name>mapred.job.tracker.handler.count</name>
395 |   <value>5</value>
396 |   <final>true</final>
397 | </property>
398 | <property>
399 |   <name>mapred.local.dir</name>
400 |   <value>$MAPRED_LOCAL_DIR</value>
401 |   <final>true</final>
402 | </property>
403 | <property>
404 |   <name>mapred.map.tasks.speculative.execution</name>
405 |   <value>true</value>
406 | </property>
407 | <property>
408 |   <name>mapred.reduce.parallel.copies</name>
409 |   <value>10</value>
410 | </property>
411 | <property>
412 |   <name>mapred.reduce.tasks</name>
413 |   <value>10</value>
414 | </property>
415 | <property>
416 |   <name>mapred.reduce.tasks.speculative.execution</name>
417 |   <value>false</value>
418 | </property>
419 | <property>
420 |   <name>mapred.submit.replication</name>
421 |   <value>10</value>
422 | </property>
423 | <property>
424 |   <name>mapred.system.dir</name>
425 |   <value>/hadoop/system/mapred</value>
426 | </property>
427 | <property>
428 |   <name>mapred.tasktracker.map.tasks.maximum</name>
429 |   <value>$MAX_MAP_TASKS</value>
430 |   <final>true</final>
431 | </property>
432 | <property>
433 |   <name>mapred.tasktracker.reduce.tasks.maximum</name>
434 |   <value>$MAX_REDUCE_TASKS</value>
435 |   <final>true</final>
436 | </property>
437 | <property>
438 |   <name>tasktracker.http.threads</name>
439 |   <value>46</value>
440 |   <final>true</final>
441 | </property>
442 | <property>
443 |   <name>mapred.compress.map.output</name>
444 |   <value>true</value>
445 | </property>
446 | <property>
447 |   <name>mapred.output.compression.type</name>
448 |   <value>BLOCK</value>
449 | </property>
450 | <property>
451 |   <name>hadoop.rpc.socket.factory.class.default</name>
452 |   <value>org.apache.hadoop.net.StandardSocketFactory</value>
453 |   <final>true</final>
454 | </property>
455 | <property>
456 |   <name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
457 |   <value></value>
458 |   <final>true</final>
459 | </property>
460 | <property>
461 |   <name>hadoop.rpc.socket.factory.class.JobSubmissionProtocol</name>
462 |   <value></value>
463 |   <final>true</final>
464 | </property>
465 | <property>
466 |   <name>io.compression.codecs</name>
467 |   <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</value>
468 | </property>
469 | <property>
470 |   <name>fs.s3.awsAccessKeyId</name>
471 |   <value>$AWS_ACCESS_KEY_ID</value>
472 | </property>
473 | <property>
474 |   <name>fs.s3.awsSecretAccessKey</name>
475 |   <value>$AWS_SECRET_ACCESS_KEY</value>
476 | </property>
477 | <property>
478 |   <name>fs.s3n.awsAccessKeyId</name>
479 |   <value>$AWS_ACCESS_KEY_ID</value>
480 | </property>
481 | <property>
482 |   <name>fs.s3n.awsSecretAccessKey</name>
483 |   <value>$AWS_SECRET_ACCESS_KEY</value>
484 | </property>
485 | </configuration>
486 | EOF
487 | 
488 |   # Keep PID files in a non-temporary directory
489 |   sed -i -e "s|# export HADOOP_PID_DIR=.*|export HADOOP_PID_DIR=/var/run/hadoop|" \
490 |     $HADOOP_CONF_DIR/hadoop-env.sh
491 |   mkdir -p /var/run/hadoop
492 |   chown -R hadoop:hadoop /var/run/hadoop
493 | 
494 |   # Set SSH options within the cluster
495 |   sed -i -e 's|# export HADOOP_SSH_OPTS=.*|export HADOOP_SSH_OPTS="-o StrictHostKeyChecking=no"|' \
496 |     $HADOOP_CONF_DIR/hadoop-env.sh
497 | 
498 |   # Hadoop logs should be on the /mnt partition
499 |   sed -i -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/var/log/hadoop/logs|' \
500 |     $HADOOP_CONF_DIR/hadoop-env.sh
501 |   rm -rf /var/log/hadoop
502 |   mkdir /mnt/hadoop/logs
503 |   chown hadoop:hadoop /mnt/hadoop/logs
504 |   ln -s /mnt/hadoop/logs /var/log/hadoop
505 |   chown -R hadoop:hadoop /var/log/hadoop
506 | 
507 | }
508 | 
509 | # Sets up small website on cluster.
510 | function setup_web() {
511 | 
512 |   if which dpkg &> /dev/null; then
513 |     apt-get -y install thttpd
514 |     WWW_BASE=/var/www
515 |   elif which rpm &> /dev/null; then
516 |     yum install -y thttpd
517 |     chkconfig --add thttpd
518 |     WWW_BASE=/var/www/thttpd/html
519 |   fi
520 | 
521 |   cat > $WWW_BASE/index.html << END
522 | <html>
523 | <head>
524 | <title>Hadoop EC2 Cluster</title>
525 | </head>
526 | <body>
527 | <h1>Hadoop EC2 Cluster</h1>
528 | To browse the cluster you need to have a proxy configured.
529 | Start the proxy with <tt>hadoop-ec2 proxy &lt;cluster_name&gt;</tt>,
530 | and point your browser to
531 | <a href="http://apache-hadoop-ec2.s3.amazonaws.com/proxy.pac">this Proxy
532 | Auto-Configuration (PAC)</a> file.  To manage multiple proxy configurations,
533 | you may wish to use
534 | <a href="https://addons.mozilla.org/en-US/firefox/addon/2464">FoxyProxy</a>.
535 | <ul>
536 | <li><a href="http://$HYBRID_NN_HOST:50070/">NameNode</a>
537 | <li><a href="http://$HYBRID_JT_HOST:50030/">JobTracker</a>
538 | </ul>
539 | </body>
540 | </html>
541 | END
542 | 
543 |   service thttpd start
544 | 
545 | }
546 | 
547 | function start_namenode() {
548 |   if which dpkg &> /dev/null; then
549 |     AS_HADOOP="su -s /bin/bash - hadoop -c"
550 |   elif which rpm &> /dev/null; then
551 |     AS_HADOOP="/sbin/runuser -s /bin/bash - hadoop -c"
552 |   fi
553 | 
554 |   # Format HDFS
555 |   [ ! -e $FIRST_MOUNT/hadoop/hdfs ] && $AS_HADOOP "$HADOOP_HOME/bin/hadoop namenode -format"
556 | 
557 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop-daemon.sh start namenode"
558 | 
559 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop dfsadmin -safemode wait"
560 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop fs -mkdir /user"
561 |   # The following is questionable, as it allows a user to delete another user
562 |   # It's needed to allow users to create their own user directories
563 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop fs -chmod +w /user"
564 | 
565 | }
566 | 
567 | function start_daemon() {
568 |   if which dpkg &> /dev/null; then
569 |     AS_HADOOP="su -s /bin/bash - hadoop -c"
570 |   elif which rpm &> /dev/null; then
571 |     AS_HADOOP="/sbin/runuser -s /bin/bash - hadoop -c"
572 |   fi
573 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop-daemon.sh start $1"
574 | }
575 | 
576 | function install_cassandra() {
577 | 
578 |     curl="curl --retry 3 --silent --show-error --fail"
579 |     if [ ! -z "$CASSANDRA_URL" ]; then
580 |         DEFAULT_CASSANDRA_URL=$CASSANDRA_URL
581 |     fi
582 | 
583 |     cassandra_tar_file=`basename $DEFAULT_CASSANDRA_URL`
584 |     $curl -O $DEFAULT_CASSANDRA_URL
585 |     
586 |     tar zxf $cassandra_tar_file -C /usr/local
587 |     rm -f $cassandra_tar_file
588 | 
589 |     CASSANDRA_HOME_WITH_VERSION=/usr/local/`ls -1 /usr/local | grep cassandra`
590 | 
591 |     echo "export CASSANDRA_HOME=$CASSANDRA_HOME_ALIAS" >> ~root/.bash_profile
592 |     echo 'export PATH=$CASSANDRA_HOME/bin:$PATH' >> ~root/.bash_profile
593 | }
594 | 
595 | function configure_cassandra() {
596 |   if [ -n "$EBS_MAPPINGS" ]; then
597 |     # EBS_MAPPINGS is like "cn,/ebs1,/dev/sdj;cn,/ebs2,/dev/sdk"
598 |     # EBS_MAPPINGS is like "ROLE,MOUNT_POINT,DEVICE;ROLE,MOUNT_POINT,DEVICE"
599 |     for mapping in $(echo "$EBS_MAPPINGS" | tr ";" "\n"); do
600 |       role=`echo $mapping | cut -d, -f1`
601 |       mount=`echo $mapping | cut -d, -f2`
602 |       device=`echo $mapping | cut -d, -f3`
603 |       wait_for_mount $mount $device
604 |     done
605 |   fi
606 | 
607 |     if [ -f "$CASSANDRA_HOME_WITH_VERSION/conf/cassandra-env.sh" ]
608 |     then
609 |         # for cassandra 0.7.x we need to set the MAX_HEAP_SIZE env
610 |         # variable so that it can be used in cassandra-env.sh on
611 |         # startup
612 |         if [ -z "$MAX_HEAP_SIZE" ]
613 |         then
614 |             INSTANCE_TYPE=`wget -q -O - http://169.254.169.254/latest/meta-data/instance-type`
615 |             case $INSTANCE_TYPE in
616 |             m1.xlarge|m2.xlarge)
617 |                 MAX_HEAP_SIZE="10G"
618 |                 ;;
619 |             m1.large|c1.xlarge)
620 |                 MAX_HEAP_SIZE="5G"
621 |                 ;;
622 |             *)
623 |                 # Don't set it and let cassandra-env figure it out
624 |                 ;;
625 |             esac
626 | 
627 |             # write it to the profile
628 |             echo "export MAX_HEAP_SIZE=$MAX_HEAP_SIZE" >> ~root/.bash_profile
629 |             echo "export MAX_HEAP_SIZE=$MAX_HEAP_SIZE" >> ~root/.bashrc
630 |         fi
631 |     else
632 |         write_cassandra_in_sh_file
633 |     fi
634 | }
635 | 
636 | function write_cassandra_in_sh_file {
637 |   # for cassandra 0.6.x memory settings
638 | 
639 |   # configure the cassandra.in.sh script based on instance type
640 |   INSTANCE_TYPE=`wget -q -O - http://169.254.169.254/latest/meta-data/instance-type`
641 |   SETTINGS_FILE=$CASSANDRA_HOME_WITH_VERSION/bin/cassandra.in.sh
642 | 
643 |   cat > $SETTINGS_FILE <<EOF
644 | # Licensed to the Apache Software Foundation (ASF) under one
645 | # or more contributor license agreements.  See the NOTICE file
646 | # distributed with this work for additional information
647 | # regarding copyright ownership.  The ASF licenses this file
648 | # to you under the Apache License, Version 2.0 (the
649 | # "License"); you may not use this file except in compliance
650 | # with the License.  You may obtain a copy of the License at
651 | #
652 | #     http://www.apache.org/licenses/LICENSE-2.0
653 | #
654 | # Unless required by applicable law or agreed to in writing, software
655 | # distributed under the License is distributed on an "AS IS" BASIS,
656 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
657 | # See the License for the specific language governing permissions and
658 | # limitations under the License.
659 | 
660 | cassandra_home=$CASSANDRA_HOME_ALIAS
661 | 
662 | # The directory where Cassandra's configs live (required)
663 | CASSANDRA_CONF=\$cassandra_home/conf
664 | 
665 | # This can be the path to a jar file, or a directory containing the 
666 | # compiled classes. NOTE: This isn't needed by the startup script,
667 | # it's just used here in constructing the classpath.
668 | cassandra_bin=\$cassandra_home/build/classes
669 | #cassandra_bin=\$cassandra_home/build/cassandra.jar
670 | 
671 | # JAVA_HOME can optionally be set here
672 | #JAVA_HOME=/usr/local/jdk6
673 | 
674 | # The java classpath (required)
675 | CLASSPATH=\$CASSANDRA_CONF:\$cassandra_bin
676 | 
677 | for jar in \$cassandra_home/lib/*.jar; do
678 |     CLASSPATH=\$CLASSPATH:\$jar
679 | done
680 | 
681 | EOF
682 | 
683 |   case $INSTANCE_TYPE in
684 |   m1.xlarge|m2.xlarge)
685 |     cat >> $SETTINGS_FILE <<EOF
686 | # Arguments to pass to the JVM
687 | JVM_OPTS="-ea -Xms10G -Xmx10G -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=1 -XX:+HeapDumpOnOutOfMemoryError -Dcom.sun.management.jmxremote.port=8080 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
688 | EOF
689 |     ;;
690 |   m1.large|c1.xlarge)
691 |     cat >> $SETTINGS_FILE <<EOF
692 | # Arguments to pass to the JVM
693 | JVM_OPTS="-ea -Xms5G -Xmx5G -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=1 -XX:+HeapDumpOnOutOfMemoryError -Dcom.sun.management.jmxremote.port=8080 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
694 | EOF
695 |     ;;
696 |   *)
697 |     cat >> $SETTINGS_FILE <<EOF
698 | # Arguments to pass to the JVM
699 | JVM_OPTS="-ea -Xms256M -Xmx1G -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=1 -XX:+HeapDumpOnOutOfMemoryError -Dcom.sun.management.jmxremote.port=8080 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
700 | EOF
701 |     ;;
702 |   esac
703 | }
704 | 
705 | function finish_cassandra {
706 |     # symlink the actual cassandra directory to a more standard one (without version)
707 |     # this is very important so the cluster can be configured more easily after the 
708 |     # machines start and services started/stopped
709 |     #
710 |     # NOTE: Stratus also looks for this aliased directory to know when cassandra
711 |     # is ready to be started
712 |     ln -s $CASSANDRA_HOME_WITH_VERSION $CASSANDRA_HOME_ALIAS
713 | }
714 | 
715 | register_auto_shutdown
716 | install_user_packages
717 | install_hadoop
718 | configure_hadoop
719 | 
720 | for role in $(echo "$ROLES" | tr "," "\n"); do
721 |   case $role in
722 |   hybrid_nn)
723 |     setup_web
724 |     start_namenode
725 |     ;;
726 |   hybrid_snn)
727 |     start_daemon secondarynamenode
728 |     ;;
729 |   hybrid_jt)
730 |     start_daemon jobtracker
731 |     ;;
732 |   hybrid_dn)
733 |     start_daemon datanode
734 |     install_cassandra
735 |     configure_cassandra
736 |     finish_cassandra
737 |     ;;
738 |   hybrid_tt)
739 |     start_daemon tasktracker
740 |     ;;
741 |   esac
742 | done
743 | 
744 | 


--------------------------------------------------------------------------------
/example_scripts/hadoop-ec2-init-remote.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash -x
  2 | 
  3 | # Licensed to the Apache Software Foundation (ASF) under one or more
  4 | # contributor license agreements.  See the NOTICE file distributed with
  5 | # this work for additional information regarding copyright ownership.
  6 | # The ASF licenses this file to You under the Apache License, Version 2.0
  7 | # (the "License"); you may not use this file except in compliance with
  8 | # the License.  You may obtain a copy of the License at
  9 | #
 10 | #     http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | ################################################################################
 19 | # Script that is run on each EC2 instance on boot. It is passed in the EC2 user
 20 | # data, so should not exceed 16K in size after gzip compression.
 21 | #
 22 | # This script is executed by /etc/init.d/ec2-run-user-data, and output is
 23 | # logged to /var/log/messages.
 24 | ################################################################################
 25 | 
 26 | ################################################################################
 27 | # Initialize variables
 28 | ################################################################################
 29 | 
 30 | # Substitute environment variables passed by the client
 31 | export %ENV%
 32 | 
 33 | echo "export %ENV%" >> ~root/.bash_profile
 34 | echo "export %ENV%" >> ~root/.bashrc
 35 | 
 36 | HADOOP_VERSION=${HADOOP_VERSION:-0.20.2+737}
 37 | HADOOP_HOME=/usr/local/hadoop-$HADOOP_VERSION
 38 | HADOOP_CONF_DIR=$HADOOP_HOME/conf
 39 | 
 40 | PIG_VERSION=${PIG_VERSION:-0.7.0}
 41 | PIG_HOME=/usr/local/pig-$PIG_VERSION
 42 | PIG_CONF_DIR=$PIG_HOME/conf
 43 | 
 44 | SELF_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname`
 45 | for role in $(echo "$ROLES" | tr "," "\n"); do
 46 |   case $role in
 47 |   nn)
 48 |     NN_HOST=$SELF_HOST
 49 |     ;;
 50 |   jt)
 51 |     JT_HOST=$SELF_HOST
 52 |     ;;
 53 |   esac
 54 | done
 55 | 
 56 | function register_auto_shutdown() {
 57 |   if [ ! -z "$AUTO_SHUTDOWN" ]; then
 58 |     shutdown -h +$AUTO_SHUTDOWN >/dev/null &
 59 |   fi
 60 | }
 61 | 
 62 | # Install a list of packages on debian or redhat as appropriate
 63 | function install_packages() {
 64 |   if which dpkg &> /dev/null; then
 65 |     apt-get update
 66 |     apt-get -y install $@
 67 |   elif which rpm &> /dev/null; then
 68 |     yum install -y $@
 69 |   else
 70 |     echo "No package manager found."
 71 |   fi
 72 | }
 73 | 
 74 | # Install any user packages specified in the USER_PACKAGES environment variable
 75 | function install_user_packages() {
 76 |   if [ ! -z "$USER_PACKAGES" ]; then
 77 |     install_packages $USER_PACKAGES
 78 |   fi
 79 | }
 80 | 
 81 | function install_yourkit() {
 82 | 	mkdir /mnt/yjp
 83 | 	YOURKIT_URL="http://www.yourkit.com/download/yjp-9.0.7-linux.tar.bz2"
 84 | 	curl="curl --retry 3 --silent --show-error --fail"
 85 | 	$curl -O $YOURKIT_URL
 86 | 	yourkit_tar_file=`basename $YOURKIT_URL`
 87 |     tar xjf $yourkit_tar_file -C /mnt/yjp
 88 |     rm -f $yourkit_tar_file
 89 |     chown -R hadoop /mnt/yjp
 90 | 	chgrp -R hadoop /mnt/yjp
 91 | }
 92 | 
 93 | function install_hadoop() {
 94 |   useradd hadoop
 95 | 
 96 |   hadoop_tar_url=http://archive.cloudera.com/cdh/3/hadoop-$HADOOP_VERSION.tar.gz
 97 |   hadoop_tar_file=`basename $hadoop_tar_url`
 98 | 
 99 |   curl="curl --retry 3 --silent --show-error --fail"
100 |   $curl -O $hadoop_tar_url
101 | 
102 |   if [ ! -e $hadoop_tar_file ]; then
103 |     echo "Failed to download $hadoop_tar_url. Aborting."
104 |     exit 1
105 |   fi
106 | 
107 |   tar zxf $hadoop_tar_file -C /usr/local
108 |   rm -f $hadoop_tar_file $hadoop_tar_md5_file
109 | 
110 |   echo "export HADOOP_HOME=$HADOOP_HOME" >> ~root/.bashrc
111 |   echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> ~root/.bashrc
112 | }
113 | 
114 | function install_pig()
115 | {
116 |   pig_tar_url=http://mirror.cloudera.com/apache/hadoop/pig/pig-$PIG_VERSION/pig-$PIG_VERSION.tar.gz
117 |   pig_tar_file=`basename $pig_tar_url`
118 | 
119 |   curl="curl --retry 3 --silent --show-error --fail"
120 |   for i in `seq 1 3`;
121 |   do
122 |     $curl -O $pig_tar_url
123 |   done
124 | 
125 |   if [ ! -e $pig_tar_file ]; then
126 |     echo "Failed to download $pig_tar_url. Pig will not be installed."
127 |   else
128 |     tar zxf $pig_tar_file -C /usr/local
129 |     rm -f $pig_tar_file
130 |  
131 |     if [ ! -e $HADOOP_CONF_DIR ]; then
132 |       echo "Hadoop must be installed.  Aborting."
133 |       exit 1
134 |     fi
135 | 
136 |     cp $HADOOP_CONF_DIR/*.xml $PIG_CONF_DIR/
137 | 
138 |     echo "export PIG_HOME=$PIG_HOME" >> ~root/.bashrc
139 |     echo 'export PATH=$JAVA_HOME/bin:$PIG_HOME/bin:$PATH' >> ~root/.bashrc
140 |  fi
141 | }
142 | 
143 | function prep_disk() {
144 |   mount=$1
145 |   device=$2
146 |   automount=${3:-false}
147 | 
148 |   echo "warning: ERASING CONTENTS OF $device"
149 |   mkfs.xfs -f $device
150 |   if [ ! -e $mount ]; then
151 |     mkdir $mount
152 |   fi
153 |   mount -o defaults,noatime $device $mount
154 |   if $automount ; then
155 |     echo "$device $mount xfs defaults,noatime 0 0" >> /etc/fstab
156 |   fi
157 | }
158 | 
159 | function wait_for_mount {
160 |   mount=$1
161 |   device=$2
162 | 
163 |   mkdir $mount
164 | 
165 |   i=1
166 |   echo "Attempting to mount $device"
167 |   while true ; do
168 |     sleep 10
169 |     echo -n "$i "
170 |     i=$[$i+1]
171 |     mount -o defaults,noatime $device $mount || continue
172 |     echo " Mounted."
173 |     break;
174 |   done
175 | }
176 | 
177 | function make_hadoop_dirs {
178 |   for mount in "$@"; do
179 |     if [ ! -e $mount/hadoop ]; then
180 |       mkdir -p $mount/hadoop
181 |       chown hadoop:hadoop $mount/hadoop
182 |     fi
183 |   done
184 | }
185 | 
186 | # Configure Hadoop by setting up disks and site file
187 | function configure_hadoop() {
188 | 
189 |   install_packages xfsprogs # needed for XFS
190 | 
191 |   INSTANCE_TYPE=`wget -q -O - http://169.254.169.254/latest/meta-data/instance-type`
192 | 
193 |   if [ -n "$EBS_MAPPINGS" ]; then
194 |     # EBS_MAPPINGS is like "nn,/ebs1,/dev/sdj;dn,/ebs2,/dev/sdk"
195 |     # EBS_MAPPINGS is like "ROLE,MOUNT_POINT,DEVICE;ROLE,MOUNT_POINT,DEVICE"
196 |     DFS_NAME_DIR=''
197 |     FS_CHECKPOINT_DIR=''
198 |     DFS_DATA_DIR=''
199 |     for mapping in $(echo "$EBS_MAPPINGS" | tr ";" "\n"); do
200 |       role=`echo $mapping | cut -d, -f1`
201 |       mount=`echo $mapping | cut -d, -f2`
202 |       device=`echo $mapping | cut -d, -f3`
203 |       wait_for_mount $mount $device
204 |       DFS_NAME_DIR=${DFS_NAME_DIR},"$mount/hadoop/hdfs/name"
205 |       FS_CHECKPOINT_DIR=${FS_CHECKPOINT_DIR},"$mount/hadoop/hdfs/secondary"
206 |       DFS_DATA_DIR=${DFS_DATA_DIR},"$mount/hadoop/hdfs/data"
207 |       FIRST_MOUNT=${FIRST_MOUNT-$mount}
208 |       make_hadoop_dirs $mount
209 |     done
210 |     # Remove leading commas
211 |     DFS_NAME_DIR=${DFS_NAME_DIR#?}
212 |     FS_CHECKPOINT_DIR=${FS_CHECKPOINT_DIR#?}
213 |     DFS_DATA_DIR=${DFS_DATA_DIR#?}
214 | 
215 |     DFS_REPLICATION=3 # EBS is internally replicated, but we also use HDFS replication for safety
216 |   else
217 |     case $INSTANCE_TYPE in
218 |     m1.xlarge|c1.xlarge)
219 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name,/mnt2/hadoop/hdfs/name
220 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary,/mnt2/hadoop/hdfs/secondary
221 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data,/mnt2/hadoop/hdfs/data,/mnt3/hadoop/hdfs/data,/mnt4/hadoop/hdfs/data
222 |       ;;
223 |     m1.large)
224 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name,/mnt2/hadoop/hdfs/name
225 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary,/mnt2/hadoop/hdfs/secondary
226 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data,/mnt2/hadoop/hdfs/data
227 |       ;;
228 |     *)
229 |       # "m1.small" or "c1.medium"
230 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name
231 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary
232 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data
233 |       ;;
234 |     esac
235 |     FIRST_MOUNT=/mnt
236 |     DFS_REPLICATION=3
237 |   fi
238 | 
239 |   case $INSTANCE_TYPE in
240 |   m1.xlarge|c1.xlarge)
241 |     prep_disk /mnt2 /dev/sdc true &
242 |     disk2_pid=$!
243 |     prep_disk /mnt3 /dev/sdd true &
244 |     disk3_pid=$!
245 |     prep_disk /mnt4 /dev/sde true &
246 |     disk4_pid=$!
247 |     wait $disk2_pid $disk3_pid $disk4_pid
248 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local,/mnt2/hadoop/mapred/local,/mnt3/hadoop/mapred/local,/mnt4/hadoop/mapred/local
249 |     MAX_MAP_TASKS=4
250 |     MAX_REDUCE_TASKS=2
251 |     CHILD_OPTS=-Xmx2000m
252 |     CHILD_ULIMIT=4000000
253 |     ;;
254 |   m1.large)
255 |     prep_disk /mnt2 /dev/sdc true
256 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local,/mnt2/hadoop/mapred/local
257 |     MAX_MAP_TASKS=2
258 |     MAX_REDUCE_TASKS=1
259 |     CHILD_OPTS=-Xmx2000m
260 |     CHILD_ULIMIT=4000000
261 |     ;;
262 |   c1.medium)
263 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local
264 |     MAX_MAP_TASKS=4
265 |     MAX_REDUCE_TASKS=2
266 |     CHILD_OPTS=-Xmx550m
267 |     CHILD_ULIMIT=1126400
268 |     ;;
269 |   *)
270 |     # "m1.small"
271 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local
272 |     MAX_MAP_TASKS=2
273 |     MAX_REDUCE_TASKS=1
274 |     CHILD_OPTS=-Xmx550m
275 |     CHILD_ULIMIT=1126400
276 |     ;;
277 |   esac
278 | 
279 |   make_hadoop_dirs `ls -d /mnt*`
280 | 
281 |   # Create tmp directory
282 |   mkdir /mnt/tmp
283 |   chmod a+rwxt /mnt/tmp
284 |   
285 |   mkdir /etc/hadoop
286 |   ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
287 | 
288 |   ##############################################################################
289 |   # Modify this section to customize your Hadoop cluster.
290 |   ##############################################################################
291 |   cat > $HADOOP_CONF_DIR/hadoop-site.xml <<EOF
292 | <?xml version="1.0"?>
293 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
294 | <configuration>
295 | <property>
296 |   <name>dfs.block.size</name>
297 |   <value>134217728</value>
298 |   <final>true</final>
299 | </property>
300 | <property>
301 |   <name>dfs.data.dir</name>
302 |   <value>$DFS_DATA_DIR</value>
303 |   <final>true</final>
304 | </property>
305 | <property>
306 |   <name>dfs.datanode.du.reserved</name>
307 |   <value>1073741824</value>
308 |   <final>true</final>
309 | </property>
310 | <property>
311 |   <name>dfs.datanode.handler.count</name>
312 |   <value>3</value>
313 |   <final>true</final>
314 | </property>
315 | <!--property>
316 |   <name>dfs.hosts</name>
317 |   <value>$HADOOP_CONF_DIR/dfs.hosts</value>
318 |   <final>true</final>
319 | </property-->
320 | <property>
321 |   <name>dfs.hosts.exclude</name>
322 |   <value>$HADOOP_CONF_DIR/exclude</value>
323 |   <final>true</final>
324 | </property>
325 | <property>
326 |   <name>mapred.hosts.exclude</name>
327 |   <value>$HADOOP_CONF_DIR/exclude</value>
328 |   <final>true</final>
329 | </property>
330 | <property>
331 |   <name>dfs.name.dir</name>
332 |   <value>$DFS_NAME_DIR</value>
333 |   <final>true</final>
334 | </property>
335 | <property>
336 |   <name>dfs.namenode.handler.count</name>
337 |   <value>5</value>
338 |   <final>true</final>
339 | </property>
340 | <property>
341 |   <name>dfs.permissions</name>
342 |   <value>true</value>
343 |   <final>true</final>
344 | </property>
345 | <property>
346 |   <name>dfs.replication</name>
347 |   <value>$DFS_REPLICATION</value>
348 | </property>
349 | <property>
350 |   <name>fs.checkpoint.dir</name>
351 |   <value>$FS_CHECKPOINT_DIR</value>
352 |   <final>true</final>
353 | </property>
354 | <property>
355 |   <name>fs.default.name</name>
356 |   <value>hdfs://$NN_HOST:8020/</value>
357 | </property>
358 | <property>
359 |   <name>fs.trash.interval</name>
360 |   <value>1440</value>
361 |   <final>true</final>
362 | </property>
363 | <property>
364 |   <name>hadoop.tmp.dir</name>
365 |   <value>/mnt/tmp/hadoop-\${user.name}</value>
366 |   <final>true</final>
367 | </property>
368 | <property>
369 |   <name>io.file.buffer.size</name>
370 |   <value>65536</value>
371 | </property>
372 | <property>
373 |   <name>mapred.child.java.opts</name>
374 |   <value>$CHILD_OPTS</value>
375 | </property>
376 | <property>
377 |   <name>mapred.child.ulimit</name>
378 |   <value>$CHILD_ULIMIT</value>
379 |   <final>true</final>
380 | </property>
381 | <property>
382 |   <name>mapred.job.tracker</name>
383 |   <value>$JT_HOST:8021</value>
384 | </property>
385 | <property>
386 |   <name>mapred.job.tracker.handler.count</name>
387 |   <value>5</value>
388 |   <final>true</final>
389 | </property>
390 | <property>
391 |   <name>mapred.local.dir</name>
392 |   <value>$MAPRED_LOCAL_DIR</value>
393 |   <final>true</final>
394 | </property>
395 | <property>
396 |   <name>mapred.map.tasks.speculative.execution</name>
397 |   <value>true</value>
398 | </property>
399 | <property>
400 |   <name>mapred.reduce.parallel.copies</name>
401 |   <value>10</value>
402 | </property>
403 | <property>
404 |   <name>mapred.reduce.tasks</name>
405 |   <value>$CLUSTER_SIZE</value>
406 | </property>
407 | <property>
408 |   <name>mapred.reduce.tasks.speculative.execution</name>
409 |   <value>false</value>
410 | </property>
411 | <property>
412 |   <name>mapred.submit.replication</name>
413 |   <value>10</value>
414 | </property>
415 | <property>
416 |   <name>mapred.system.dir</name>
417 |   <value>/hadoop/system/mapred</value>
418 | </property>
419 | <property>
420 |   <name>mapred.tasktracker.map.tasks.maximum</name>
421 |   <value>$MAX_MAP_TASKS</value>
422 |   <final>true</final>
423 | </property>
424 | <property>
425 |   <name>mapred.tasktracker.reduce.tasks.maximum</name>
426 |   <value>$MAX_REDUCE_TASKS</value>
427 |   <final>true</final>
428 | </property>
429 | <property>
430 |   <name>tasktracker.http.threads</name>
431 |   <value>46</value>
432 |   <final>true</final>
433 | </property>
434 | <property>
435 |   <name>mapred.compress.map.output</name>
436 |   <value>true</value>
437 | </property>
438 | <property>
439 |   <name>mapred.output.compression.type</name>
440 |   <value>BLOCK</value>
441 | </property>
442 | <property>
443 |   <name>hadoop.rpc.socket.factory.class.default</name>
444 |   <value>org.apache.hadoop.net.StandardSocketFactory</value>
445 |   <final>true</final>
446 | </property>
447 | <property>
448 |   <name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
449 |   <value></value>
450 |   <final>true</final>
451 | </property>
452 | <property>
453 |   <name>hadoop.rpc.socket.factory.class.JobSubmissionProtocol</name>
454 |   <value></value>
455 |   <final>true</final>
456 | </property>
457 | <property>
458 |   <name>io.compression.codecs</name>
459 |   <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</value>
460 | </property>
461 | <property>
462 |   <name>fs.s3.awsAccessKeyId</name>
463 |   <value>$AWS_ACCESS_KEY_ID</value>
464 | </property>
465 | <property>
466 |   <name>fs.s3.awsSecretAccessKey</name>
467 |   <value>$AWS_SECRET_ACCESS_KEY</value>
468 | </property>
469 | <property>
470 |   <name>fs.s3n.awsAccessKeyId</name>
471 |   <value>$AWS_ACCESS_KEY_ID</value>
472 | </property>
473 | <property>
474 |   <name>fs.s3n.awsSecretAccessKey</name>
475 |   <value>$AWS_SECRET_ACCESS_KEY</value>
476 | </property>
477 | </configuration>
478 | EOF
479 | 
480 |   # Keep PID files in a non-temporary directory
481 |   sed -i -e "s|# export HADOOP_PID_DIR=.*|export HADOOP_PID_DIR=/var/run/hadoop|" \
482 |     $HADOOP_CONF_DIR/hadoop-env.sh
483 |   mkdir -p /var/run/hadoop
484 |   chown -R hadoop:hadoop /var/run/hadoop
485 | 
486 |   # Set SSH options within the cluster
487 |   sed -i -e 's|# export HADOOP_SSH_OPTS=.*|export HADOOP_SSH_OPTS="-o StrictHostKeyChecking=no"|' \
488 |     $HADOOP_CONF_DIR/hadoop-env.sh
489 | 
490 |   # Hadoop logs should be on the /mnt partition
491 |   sed -i -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/var/log/hadoop/logs|' \
492 |     $HADOOP_CONF_DIR/hadoop-env.sh
493 |   rm -rf /var/log/hadoop
494 |   mkdir /mnt/hadoop/logs
495 |   chown hadoop:hadoop /mnt/hadoop/logs
496 |   ln -s /mnt/hadoop/logs /var/log/hadoop
497 |   chown -R hadoop:hadoop /var/log/hadoop
498 | 
499 | }
500 | 
501 | # Sets up small website on cluster.
502 | function setup_web() {
503 | 
504 |   if which dpkg &> /dev/null; then
505 |     apt-get -y install thttpd
506 |     WWW_BASE=/var/www
507 |   elif which rpm &> /dev/null; then
508 |     yum install -y thttpd
509 |     chkconfig --add thttpd
510 |     WWW_BASE=/var/www/thttpd/html
511 |   fi
512 | 
513 |   cat > $WWW_BASE/index.html << END
514 | <html>
515 | <head>
516 | <title>Hadoop EC2 Cluster</title>
517 | </head>
518 | <body>
519 | <h1>Hadoop EC2 Cluster</h1>
520 | To browse the cluster you need to have a proxy configured.
521 | Start the proxy with <tt>hadoop-ec2 proxy &lt;cluster_name&gt;</tt>,
522 | and point your browser to
523 | <a href="http://apache-hadoop-ec2.s3.amazonaws.com/proxy.pac">this Proxy
524 | Auto-Configuration (PAC)</a> file.  To manage multiple proxy configurations,
525 | you may wish to use
526 | <a href="https://addons.mozilla.org/en-US/firefox/addon/2464">FoxyProxy</a>.
527 | <ul>
528 | <li><a href="http://$NN_HOST:50070/">NameNode</a>
529 | <li><a href="http://$JT_HOST:50030/">JobTracker</a>
530 | </ul>
531 | </body>
532 | </html>
533 | END
534 | 
535 |   service thttpd start
536 | 
537 | }
538 | 
539 | function start_namenode() {
540 |   if which dpkg &> /dev/null; then
541 |     AS_HADOOP="su -s /bin/bash - hadoop -c"
542 |   elif which rpm &> /dev/null; then
543 |     AS_HADOOP="/sbin/runuser -s /bin/bash - hadoop -c"
544 |   fi
545 | 
546 |   # Format HDFS
547 |   [ ! -e $FIRST_MOUNT/hadoop/hdfs ] && $AS_HADOOP "$HADOOP_HOME/bin/hadoop namenode -format"
548 | 
549 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop-daemon.sh start namenode"
550 | 
551 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop dfsadmin -safemode wait"
552 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop fs -mkdir /user"
553 |   # The following is questionable, as it allows a user to delete another user
554 |   # It's needed to allow users to create their own user directories
555 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop fs -chmod +w /user"
556 | 
557 | }
558 | 
559 | function start_daemon() {
560 |   if which dpkg &> /dev/null; then
561 |     AS_HADOOP="su -s /bin/bash - hadoop -c"
562 |   elif which rpm &> /dev/null; then
563 |     AS_HADOOP="/sbin/runuser -s /bin/bash - hadoop -c"
564 |   fi
565 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop-daemon.sh start $1"
566 | }
567 | 
568 | register_auto_shutdown
569 | install_user_packages
570 | install_hadoop
571 | configure_hadoop
572 | install_pig
573 | 
574 | for role in $(echo "$ROLES" | tr "," "\n"); do
575 |   case $role in
576 |   nn)
577 |     setup_web
578 |     start_namenode
579 |     ;;
580 |   snn)
581 |     start_daemon secondarynamenode
582 |     ;;
583 |   jt)
584 |     start_daemon jobtracker
585 |     ;;
586 |   dn)
587 |     start_daemon datanode
588 |     ;;
589 |   tt)
590 |     start_daemon tasktracker
591 |     if [ ! -z "$INSTALL_PROFILER" ]; then
592 |        install_yourkit
593 |     fi
594 |     ;;
595 |   esac
596 | done
597 | 
598 | 


--------------------------------------------------------------------------------
/example_scripts/hbase-ec2-init-remote.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash -x
  2 | 
  3 | # Licensed to the Apache Software Foundation (ASF) under one or more
  4 | # contributor license agreements.  See the NOTICE file distributed with
  5 | # this work for additional information regarding copyright ownership.
  6 | # The ASF licenses this file to You under the Apache License, Version 2.0
  7 | # (the "License"); you may not use this file except in compliance with
  8 | # the License.  You may obtain a copy of the License at
  9 | #
 10 | #     http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | ################################################################################
 19 | # Script that is run on each EC2 instance on boot. It is passed in the EC2 user
 20 | # data, so should not exceed 16K in size after gzip compression.
 21 | #
 22 | # This script is executed by /etc/init.d/ec2-run-user-data, and output is
 23 | # logged to /var/log/messages.
 24 | #
 25 | # This script will set up a Hadoop/HBase cluster.  Zookeeper is
 26 | # installed and launched on the namenode/jobtracker/master node.  This is a 
 27 | # configuration that is suitable for small clusters or for testing, but not for
 28 | # most production environments.
 29 | #
 30 | # Since the regionservers all need the private dns name of the zookeeper
 31 | # machine, that machine must be started before the regionservers.  By default, 
 32 | # a zookeeper is running on the same machine as the master, so you will need to
 33 | # first launch the master node, then the slaves:
 34 | #
 35 | #   stratus exec my-cluster launch-master
 36 | #   stratus exec my-cluster launch-slaves 10
 37 | #
 38 | # 
 39 | ################################################################################
 40 | 
 41 | ################################################################################
 42 | # Initialize variables
 43 | ################################################################################
 44 | 
 45 | # Substitute environment variables passed by the client
 46 | export %ENV%
 47 | 
 48 | echo "export %ENV%" >> ~root/.bash_profile
 49 | #for some reason, the .bash_profile in some distros does not source .bashrc
 50 | cat >> ~root/.bash_profile <<EOF
 51 | if [ -f ~/.bashrc ]; then
 52 |    source ~/.bashrc
 53 | fi
 54 | EOF
 55 | echo "export %ENV%" >> ~root/.bashrc
 56 | 
 57 | # up ulimits if necessary
 58 | if [ `ulimit -n` -lt 128000 ]; then 
 59 | 	ulimit -n 128000
 60 | fi
 61 | 
 62 | HADOOP_VERSION=${HADOOP_VERSION:-0.20.2-cdh3u0}
 63 | HADOOP_HOME=/usr/local/hadoop-$HADOOP_VERSION
 64 | HADOOP_CONF_DIR=$HADOOP_HOME/conf
 65 | 
 66 | HBASE_VERSION=${HBASE_VERSION:-0.90.1-cdh3u0}
 67 | HBASE_HOME=/usr/local/hbase-$HBASE_VERSION
 68 | HBASE_CONF_DIR=$HBASE_HOME/conf
 69 | 
 70 | ZK_VERSION=${ZK_VERSION:-3.3.3-cdh3u0}
 71 | ZK_HOME=/usr/local/zookeeper-$ZK_VERSION
 72 | ZK_CONF_DIR=$ZK_HOME/conf
 73 | 
 74 | PIG_VERSION=${PIG_VERSION:-pig-0.8.0-cdh3u0}
 75 | PIG_HOME=/usr/local/pig-$PIG_VERSION
 76 | PIG_CONF_DIR=$PIG_HOME/conf
 77 | 
 78 | #HDFS settings to support HBase
 79 | DFS_DATANODE_HANDLER_COUNT=10
 80 | DFS_DATANODE_MAX_XCIEVERS=10000
 81 | #end of HDFS settings
 82 | 
 83 | SELF_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname`
 84 | for role in $(echo "$ROLES" | tr "," "\n"); do
 85 |   case $role in
 86 |   nn)
 87 |     NN_HOST=$SELF_HOST
 88 |     # By default the HBase master and Zookeeper run on the Namenode host
 89 |     # Zookeeper uses the private IP address of the namenode
 90 |     ZOOKEEPER_QUORUM=`echo $HOSTNAME`
 91 |     ;;
 92 |   jt)
 93 |     JT_HOST=$SELF_HOST
 94 |     ;;
 95 |   esac
 96 | done
 97 | 
 98 | # Set up the macro that we will use to execute commands as "hadoop"
 99 | if which dpkg &> /dev/null; then
100 |   AS_HADOOP="su -s /bin/bash - hadoop -c"
101 | elif which rpm &> /dev/null; then
102 |   AS_HADOOP="/sbin/runuser -s /bin/bash - hadoop -c"
103 | fi
104 | 
105 | function register_auto_shutdown() {
106 |   if [ ! -z "$AUTO_SHUTDOWN" ]; then
107 |     shutdown -h +$AUTO_SHUTDOWN >/dev/null &
108 |   fi
109 | }
110 | 
111 | # Install a list of packages on debian or redhat as appropriate
112 | function install_packages() {
113 |   if which dpkg &> /dev/null; then
114 |     apt-get update
115 |     apt-get -y install $@
116 |   elif which rpm &> /dev/null; then
117 |     yum install -y $@
118 |   else
119 |     echo "No package manager found."
120 |   fi
121 | }
122 | 
123 | # Install any user packages specified in the USER_PACKAGES environment variable
124 | function install_user_packages() {
125 |   if [ ! -z "$USER_PACKAGES" ]; then
126 |     install_packages $USER_PACKAGES
127 |   fi
128 | }
129 | 
130 | function install_yourkit() {
131 |     mkdir /mnt/yjp
132 |     YOURKIT_URL="http://www.yourkit.com/download/yjp-9.0.7-linux.tar.bz2"
133 |     curl="curl --retry 3 --silent --show-error --fail"
134 |     $curl -O $YOURKIT_URL
135 |     yourkit_tar_file=`basename $YOURKIT_URL`
136 |     tar xjf $yourkit_tar_file -C /mnt/yjp
137 |     rm -f $yourkit_tar_file
138 |     chown -R hadoop /mnt/yjp
139 | 	chgrp -R hadoop /mnt/yjp
140 | }
141 | 
142 | function install_hadoop() {
143 |   #The EBS volumes are already set up with hadoop:hadoop equal to 500:500
144 |   if which dpkg &> /dev/null; then
145 |     addgroup hadoop --gid 500
146 |     adduser --disabled-login --ingroup hadoop --gecos GECOS --uid 500 hadoop
147 |   else
148 |      groupadd hadoop -g 500
149 |      useradd hadoop -u 500 -g 500
150 |   fi
151 | 
152 |   
153 |   hadoop_tar_url=http://archive.cloudera.com/cdh/3/hadoop-$HADOOP_VERSION.tar.gz
154 |   hadoop_tar_file=`basename $hadoop_tar_url`
155 | 
156 |   curl="curl --retry 3 --silent --show-error --fail"
157 |   $curl -O $hadoop_tar_url
158 | 
159 |   if [ ! -e $hadoop_tar_file ]; then
160 |     echo "Failed to download $hadoop_tar_url. Aborting."
161 |     exit 1
162 |   fi
163 | 
164 |   tar zxf $hadoop_tar_file -C /usr/local
165 |   cp $HADOOP_HOME/contrib/fairscheduler/hadoop-*-fairscheduler.jar $HADOOP_HOME/lib
166 |   rm -f $hadoop_tar_file $hadoop_tar_md5_file
167 | 
168 |   echo "export HADOOP_HOME=$HADOOP_HOME" >> ~root/.bashrc
169 |   echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> ~root/.bashrc
170 |   
171 |   #set up the native compression libraries
172 |   if [ `arch` == 'x86_64' ]; then
173 |     cp $HADOOP_HOME/lib/native/Linux-amd64-64/libhadoop.* /usr/lib/
174 |   else
175 |     cp $HADOOP_HOME/lib/native/ Linux-i386-32/libhadoop.* /usr/lib/
176 |   fi
177 |   ldconfig -n /usr/lib/
178 | }
179 | 
180 | function install_hbase() {
181 |   hbase_tar_url=http://archive.cloudera.com/cdh/3/hbase-$HBASE_VERSION.tar.gz
182 |   hbase_tar_file=`basename $hbase_tar_url`
183 | 
184 |   curl="curl --retry 3 --silent --show-error --fail"
185 |   $curl -O $hbase_tar_url
186 | 
187 |   if [ ! -e $hbase_tar_file ]; then
188 |     echo "Failed to download $hbase_tar_url. Aborting."
189 |     exit 1
190 |   fi
191 | 
192 |   tar zxf $hbase_tar_file -C /usr/local
193 |   rm -f $hbase_tar_file $hbase_tar_md5_file
194 | 
195 |   echo "export HBASE_HOME=$HBASE_HOME" >> ~root/.bashrc
196 |   echo 'export PATH=$JAVA_HOME/bin:$HBASE_HOME/bin:$PATH' >> ~root/.bashrc
197 | }
198 | 
199 | function install_zookeeper() {
200 |   zk_tar_url=http://archive.cloudera.com/cdh/3/zookeeper-$ZK_VERSION.tar.gz
201 |   zk_tar_file=`basename $zk_tar_url`
202 | 
203 |   curl="curl --retry 3 --silent --show-error --fail"
204 |   $curl -O $zk_tar_url
205 | 
206 |   if [ ! -e $zk_tar_file ]; then
207 |     echo "Failed to download $zk_tar_url. Aborting."
208 |     exit 1
209 |   fi
210 | 
211 |   tar zxf $zk_tar_file -C /usr/local
212 |   rm -f $zk_tar_file $zk_tar_md5_file
213 | 
214 |   echo "export ZOOKEEPER_HOME=$ZK_HOME" >> ~root/.bashrc
215 |   echo 'export PATH=$JAVA_HOME/bin:$ZK_HOME/bin:$PATH' >> ~root/.bashrc
216 | }
217 | 
218 | function install_pig()
219 | {
220 |   pig_tar_url=http://archive.cloudera.com/cdh/3/$PIG_VERSION.tar.gz
221 |   pig_tar_file=`basename $pig_tar_url`
222 | 
223 |   curl="curl --retry 3 --silent --show-error --fail"
224 |   for i in `seq 1 3`;
225 |   do
226 |     $curl -O $pig_tar_url
227 |   done
228 | 
229 |   if [ ! -e $pig_tar_file ]; then
230 |     echo "Failed to download $pig_tar_url. Pig will not be installed."
231 |   else
232 |     tar zxf $pig_tar_file -C /usr/local
233 |     rm -f $pig_tar_file
234 |  
235 |     if [ ! -e $HADOOP_CONF_DIR ]; then
236 |       echo "Hadoop must be installed.  Aborting."
237 |       exit 1
238 |     fi
239 | 
240 |     cp $HADOOP_CONF_DIR/*.xml $PIG_CONF_DIR/
241 | 
242 |     echo "export PIG_HOME=$PIG_HOME" >> ~root/.bashrc
243 |     echo 'export PATH=$JAVA_HOME/bin:$PIG_HOME/bin:$PATH' >> ~root/.bashrc
244 |  fi
245 | }
246 | 
247 | function prep_disk() {
248 |   mount=$1
249 |   device=$2
250 |   automount=${3:-false}
251 | 
252 |   echo "warning: ERASING CONTENTS OF $device"
253 |   mkfs.xfs -f $device
254 |   if [ ! -e $mount ]; then
255 |     mkdir $mount
256 |   fi
257 |   mount -o defaults,noatime $device $mount
258 |   if $automount ; then
259 |     echo "$device $mount xfs defaults,noatime 0 0" >> /etc/fstab
260 |   fi
261 | }
262 | 
263 | function wait_for_mount {
264 |   mount=$1
265 |   device=$2
266 | 
267 |   mkdir $mount
268 | 
269 |   i=1
270 |   echo "Attempting to mount $device"
271 |   while true ; do
272 |     sleep 10
273 |     echo -n "$i "
274 |     i=$[$i+1]
275 |     mount -o defaults,noatime $device $mount || continue
276 |     echo " Mounted."
277 |     break;
278 |   done
279 | }
280 | 
281 | function make_hadoop_dirs {
282 |   for mount in "$@"; do
283 |     if [ ! -e $mount/hadoop ]; then
284 |       mkdir -p $mount/hadoop
285 |       chown hadoop:hadoop $mount/hadoop
286 |     fi
287 |   done
288 | }
289 | 
290 | # Configure Hadoop by setting up disks and site file
291 | function configure_hadoop() {
292 |     #set up hadoop's env. to have the same path and vars as root
293 |     #this ensures that commands work correctly when the user su's to hadoop
294 |     cp ~/.bash_profile /home/hadoop/
295 |     cp ~/.bashrc /home/hadoop/
296 |     chown hadoop /home/hadoop/.bash*
297 |     chgrp hadoop /home/hadoop/.bash*
298 | 
299 |   install_packages xfsprogs # needed for XFS
300 | 
301 |   INSTANCE_TYPE=`wget -q -O - http://169.254.169.254/latest/meta-data/instance-type`
302 | 
303 |   if [ -n "$EBS_MAPPINGS" ]; then
304 |     # EBS_MAPPINGS is like "nn,/ebs1,/dev/sdj;dn,/ebs2,/dev/sdk"
305 |     # EBS_MAPPINGS is like "ROLE,MOUNT_POINT,DEVICE;ROLE,MOUNT_POINT,DEVICE"
306 |     DFS_NAME_DIR=''
307 |     FS_CHECKPOINT_DIR=''
308 |     DFS_DATA_DIR=''
309 |     for mapping in $(echo "$EBS_MAPPINGS" | tr ";" "\n"); do
310 |       role=`echo $mapping | cut -d, -f1`
311 |       mount=`echo $mapping | cut -d, -f2`
312 |       device=`echo $mapping | cut -d, -f3`
313 |       wait_for_mount $mount $device
314 |       DFS_NAME_DIR=${DFS_NAME_DIR},"$mount/hadoop/hdfs/name"
315 |       FS_CHECKPOINT_DIR=${FS_CHECKPOINT_DIR},"$mount/hadoop/hdfs/secondary"
316 |       DFS_DATA_DIR=${DFS_DATA_DIR},"$mount/hadoop/hdfs/data"
317 |       FIRST_MOUNT=${FIRST_MOUNT-$mount}
318 |       make_hadoop_dirs $mount
319 |     done
320 |     # Remove leading commas
321 |     DFS_NAME_DIR=${DFS_NAME_DIR#?}
322 |     FS_CHECKPOINT_DIR=${FS_CHECKPOINT_DIR#?}
323 |     DFS_DATA_DIR=${DFS_DATA_DIR#?}
324 | 
325 |     DFS_REPLICATION=3 # EBS is internally replicated, but we also use HDFS replication for safety
326 |   else
327 |     case $INSTANCE_TYPE in
328 |     m2.2xlarge)
329 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name
330 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary
331 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data
332 |       ;;
333 |     m1.xlarge|c1.xlarge)
334 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name,/mnt2/hadoop/hdfs/name
335 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary,/mnt2/hadoop/hdfs/secondary
336 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data,/mnt2/hadoop/hdfs/data,/mnt3/hadoop/hdfs/data,/mnt4/hadoop/hdfs/data
337 |       ;;
338 |     m1.large)
339 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name,/mnt2/hadoop/hdfs/name
340 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary,/mnt2/hadoop/hdfs/secondary
341 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data,/mnt2/hadoop/hdfs/data
342 |       ;;
343 |     *)
344 |       # "m1.small" or "c1.medium"
345 |       DFS_NAME_DIR=/mnt/hadoop/hdfs/name
346 |       FS_CHECKPOINT_DIR=/mnt/hadoop/hdfs/secondary
347 |       DFS_DATA_DIR=/mnt/hadoop/hdfs/data
348 |       ;;
349 |     esac
350 |     FIRST_MOUNT=/mnt
351 |     DFS_REPLICATION=3
352 |   fi
353 | 
354 |   case $INSTANCE_TYPE in
355 |   m2.2xlarge)
356 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local
357 |     MAX_MAP_TASKS=5
358 |     MAX_REDUCE_TASKS=3
359 |     CHILD_OPTS=-Xmx2000m
360 |     CHILD_ULIMIT=4000000
361 |     IO_SORT_FACTOR=25
362 |     IO_SORT_MB=250
363 |     ;;
364 |   m1.xlarge|c1.xlarge)
365 |     prep_disk /mnt2 /dev/sdc true &
366 |     disk2_pid=$!
367 |     prep_disk /mnt3 /dev/sdd true &
368 |     disk3_pid=$!
369 |     prep_disk /mnt4 /dev/sde true &
370 |     disk4_pid=$!
371 |     wait $disk2_pid $disk3_pid $disk4_pid
372 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local,/mnt2/hadoop/mapred/local,/mnt3/hadoop/mapred/local,/mnt4/hadoop/mapred/local
373 |     MAX_MAP_TASKS=4
374 |     MAX_REDUCE_TASKS=2
375 |     CHILD_OPTS=-Xmx2000m
376 |     CHILD_ULIMIT=4000000
377 |     IO_SORT_FACTOR=20
378 |     IO_SORT_MB=200
379 |     ;;
380 |   m1.large)
381 |     prep_disk /mnt2 /dev/sdc true
382 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local,/mnt2/hadoop/mapred/local
383 |     MAX_MAP_TASKS=2
384 |     MAX_REDUCE_TASKS=1
385 |     CHILD_OPTS=-Xmx2000m
386 |     CHILD_ULIMIT=4000000
387 |     IO_SORT_FACTOR=10
388 |     IO_SORT_MB=100
389 |     ;;
390 |   c1.medium)
391 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local
392 |     MAX_MAP_TASKS=4
393 |     MAX_REDUCE_TASKS=2
394 |     CHILD_OPTS=-Xmx550m
395 |     CHILD_ULIMIT=1126400
396 |     IO_SORT_FACTOR=10
397 |     IO_SORT_MB=100
398 |     ;;
399 |   *)
400 |     # "m1.small"
401 |     MAPRED_LOCAL_DIR=/mnt/hadoop/mapred/local
402 |     MAX_MAP_TASKS=2
403 |     MAX_REDUCE_TASKS=1
404 |     CHILD_OPTS=-Xmx550m
405 |     CHILD_ULIMIT=1126400
406 |     IO_SORT_FACTOR=10
407 |     IO_SORT_MB=100
408 |     ;;
409 |   esac
410 | 
411 |   make_hadoop_dirs `ls -d /mnt*`
412 | 
413 |   # Create tmp directory
414 |   mkdir /mnt/tmp
415 |   chmod a+rwxt /mnt/tmp
416 |   
417 |   mkdir /etc/hadoop
418 |   ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
419 | 
420 |   ##############################################################################
421 |   # Modify this section to customize your Hadoop cluster.
422 |   ##############################################################################
423 |   cat > $HADOOP_CONF_DIR/hadoop-site.xml <<EOF
424 | <?xml version="1.0"?>
425 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
426 | <configuration>
427 | <property>
428 |   <name>dfs.block.size</name>
429 |   <value>134217728</value>
430 |   <final>true</final>
431 | </property>
432 | <property>
433 |   <name>dfs.data.dir</name>
434 |   <value>$DFS_DATA_DIR</value>
435 |   <final>true</final>
436 | </property>
437 | <property>
438 |   <name>dfs.datanode.du.reserved</name>
439 |   <value>1073741824</value>
440 |   <final>true</final>
441 | </property>
442 | <property>
443 |   <name>dfs.datanode.handler.count</name>
444 |   <value>$DFS_DATANODE_HANDLER_COUNT</value>
445 |   <final>true</final>
446 | </property>
447 | <!--property>
448 |   <name>dfs.hosts</name>
449 |   <value>$HADOOP_CONF_DIR/dfs.hosts</value>
450 |   <final>true</final>
451 | </property-->
452 | <property>
453 |   <name>dfs.hosts.exclude</name>
454 |   <value>$HADOOP_CONF_DIR/exclude</value>
455 |   <final>true</final>
456 | </property>
457 | <property>
458 |   <name>mapred.hosts.exclude</name>
459 |   <value>$HADOOP_CONF_DIR/exclude</value>
460 |   <final>true</final>
461 | </property>
462 | <property>
463 |   <name>dfs.name.dir</name>
464 |   <value>$DFS_NAME_DIR</value>
465 |   <final>true</final>
466 | </property>
467 | <property>
468 |   <name>dfs.namenode.handler.count</name>
469 |   <value>64</value>
470 |   <final>true</final>
471 | </property>
472 | <property>
473 |   <name>dfs.permissions</name>
474 |   <value>true</value>
475 |   <final>true</final>
476 | </property>
477 | <property>
478 |   <name>dfs.replication</name>
479 |   <value>$DFS_REPLICATION</value>
480 | </property>
481 | <property>
482 |  <name>dfs.datanode.max.xcievers</name>
483 |  <value>$DFS_DATANODE_MAX_XCIEVERS</value>
484 | </property>
485 | <property>
486 |   <name>fs.checkpoint.dir</name>
487 |   <value>$FS_CHECKPOINT_DIR</value>
488 |   <final>true</final>
489 | </property>
490 | <property>
491 |   <name>fs.default.name</name>
492 |   <value>hdfs://$NN_HOST:8020/</value>
493 | </property>
494 | <property>
495 |   <name>fs.trash.interval</name>
496 |   <value>1440</value>
497 |   <final>true</final>
498 | </property>
499 | <property>
500 |   <name>hadoop.tmp.dir</name>
501 |   <value>/mnt/tmp/hadoop-\${user.name}</value>
502 |   <final>true</final>
503 | </property>
504 | <property>
505 |   <name>io.file.buffer.size</name>
506 |   <value>65536</value>
507 | </property>
508 | <property>
509 |   <name>io.sort.factor</name>
510 |   <value>$IO_SORT_FACTOR</value>
511 | </property>
512 | <property>
513 |   <name>io.sort.mb</name>
514 |   <value>$IO_SORT_MB</value>
515 | </property>
516 | <property>
517 |   <name>mapred.child.java.opts</name>
518 |   <value>$CHILD_OPTS</value>
519 | </property>
520 | <property>
521 |   <name>mapred.child.ulimit</name>
522 |   <value>$CHILD_ULIMIT</value>
523 |   <final>true</final>
524 | </property>
525 | <property>
526 |   <name>mapred.job.tracker</name>
527 |   <value>$JT_HOST:8021</value>
528 | </property>
529 | <property>
530 |   <name>mapred.job.tracker.handler.count</name>
531 |   <value>64</value>
532 |   <final>true</final>
533 | </property>
534 | <property>
535 |   <name>mapred.local.dir</name>
536 |   <value>$MAPRED_LOCAL_DIR</value>
537 |   <final>true</final>
538 | </property>
539 | <property>
540 |   <name>mapred.map.tasks.speculative.execution</name>
541 |   <value>true</value>
542 | </property>
543 | <property>
544 |   <name>mapred.reduce.parallel.copies</name>
545 |   <value>10</value>
546 | </property>
547 | <property>
548 |   <name>mapred.reduce.tasks</name>
549 |   <value>$CLUSTER_SIZE</value>
550 | </property>
551 | <property>
552 |   <name>mapred.reduce.tasks.speculative.execution</name>
553 |   <value>false</value>
554 | </property>
555 | <property>
556 |   <name>mapred.submit.replication</name>
557 |   <value>10</value>
558 | </property>
559 | <property>
560 |   <name>mapred.system.dir</name>
561 |   <value>/hadoop/system/mapred</value>
562 | </property>
563 | <property>
564 |   <name>mapred.tasktracker.map.tasks.maximum</name>
565 |   <value>$MAX_MAP_TASKS</value>
566 |   <final>true</final>
567 | </property>
568 | <property>
569 |   <name>mapred.tasktracker.reduce.tasks.maximum</name>
570 |   <value>$MAX_REDUCE_TASKS</value>
571 |   <final>true</final>
572 | </property>
573 | <property>
574 |   <name>tasktracker.http.threads</name>
575 |   <value>40</value>
576 |   <final>true</final>
577 | </property>
578 | <property>
579 |   <name>mapred.output.compress</name>
580 |   <value>true</value>
581 | </property>
582 | <property>
583 |   <name>mapred.compress.map.output</name>
584 |   <value>true</value>
585 | </property>
586 | <property>
587 |   <name>mapred.output.compression.type</name>
588 |   <value>BLOCK</value>
589 | </property>
590 | <property>
591 |   <name>hadoop.rpc.socket.factory.class.default</name>
592 |   <value>org.apache.hadoop.net.StandardSocketFactory</value>
593 |   <final>true</final>
594 | </property>
595 | <property>
596 |   <name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
597 |   <value></value>
598 |   <final>true</final>
599 | </property>
600 | <property>
601 |   <name>hadoop.rpc.socket.factory.class.JobSubmissionProtocol</name>
602 |   <value></value>
603 |   <final>true</final>
604 | </property>
605 | <property>
606 |   <name>io.compression.codecs</name>
607 |   <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</value>
608 | </property>
609 | </configuration>
610 | EOF
611 | 
612 |   # Keep PID files in a non-temporary directory
613 |   sed -i -e "s|# export HADOOP_PID_DIR=.*|export HADOOP_PID_DIR=/var/run/hadoop|" \
614 |     $HADOOP_CONF_DIR/hadoop-env.sh
615 |   mkdir -p /var/run/hadoop
616 |   chown -R hadoop:hadoop /var/run/hadoop
617 | 
618 |   # Set SSH options within the cluster
619 |   sed -i -e 's|# export HADOOP_SSH_OPTS=.*|export HADOOP_SSH_OPTS="-o StrictHostKeyChecking=no"|' \
620 |     $HADOOP_CONF_DIR/hadoop-env.sh
621 | 
622 |   # Hadoop logs should be on the /mnt partition
623 |   sed -i -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/var/log/hadoop/logs|' \
624 |     $HADOOP_CONF_DIR/hadoop-env.sh
625 |     
626 |   rm -rf /var/log/hadoop
627 |   mkdir /mnt/hadoop/logs
628 |   chown hadoop:hadoop /mnt/hadoop/logs
629 |   ln -s /mnt/hadoop/logs /var/log/hadoop
630 |   chown -R hadoop:hadoop /var/log/hadoop
631 | 
632 | }
633 | 
634 | # Sets up the HBase configuration
635 | function configure_hbase() {
636 | 	
637 |   ##############################################################################
638 |   # Modify this section to customize your HBase cluster.
639 |   ##############################################################################
640 | 
641 |   HBASE_TMP_DIR=/mnt/hbase
642 |   mkdir $HBASE_TMP_DIR
643 |   chown hadoop:hadoop $HBASE_TMP_DIR
644 |   
645 |   ZOOKEEPER_DATA_DIR=/mnt/hbase/zk
646 |   mkdir $ZOOKEEPER_DATA_DIR
647 |   chown hadoop:hadoop $ZOOKEEPER_DATA_DIR
648 |   
649 |   cat > $HBASE_CONF_DIR/hbase-site.xml <<EOF
650 | <?xml version="1.0"?>
651 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
652 | <configuration>
653 | <property>
654 |  <name>hbase.rootdir</name>
655 |  <value>hdfs://$NN_HOST:8020/hbase</value>
656 | </property>
657 | <property>
658 |  <name>hbase.cluster.distributed</name>
659 |  <value>true</value>
660 | </property>
661 | <property>
662 |  <name>hbase.regionserver.handler.count</name>
663 |  <value>200</value>
664 | </property>
665 | <property>
666 |  <name>hbase.tmp.dir</name>
667 |  <value>$HBASE_TMP_DIR</value>
668 | </property>
669 | <property>
670 |  <name>dfs.replication</name>
671 |  <value>$DFS_REPLICATION</value>
672 | </property>
673 | <!-- zookeeper properties -->
674 | <property>
675 |  <name>hbase.zookeeper.quorum</name>
676 |  <value>$ZOOKEEPER_QUORUM</value>
677 | </property>
678 | <property>
679 |  <name>zookeeper.session.timeout</name>
680 |  <value>60000</value>
681 | </property>
682 | <property>
683 |   <name>hbase.zookeeper.property.dataDir</name>
684 |   <value>$ZOOKEEPER_DATA_DIR</value>
685 | </property>
686 | <property>
687 |   <name>hbase.zookeeper.property.maxClientCnxns</name>
688 |   <value>100</value>
689 | </property>
690 | </configuration>
691 | EOF
692 | 
693 |   # Override JVM options - use 2G heap for master and 8G for region servers
694 |   cat >> $HBASE_CONF_DIR/hbase-env.sh <<EOF
695 | export HBASE_MASTER_OPTS="-Xms2048m -Xmx2048m -Xmn256m -XX:+UseConcMarkSweepGC -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/mnt/hbase/logs/hbase-master-gc.log"
696 | export HBASE_REGIONSERVER_OPTS="-Xms8g -Xmx12g -Xmn256m -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:ParallelGCThreads=8 -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/mnt/hbase/logs/hbase-regionserver-gc.log"
697 | HBASE_LOG_DIR=${HBASE_TMP_DIR}/logs
698 | EOF
699 | 
700 |   mkdir /etc/hbase/
701 |   ln -s $HBASE_CONF_DIR /etc/hbase/conf
702 |   ln -s $HBASE_LOG_DIR /var/log/hbase
703 | }
704 | 
705 | # Sets up small website on cluster.
706 | function setup_web() {
707 | 
708 |   if which dpkg &> /dev/null; then
709 |     apt-get -y install thttpd
710 |     WWW_BASE=/var/www
711 |   elif which rpm &> /dev/null; then
712 |     yum install -y thttpd
713 |     chkconfig --add thttpd
714 |     WWW_BASE=/var/www/thttpd/html
715 |   fi
716 | 
717 |   cat > $WWW_BASE/index.html << END
718 | <html>
719 | <head>
720 | <title>Hadoop EC2 Cluster</title>
721 | </head>
722 | <body>
723 | <h1>Hadoop EC2 Cluster</h1>
724 | To browse the cluster you need to have a proxy configured.
725 | Start the proxy with <tt>hadoop-ec2 proxy &lt;cluster_name&gt;</tt>,
726 | and point your browser to
727 | <a href="http://apache-hadoop-ec2.s3.amazonaws.com/proxy.pac">this Proxy
728 | Auto-Configuration (PAC)</a> file.  To manage multiple proxy configurations,
729 | you may wish to use
730 | <a href="https://addons.mozilla.org/en-US/firefox/addon/2464">FoxyProxy</a>.
731 | <ul>
732 | <li><a href="http://$NN_HOST:50070/">NameNode</a>
733 | <li><a href="http://$JT_HOST:50030/">JobTracker</a>
734 | </ul>
735 | </body>
736 | </html>
737 | END
738 | 
739 |   service thttpd start
740 | 
741 | }
742 | 
743 | function start_namenode() {
744 | 
745 |   # Format HDFS
746 |   [ ! -e $FIRST_MOUNT/hadoop/hdfs ] && $AS_HADOOP "$HADOOP_HOME/bin/hadoop namenode -format"
747 | 
748 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop-daemon.sh start namenode"
749 | 
750 |   #$AS_HADOOP "$HADOOP_HOME/bin/hadoop dfsadmin -safemode wait"
751 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop fs -mkdir /user"
752 |   # The following is questionable, as it allows a user to delete another user
753 |   # It's needed to allow users to create their own user directories
754 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop fs -chmod +w /user"
755 | 
756 | }
757 | 
758 | function start_daemon() {
759 |   $AS_HADOOP "$HADOOP_HOME/bin/hadoop-daemon.sh start $1"
760 | }
761 | 
762 | # Launch the Zookeeper and the HBase master node - these must be started
763 | # before adding region servers
764 | function start_master() {
765 |    #Start the zookeeper process first
766 |    $AS_HADOOP "$HBASE_HOME/bin/hbase-daemon.sh start zookeeper"
767 |    #Then start the master
768 |    $AS_HADOOP "$HBASE_HOME/bin/hbase-daemon.sh start master"
769 | }
770 | 
771 | # Launch a region server
772 | function start_region() {
773 |    $AS_HADOOP "$HBASE_HOME/bin/hbase-daemon.sh start regionserver"
774 | }
775 | 
776 | register_auto_shutdown
777 | install_user_packages
778 | install_hadoop
779 | install_hbase
780 | install_zookeeper
781 | configure_hadoop
782 | configure_hbase
783 | install_pig
784 | 
785 | for role in $(echo "$ROLES" | tr "," "\n"); do
786 |   case $role in
787 |   nn)
788 |     setup_web
789 |     start_namenode
790 |     start_master
791 |     ;;
792 |   snn)
793 |     start_daemon secondarynamenode
794 |     ;;
795 |   jt)
796 |     start_daemon jobtracker
797 |     ;;
798 |   dn)
799 |     start_daemon datanode
800 |     start_region
801 |     ;;
802 |   tt)
803 |     start_daemon tasktracker
804 |     if [ ! -z "$INSTALL_PROFILER" ]; then
805 |        install_yourkit
806 |     fi
807 |     ;;
808 |   esac
809 | done
810 | 
811 | 


--------------------------------------------------------------------------------
/example_scripts/zookeeper-ec2-init-remote.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash -x
  2 | 
  3 | # Licensed to the Apache Software Foundation (ASF) under one or more
  4 | # contributor license agreements.  See the NOTICE file distributed with
  5 | # this work for additional information regarding copyright ownership.
  6 | # The ASF licenses this file to You under the Apache License, Version 2.0
  7 | # (the "License"); you may not use this file except in compliance with
  8 | # the License.  You may obtain a copy of the License at
  9 | #
 10 | #     http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | ################################################################################
 19 | # Script that is run on each EC2 instance on boot. It is passed in the EC2 user
 20 | # data, so should not exceed 16K in size after gzip compression.
 21 | #
 22 | # This script is executed by /etc/init.d/ec2-run-user-data, and output is
 23 | # logged to /var/log/messages.
 24 | ################################################################################
 25 | 
 26 | ################################################################################
 27 | # Initialize variables
 28 | ################################################################################
 29 | 
 30 | # Substitute environment variables passed by the client
 31 | export %ENV%
 32 | 
 33 | ZK_VERSION=${ZK_VERSION:-3.2.2}
 34 | ZOOKEEPER_HOME=/usr/local/zookeeper-$ZK_VERSION
 35 | ZK_CONF_DIR=/etc/zookeeper/conf
 36 | 
 37 | function register_auto_shutdown() {
 38 |   if [ ! -z "$AUTO_SHUTDOWN" ]; then
 39 |     shutdown -h +$AUTO_SHUTDOWN >/dev/null &
 40 |   fi
 41 | }
 42 | 
 43 | # Install a list of packages on debian or redhat as appropriate
 44 | function install_packages() {
 45 |   if which dpkg &> /dev/null; then
 46 |     apt-get update
 47 |     apt-get -y install $@
 48 |   elif which rpm &> /dev/null; then
 49 |     yum install -y $@
 50 |   else
 51 |     echo "No package manager found."
 52 |   fi
 53 | }
 54 | 
 55 | # Install any user packages specified in the USER_PACKAGES environment variable
 56 | function install_user_packages() {
 57 |   if [ ! -z "$USER_PACKAGES" ]; then
 58 |     install_packages $USER_PACKAGES
 59 |   fi
 60 | }
 61 | 
 62 | function install_zookeeper() {
 63 |   zk_tar_url=http://www.apache.org/dist/hadoop/zookeeper/zookeeper-$ZK_VERSION/zookeeper-$ZK_VERSION.tar.gz
 64 |   zk_tar_file=`basename $zk_tar_url`
 65 |   zk_tar_md5_file=`basename $zk_tar_url.md5`
 66 | 
 67 |   curl="curl --retry 3 --silent --show-error --fail"
 68 |   for i in `seq 1 3`;
 69 |   do
 70 |     $curl -O $zk_tar_url
 71 |     $curl -O $zk_tar_url.md5
 72 |     if md5sum -c $zk_tar_md5_file; then
 73 |       break;
 74 |     else
 75 |       rm -f $zk_tar_file $zk_tar_md5_file
 76 |     fi
 77 |   done
 78 | 
 79 |   if [ ! -e $zk_tar_file ]; then
 80 |     echo "Failed to download $zk_tar_url. Aborting."
 81 |     exit 1
 82 |   fi
 83 | 
 84 |   tar zxf $zk_tar_file -C /usr/local
 85 |   rm -f $zk_tar_file $zk_tar_md5_file
 86 | 
 87 |   echo "export ZOOKEEPER_HOME=$ZOOKEEPER_HOME" >> ~root/.bashrc
 88 |   echo 'export PATH=$JAVA_HOME/bin:$ZOOKEEPER_HOME/bin:$PATH' >> ~root/.bashrc
 89 | }
 90 | 
 91 | function configure_zookeeper() {
 92 |   mkdir -p /mnt/zookeeper/logs
 93 |   ln -s /mnt/zookeeper/logs /var/log/zookeeper
 94 |   mkdir -p /var/log/zookeeper/txlog
 95 |   mkdir -p $ZK_CONF_DIR
 96 |   cp $ZOOKEEPER_HOME/conf/log4j.properties $ZK_CONF_DIR
 97 | 
 98 |   sed -i -e "s|log4j.rootLogger=INFO, CONSOLE|log4j.rootLogger=INFO, ROLLINGFILE|" \
 99 |          -e "s|log4j.appender.ROLLINGFILE.File=zookeeper.log|log4j.appender.ROLLINGFILE.File=/var/log/zookeeper/zookeeper.log|" \
100 |       $ZK_CONF_DIR/log4j.properties
101 |       
102 |   # Ensure ZooKeeper starts on boot
103 |   cat > /etc/rc.local <<EOF
104 | ZOOCFGDIR=$ZK_CONF_DIR $ZOOKEEPER_HOME/bin/zkServer.sh start > /dev/null 2>&1 &
105 | EOF
106 | 
107 | }
108 | 
109 | register_auto_shutdown
110 | install_user_packages
111 | install_zookeeper
112 | configure_zookeeper
113 | 


--------------------------------------------------------------------------------
/plugins/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/digitalreasoning/PyStratus/c7e25c9e7dcc5a98f8d317c0f9f0985fbf79ca59/plugins/__init__.py


--------------------------------------------------------------------------------
/plugins/cassandra/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/digitalreasoning/PyStratus/c7e25c9e7dcc5a98f8d317c0f9f0985fbf79ca59/plugins/cassandra/__init__.py


--------------------------------------------------------------------------------
/plugins/cassandra/cli.plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = cassandra
 3 | Module = cli
 4 | 
 5 | [Documentation]
 6 | Author = Abe Music
 7 | Website = http://github.com/digitalreasoning/PyStratus
 8 | Description = A Cassandra CLI implementation for PyStratus
 9 | 
10 | 


--------------------------------------------------------------------------------
/plugins/cassandra/cli.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import logging
  3 | import urllib
  4 | 
  5 | from optparse import make_option
  6 | 
  7 | from cloud.plugin import CLIPlugin
  8 | from cloud.plugin import BASIC_OPTIONS
  9 | from cloud.service import InstanceTemplate
 10 | from cloud.util import log_cluster_action
 11 | from optparse import make_option
 12 | from prettytable import PrettyTable
 13 | from pprint import pprint
 14 | 
 15 | # Add options here to override what's in the clusters.cfg file
 16 | # TODO
 17 | 
 18 | class CassandraServiceCLI(CLIPlugin):
 19 |     USAGE = """Cassandra service usage: CLUSTER COMMAND [OPTIONS]
 20 | where COMMAND and [OPTIONS] may be one of:
 21 |             
 22 |                                CASSANDRA COMMANDS
 23 |   ----------------------------------------------------------------------------------
 24 |   start-cassandra                     starts the cassandra service on all nodes
 25 |   stop-cassandra                      stops the cassandra service on all nodes
 26 |   print-ring [INSTANCE_IDX]           displays the cluster's ring information
 27 |   rebalance                           recalculates tokens evenly and moves nodes
 28 |   remove-down-nodes                   removes nodes that are down from the ring
 29 | 
 30 |                                CLUSTER COMMANDS
 31 |   ----------------------------------------------------------------------------------
 32 |   details                             list instances in CLUSTER
 33 |   launch-cluster NUM_NODES            launch NUM_NODES Cassandra nodes
 34 |   expand-cluster NUM_NODES            adds new nodes
 35 |   terminate-cluster                   terminate all instances in CLUSTER
 36 |   login                               log in to the master in CLUSTER over SSH
 37 | 
 38 |                                STORAGE COMMANDS
 39 |   ----------------------------------------------------------------------------------
 40 |   list-storage                        list storage volumes for CLUSTER
 41 |   create-storage NUM_INSTANCES        create volumes for NUM_INSTANCES instances
 42 |     SPEC_FILE                           for CLUSTER, using SPEC_FILE
 43 |   delete-storage                      delete all storage volumes for CLUSTER
 44 | """
 45 |     
 46 |     def __init__(self):
 47 |         super(CassandraServiceCLI, self).__init__()
 48 | 
 49 |         #self._logger = logging.getLogger("CassandraServiceCLI")
 50 |  
 51 |     def execute_command(self, argv, options_dict):
 52 |         if len(argv) < 2:
 53 |             self.print_help()
 54 | 
 55 |         self._cluster_name = argv[0]
 56 |         self._command_name = argv[1]
 57 | 
 58 |         # strip off the cluster name and command from argv
 59 |         argv = argv[2:]
 60 | 
 61 |         # handle all known commands and error on an unknown command
 62 |         if self._command_name == "details":
 63 |             self.print_instances()
 64 |             
 65 |         elif self._command_name == "simple-details":
 66 |             self.simple_print_instances(argv, options_dict)
 67 | 
 68 |         elif self._command_name == "terminate-cluster":
 69 |             self.terminate_cluster(argv, options_dict)
 70 | 
 71 |         elif self._command_name == "launch-cluster":
 72 |             self.launch_cluster(argv, options_dict)
 73 | 
 74 |         elif self._command_name == "expand-cluster":
 75 |             self.expand_cluster(argv, options_dict)
 76 | 
 77 |         elif self._command_name == "replace-down-nodes":
 78 |             self.replace_down_nodes(argv, options_dict)
 79 | 
 80 |         elif self._command_name == "login":
 81 |             self.login(argv, options_dict)
 82 | 
 83 |         elif self._command_name == "run-command":
 84 |             self.run_command(argv, options_dict)
 85 | 
 86 |         elif self._command_name == "transfer-files":
 87 |             self.transfer_files(argv, options_dict)
 88 | 
 89 |         elif self._command_name == "create-storage":
 90 |             self.create_storage(argv, options_dict)
 91 | 
 92 |         elif self._command_name == "delete-storage":
 93 |             self.delete_storage(argv, options_dict)
 94 | 
 95 |         elif self._command_name == "list-storage":
 96 |             self.print_storage()
 97 | 
 98 |         elif self._command_name == "stop-cassandra":
 99 |             self.stop_cassandra(argv, options_dict)
100 | 
101 |         elif self._command_name == "start-cassandra":
102 |             self.start_cassandra(argv, options_dict)
103 | 
104 |         elif self._command_name == "print-ring":
105 |             self.print_ring(argv, options_dict)
106 | 
107 |         elif self._command_name == "hack-config-for-multi-region":
108 |             self.hack_config_for_multi_region(argv, options_dict)
109 |             
110 |         elif self._command_name == "rebalance":
111 |             self.rebalance(argv, options_dict)
112 | 
113 |         elif self._command_name == "remove-down-nodes":
114 |             self.remove_down_nodes(argv, options_dict)
115 |         else:
116 |             self.print_help()
117 | 
118 |     def expand_cluster(self, argv, options_dict):
119 |         expected_arguments = ["NUM_INSTANCES"]
120 |         opt, args = self.parse_options(self._command_name,
121 |                                        argv,
122 |                                        expected_arguments=expected_arguments,
123 |                                        unbounded_args=True)
124 |         opt.update(options_dict)
125 | 
126 |         number_of_nodes = int(args[0])
127 |         instance_template = InstanceTemplate(
128 |             (self.service.CASSANDRA_NODE,),
129 |             number_of_nodes,
130 |             opt.get('image_id'),
131 |             opt.get('instance_type'),
132 |             opt.get('key_name'),
133 |             opt.get('public_key'),
134 |             opt.get('user_data_file'),
135 |             opt.get('availability_zone'),
136 |             opt.get('user_packages'),
137 |             opt.get('auto_shutdown'),
138 |             opt.get('env'),
139 |             opt.get('security_groups'))
140 | #        instance_template.add_env_strings(["CLUSTER_SIZE=%d" % number_of_nodes])
141 | 
142 |         print "Expanding cluster by %d instance(s)...please wait." % number_of_nodes
143 | 
144 |         self.service.expand_cluster(instance_template)
145 | 
146 |     def replace_down_nodes(self, argv, options_dict):
147 |         opt, args = self.parse_options(self._command_name,
148 |                                        argv)
149 |         opt.update(options_dict)
150 | 
151 |         # test files
152 |         for key in ['cassandra_config_file']:
153 |             if opt.get(key) is not None:
154 |                 try:
155 |                     url = urllib.urlopen(opt.get(key))
156 |                     data = url.read()
157 |                 except:
158 |                     raise
159 |                     print "The file defined by %s (%s) does not exist. Aborting." % (key, opt.get(key))
160 |                     sys.exit(1)
161 | 
162 |         number_of_nodes = len(self.service.calc_down_nodes())
163 |         instance_template = InstanceTemplate(
164 |             (self.service.CASSANDRA_NODE,),
165 |             number_of_nodes,
166 |             opt.get('image_id'),
167 |             opt.get('instance_type'),
168 |             opt.get('key_name'),
169 |             opt.get('public_key'),
170 |             opt.get('user_data_file'),
171 |             opt.get('availability_zone'),
172 |             opt.get('user_packages'),
173 |             opt.get('auto_shutdown'),
174 |             opt.get('env'),
175 |             opt.get('security_groups'))
176 | #        instance_template.add_env_strings(["CLUSTER_SIZE=%d" % number_of_nodes])
177 | 
178 |         print "Replacing %d down instance(s)...please wait." % number_of_nodes
179 | 
180 |         self.service.replace_down_nodes(instance_template,
181 |                                         opt.get('cassandra_config_file'))
182 | 
183 |     def launch_cluster(self, argv, options_dict):
184 |         """
185 |         """
186 |         expected_arguments = ["NUM_INSTANCES"]
187 |         opt, args = self.parse_options(self._command_name, 
188 |                                       argv,
189 |                                       expected_arguments=expected_arguments)
190 |         opt.update(options_dict)
191 | 
192 |         if self.service.get_instances() :
193 |             print "This cluster is already running.  It must be terminated prior to being launched again."
194 |             sys.exit(1)
195 | 
196 |         number_of_nodes = int(args[0])
197 |         instance_template = InstanceTemplate(
198 |             (self.service.CASSANDRA_NODE,), 
199 |             number_of_nodes,
200 |             opt.get('image_id'),
201 |             opt.get('instance_type'),
202 |             opt.get('key_name'),
203 |             opt.get('public_key'), 
204 |             opt.get('user_data_file'),
205 |             opt.get('availability_zone'), 
206 |             opt.get('user_packages'),
207 |             opt.get('auto_shutdown'), 
208 |             opt.get('env'),
209 |             opt.get('security_groups'))
210 |         instance_template.add_env_strings(["CLUSTER_SIZE=%d" % number_of_nodes])
211 | 
212 |         print "Launching cluster with %d instance(s)...please wait." % number_of_nodes
213 | 
214 |         self.service.launch_cluster(instance_template, opt)
215 | 
216 | 
217 |         log_cluster_action(opt.get('config_dir'), self._cluster_name,
218 |             "launch-cluster", number_of_nodes, opt.get("instance_type"),
219 |             None, "cassandra")
220 | 
221 |     def stop_cassandra(self, argv, options_dict):
222 |         instances = self.service.get_instances()
223 |         if not instances:
224 |             print "No running instances. Aborting."
225 |             sys.exit(1)
226 | 
227 |         print "Stopping Cassandra service on %d instance(s)...please wait." % len(instances)
228 |         self.service.stop_cassandra(instances=instances)
229 | 
230 |     def start_cassandra(self, argv, options_dict):
231 |         instances = self.service.get_instances()
232 |         if not instances:
233 |             print "No running instances. Aborting."
234 |             sys.exit(1)
235 | 
236 |         print "Starting Cassandra service on %d instance(s)...please wait." % len(instances)
237 |         self.service.start_cassandra(instances=instances)
238 | 
239 |     def print_ring(self, argv, options_dict):
240 |         instances = self.service.get_instances()
241 |         if not instances:
242 |             print("No running instances. Aborting.")
243 |             sys.exit(1)
244 | 
245 |         idx = 0
246 |         if len(argv) > 0 :
247 |             idx = int(argv[0])
248 | 
249 |         print(self.service.print_ring(instances[idx]))
250 | 
251 |     def hack_config_for_multi_region(self, argv, options_dict):
252 |         instances = self.service.get_instances()
253 |         if not instances:
254 |             print "No running instances. Aborting."
255 |             sys.exit(1)
256 | 
257 |         opt_list = BASIC_OPTIONS + [make_option("--seeds", metavar="SEEDS", action="store", type="str", default="",  help="explicit comma separated seed list")]
258 |         opt, args = self.parse_options(self._command_name, argv, opt_list)
259 | 
260 |         self.service.hack_config_for_multi_region(options_dict.get('ssh_options'), opt['seeds'])
261 |         
262 |     def rebalance(self, argv, options_dict):
263 |         instances = self.service.get_instances()
264 |         if not instances:
265 |             print "No running instances. Aborting."
266 |             sys.exit(1)
267 | 
268 |         opt, args = self.parse_options(self._command_name, argv, [make_option("--offset", metavar="OFFSET", action="store", type=int, default=0, help="token offset")])
269 |         self.service.rebalance(offset=opt['offset'])
270 | 
271 |     def remove_down_nodes(self, argv, options_dict):
272 |         instances = self.service.get_instances()
273 |         if not instances:
274 |             print "No running instances. Aborting."
275 |             sys.exit(1)
276 | 
277 |         self.service.remove_down_nodes()
278 | 
279 |     def create_storage(self, argv, options_dict):
280 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS,
281 |                                        ["NUM_INSTANCES", "SPEC_FILE"])
282 |         opt.update(options_dict)
283 | 
284 |         role = self.service.CASSANDRA_NODE
285 |         number_of_instances = int(args[0])
286 |         spec_file = args[1]
287 | 
288 |         # FIXME
289 |         # check_options_set(opt, ['availability_zone'])
290 | 
291 |         self.service.create_storage(role, 
292 |                                     number_of_instances,
293 |                                     opt.get('availability_zone'),
294 |                                     spec_file)
295 |         self.print_storage()
296 | 


--------------------------------------------------------------------------------
/plugins/cassandra/service.plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = cassandra
3 | Module = service
4 | 
5 | [Documentation]
6 | Author = Abe Music
7 | Website = http://github.com/digitalreasoning/PyStratus
8 | Description = A Cassandra service implementation for PyStratus
9 | 


--------------------------------------------------------------------------------
/plugins/hadoop/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/digitalreasoning/PyStratus/c7e25c9e7dcc5a98f8d317c0f9f0985fbf79ca59/plugins/hadoop/__init__.py


--------------------------------------------------------------------------------
/plugins/hadoop/cli.plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = hadoop 
 3 | Module = cli
 4 | 
 5 | [Documentation]
 6 | Author = Abe Music
 7 | Website = http://github.com/digitalreasoning/PyStratus
 8 | Description = A Hadoop CLI implementation for PyStratus
 9 | 
10 | 


--------------------------------------------------------------------------------
/plugins/hadoop/cli.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import logging
  4 | import urllib
  5 | 
  6 | from cloud.plugin import CLIPlugin
  7 | from cloud.plugin import BASIC_OPTIONS
  8 | from cloud.service import InstanceTemplate
  9 | from cloud.util import log_cluster_action
 10 | from optparse import make_option
 11 | from prettytable import PrettyTable
 12 | 
 13 | class HadoopServiceCLI(CLIPlugin):
 14 |     USAGE = """Hadoop service usage: CLUSTER COMMAND [OPTIONS]
 15 | where COMMAND and [OPTIONS] may be one of:
 16 |             
 17 |                                HADOOP COMMANDS
 18 |   ----------------------------------------------------------------------------------
 19 |   launch-master                       launch or find a master in CLUSTER
 20 |   launch-slaves NUM_SLAVES            launch NUM_SLAVES slaves in CLUSTER
 21 |   terminate-dead-nodes                find and terminate dead nodes in CLUSTER
 22 |   start-hadoop                        starts all processes on namenode and datanodes
 23 |   stop-hadoop                         stops all processes on namenode and datanodes
 24 |   send-config-files                   sends the given config files to each node and
 25 |                                         overwrites the existing file in the hadoop
 26 |                                         conf directory (BE CAREFUL!)
 27 |   get-config-files                    gets the given config files from the namenode
 28 |                                         and stores them in the cwd 
 29 | 
 30 |                                HBASE COMMANDS
 31 |   ----------------------------------------------------------------------------------
 32 |   start-hbase                         starts processes on namenode and datanodes
 33 |   stop-hbase                          stops processes on namenode and datanodes
 34 |   send-hbase-config-files             sends the given config files to each node and
 35 |                                         overwrites the existing file in the hadoop
 36 |                                         conf directory (BE CAREFUL!)
 37 |   get-hbase-config-files              gets the given config files from the namenode
 38 |                                         and stores them in the cwd 
 39 | 
 40 |                              CLOUDBASE COMMANDS
 41 |   ----------------------------------------------------------------------------------
 42 |   start-cloudbase                     starts processes on namenode and datanodes
 43 |   stop-cloudbase                      stops proceses on namenode and datanodes
 44 | 
 45 |                                CLUSTER COMMANDS
 46 |   ----------------------------------------------------------------------------------
 47 |   details                             list instances in CLUSTER
 48 |   launch-cluster NUM_SLAVES           launch a master and NUM_SLAVES slaves in 
 49 |                                         CLUSTER
 50 |   terminate-cluster                   terminate all instances in CLUSTER
 51 |   login                               log in to the master in CLUSTER over SSH
 52 |   proxy                               start a SOCKS proxy on localhost into the
 53 |                                         CLUSTER
 54 | 
 55 |                                STORAGE COMMANDS
 56 |   ----------------------------------------------------------------------------------
 57 |   list-storage                        list storage volumes for CLUSTER
 58 |   create-storage ROLE NUM_INSTANCES   create volumes for NUM_INSTANCES instances of
 59 |     SPEC_FILE                           type ROLE for CLUSTER, using SPEC_FILE
 60 |   delete-storage                      delete all storage volumes for CLUSTER
 61 | """
 62 |     
 63 |     def __init__(self):
 64 |         super(HadoopServiceCLI, self).__init__()
 65 |  
 66 |     def execute_command(self, argv, options_dict):
 67 |         if len(argv) < 2:
 68 |             self.print_help()
 69 | 
 70 |         self._cluster_name = argv[0]
 71 |         self._command_name = argv[1]
 72 | 
 73 |         # strip off the cluster name and command from argv
 74 |         argv = argv[2:]
 75 | 
 76 |         # get spot configuration
 77 |         self._spot_config = {
 78 |                 "spot_cluster": True if os.environ.get("SPOT_CLUSTER", options_dict.get("spot_cluster", "false")).lower() == "true" else False,
 79 |                 "master_spot": True if options_dict.get("master_spot", "false").lower() == "true" else False,
 80 |                 "max_price": options_dict.get("max_price", None),
 81 |                 "launch_group": options_dict.get("launch_group", None),
 82 |             }
 83 | 
 84 |         # handle all known commands and error on an unknown command
 85 |         if self._command_name == "details":
 86 |             self.print_instances()
 87 | 
 88 |         elif self._command_name == "simple-details":
 89 |             self.simple_print_instances(argv, options_dict)
 90 | 
 91 |         elif self._command_name == "proxy":
 92 |             self.proxy(argv, options_dict)
 93 | 
 94 |         elif self._command_name == "terminate-cluster":
 95 |             self.terminate_cluster(argv, options_dict)
 96 | 
 97 |         elif self._command_name == "launch-cluster":        
 98 |             self.launch_cluster(argv, options_dict)
 99 | 
100 |         elif self._command_name == "terminate-dead-nodes":
101 |             self.terminate_dead_nodes(argv, options_dict)
102 | 
103 |         elif self._command_name == "launch-master":
104 |             self.launch_master(argv, options_dict)
105 | 
106 |         elif self._command_name == "launch-slaves":
107 |             self.launch_slaves(argv, options_dict)
108 | 
109 |         elif self._command_name == "start-hadoop":
110 |             self.start_hadoop(argv, options_dict)
111 | 
112 |         elif self._command_name == "stop-hadoop":
113 |             self.stop_hadoop(argv, options_dict)
114 | 
115 |         elif self._command_name == "start-hbase":
116 |             self.start_hbase(argv, options_dict)
117 | 
118 |         elif self._command_name == "stop-hbase":
119 |             self.stop_hbase(argv, options_dict)
120 | 
121 |         elif self._command_name == "send-config-files":
122 |             self.send_config_files(argv, options_dict)
123 | 
124 |         elif self._command_name == "get-config-files":
125 |             self.get_config_files(argv, options_dict)
126 | 
127 |         elif self._command_name == "send-hbase-config-files":
128 |             self.send_hbase_config_files(argv, options_dict)
129 | 
130 |         elif self._command_name == "get-hbase-config-files":
131 |             self.get_hbase_config_files(argv, options_dict)
132 | 
133 |         elif self._command_name == "login":
134 |             self.login(argv, options_dict)
135 | 
136 |         elif self._command_name == "run-command":
137 |             self.run_command(argv, options_dict)
138 | 
139 |         elif self._command_name == "transfer-files":
140 |             self.transfer_files(argv, options_dict)
141 | 
142 |         elif self._command_name == "create-storage":
143 |             self.create_storage(argv, options_dict)
144 | 
145 |         elif self._command_name == "delete-storage":
146 |             self.delete_storage(argv, options_dict)
147 | 
148 |         elif self._command_name == "list-storage":
149 |             self.print_storage()
150 | 
151 |         elif self._command_name == "start-cloudbase":
152 |             self.start_cloudbase(argv, options_dict)
153 | 
154 |         elif self._command_name == "stop-cloudbase":
155 |             self.stop_cloudbase(argv, options_dict)
156 |             
157 |         else:
158 |             self.print_help()
159 | 
160 |     def launch_cluster(self, argv, options_dict):
161 |         """
162 |         """
163 | 
164 |         expected_arguments = ["NUM_SLAVES"]
165 |         opt, args = self.parse_options(self._command_name,
166 |                                        argv,
167 |                                        expected_arguments=expected_arguments)
168 |         opt.update(options_dict)
169 | 
170 |         # if PROVIDER is set in the environment that takes precedence over
171 |         # anything in the clusters.cfg; hbase is the default if nothing is set
172 |         provider = os.environ.get("PROVIDER", opt.get("provider", "hbase")).lower()
173 | 
174 |         # default for spot clusters is for the master to NOT be spot; munging
175 |         # some things around here if the opposite is specified
176 |         spot_cluster_orig = self._spot_config["spot_cluster"]
177 |         if spot_cluster_orig and self._spot_config["master_spot"]:
178 |             self._spot_config["spot_cluster"] = True
179 |         else:
180 |             self._spot_config["spot_cluster"] = False
181 | 
182 |         number_of_slaves = int(args[0])
183 |         master_templates = [
184 |             InstanceTemplate(
185 |                 (
186 |                     self.service.NAMENODE, 
187 |                     self.service.SECONDARY_NAMENODE, 
188 |                     self.service.JOBTRACKER
189 |                 ),
190 |                 1,
191 |                 opt.get('image_id'),
192 |                 opt.get('instance_type'), 
193 |                 opt.get('key_name'),
194 |                 opt.get('public_key'), 
195 |                 opt.get('user_data_file'),
196 |                 opt.get('availability_zone'), 
197 |                 opt.get('user_packages'),
198 |                 opt.get('auto_shutdown'), 
199 |                 opt.get('env'),
200 |                 opt.get('security_groups'),
201 |                 self._spot_config)   # don't want the master to be a spot instance
202 |         ]
203 |         for it in master_templates:
204 |             it.add_env_strings([
205 |                 "CLUSTER_SIZE=%d" % (number_of_slaves+1),
206 |                 "PROVIDER=%s" % (provider)
207 |             ])
208 | 
209 |         print "Using %s as the backend datastore" % (provider)
210 | 
211 |         print "Launching cluster with %d instance(s) - starting master...please wait." % (number_of_slaves+1)
212 |         
213 |         master = self.service.launch_cluster(master_templates, opt.get('client_cidr'), opt.get('config_dir'))
214 | 
215 |         if master is None:
216 |             print "An error occurred starting the master node. Check the logs for more information."
217 |             sys.exit(1)
218 | 
219 |         log_cluster_action(opt.get('config_dir'), self._cluster_name,
220 |             "launch-cluster", 1, opt.get("instance_type"),
221 |             provider, "hadoop")
222 | 
223 |         print "Master now running at %s - starting slaves" % master.public_dns_name
224 | 
225 |         self._spot_config["spot_cluster"] = spot_cluster_orig
226 | 
227 |         slave_templates = [
228 |             InstanceTemplate(
229 |                 (
230 |                     self.service.DATANODE, 
231 |                     self.service.TASKTRACKER
232 |                 ), 
233 |                 number_of_slaves,
234 |                 opt.get('image_id'),
235 |                 opt.get('instance_type'),
236 |                 opt.get('key_name'),
237 |                 opt.get('public_key'),
238 |                 opt.get('user_data_file'),
239 |                 opt.get('availability_zone'),
240 |                 opt.get('user_packages'),
241 |                 opt.get('auto_shutdown'),
242 |                 opt.get('env'),
243 |                 opt.get('security_groups'),
244 |                 self._spot_config)
245 |         ]
246 | 
247 |         for it in slave_templates:
248 |             it.add_env_strings([
249 |                 "CLUSTER_SIZE=%d" % (number_of_slaves+1),
250 |                 "NN_HOST=%s" % master.private_dns_name,
251 |                 "JT_HOST=%s" % master.private_dns_name,
252 |                 "ZOOKEEPER_QUORUM=%s" % master.private_dns_name,
253 |                 "PROVIDER=%s" % (provider)
254 |             ])
255 | 
256 |         print "Launching %d slave instance(s)...please wait." % (number_of_slaves)
257 |         slave = self.service.launch_cluster(slave_templates, opt.get('client_cidr'), opt.get('config_dir'))        
258 |         
259 |         if slave is None:
260 |             print "An error occurred starting the slave nodes.  Check the logs for more details"
261 |             sys.exit(1)
262 |             
263 |         log_cluster_action(opt.get('config_dir'), self._cluster_name,
264 |             "launch-cluster", number_of_slaves, opt.get("instance_type"),
265 |             provider, "hadoop")
266 | 
267 |         #Once the cluster is up, if the provider is Cloudbase, we need to ensure that Cloudbase has been initialized
268 |         #and launch the servers
269 |         if provider == "cloudbase":
270 | 
271 |             #log in to the master and run a startup script
272 |             print "Provider is cloudbase - starting cloudbase processes ... please wait"
273 |             self.service.start_cloudbase(options_dict,
274 |                 options_dict.get("hadoop_user", "hadoop"), 
275 |                 options_dict.get("ssh_user", "root"))
276 |             
277 |         print "Finished - browse the cluster at http://%s/" % master.public_dns_name
278 |  
279 |         self.logger.debug("Startup complete.")
280 | 
281 |     def launch_master(self, argv, options_dict):
282 |         """Launch the master node of a CLUSTER."""
283 | 
284 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS)
285 |         opt.update(options_dict)
286 |         
287 |         provider = opt.get("provider")
288 |         if provider is None:
289 |             provider = "hbase"
290 |         else:
291 |             provider.lower()
292 | 
293 |         # default for spot clusters is for the master to NOT be spot; munging
294 |         # some things around here if the opposite is specified
295 |         spot_cluster_orig = self._spot_config["spot_cluster"]
296 |         if spot_cluster_orig and self._spot_config["master_spot"]:
297 |             self._spot_config["spot_cluster"] = True
298 |         else:
299 |             self._spot_config["spot_cluster"] = False
300 | 
301 |         master_templates = [
302 |             InstanceTemplate(
303 |                 (
304 |                     self.service.NAMENODE, 
305 |                     self.service.SECONDARY_NAMENODE, 
306 |                     self.service.JOBTRACKER
307 |                 ),
308 |                 1,
309 |                 opt.get('image_id'),
310 |                 opt.get('instance_type'), 
311 |                 opt.get('key_name'),
312 |                 opt.get('public_key'), 
313 |                 opt.get('user_data_file'),
314 |                 opt.get('availability_zone'), 
315 |                 opt.get('user_packages'),
316 |                 opt.get('auto_shutdown'), 
317 |                 opt.get('env'),
318 |                 opt.get('security_groups'),
319 |                 self._spot_config)
320 |             ]
321 | 
322 |         for it in master_templates:
323 |             it.add_env_strings([
324 |                 "PROVIDER=%s" % (provider)
325 |             ])
326 | 
327 |         print "Launching cluster master...please wait." 
328 |         jobtracker = self.service.launch_cluster(master_templates, 
329 |                                                  opt.get('client_cidr'),
330 |                                                  opt.get('config_dir'))
331 | 
332 |         if jobtracker is None:
333 |             print "An error occurred started the Hadoop service. Check the logs for more information."
334 |             sys.exit(1)
335 | 
336 |         print "Browse the cluster at http://%s/" % jobtracker.public_dns_name
337 |         self.logger.debug("Startup complete.")
338 | 
339 |     def launch_slaves(self, argv, options_dict):
340 |         """Launch slave/datanodes in CLUSTER."""
341 | 
342 |         expected_arguments = ["NUM_SLAVES"]
343 |         opt, args = self.parse_options(self._command_name,
344 |                                        argv,
345 |                                        expected_arguments=expected_arguments)
346 |         opt.update(options_dict)
347 | 
348 |         provider = opt.get("provider")
349 |         if provider is None:
350 |             provider = "hbase"
351 |         else:
352 |             provider.lower()
353 | 
354 |         try:
355 |             number_of_slaves = int(args[0])
356 |         except ValueError:
357 |             print("Number of slaves must be an integer")
358 |             return
359 | 
360 |         instance_templates = [
361 |             InstanceTemplate(
362 |                 (
363 |                     self.service.DATANODE, 
364 |                     self.service.TASKTRACKER
365 |                 ), 
366 |                 number_of_slaves,
367 |                 opt.get('image_id'),
368 |                 opt.get('instance_type'),
369 |                 opt.get('key_name'),
370 |                 opt.get('public_key'),
371 |                 opt.get('user_data_file'),
372 |                 opt.get('availability_zone'),
373 |                 opt.get('user_packages'),
374 |                 opt.get('auto_shutdown'),
375 |                 opt.get('env'),
376 |                 opt.get('security_groups'),
377 |                 self._spot_config)
378 |             ]
379 | 
380 |         # @todo - this is originally passed in when creating a cluster from
381 |         # scratch, need to figure out what to do if we're growing a cluster
382 |         #instance_template.add_env_strings([
383 |         #    "CLUSTER_SIZE=%d" % (number_of_slaves+1)
384 |         #])
385 | 
386 |         print("Launching %s slave%s for %s" % (number_of_slaves, 
387 |             "" if number_of_slaves==1 else "s", self._cluster_name))
388 | 
389 |         # this is needed to filter the jobtracker/namenode down into
390 |         # hadoop-site.xml for the new nodes
391 |         namenode = self.service.get_namenode()
392 |         jobtracker = self.service.get_jobtracker()
393 |         for instance_template in instance_templates:
394 |             instance_template.add_env_strings([
395 |                 "NN_HOST=%s" % namenode.public_dns_name,
396 |                 "JT_HOST=%s" % jobtracker.public_dns_name,
397 |                 "ZOOKEEPER_QUORUM=%s" % namenode.private_dns_name,
398 |                 "PROVIDER=%s" % (provider)
399 |             ])
400 | 
401 |         # I think this count can be wrong if run too soon after running
402 |         # terminate_dead_nodes
403 |         existing_tasktrackers = self.service.get_tasktrackers()
404 |         num_tasktrackers = len(existing_tasktrackers) if existing_tasktrackers else 0
405 |         self.service.launch_cluster(instance_templates, 
406 |             opt.get('client_cidr'), opt.get('config_dir'),
407 |             num_existing_tasktrackers=num_tasktrackers)
408 | 
409 |     def start_cloudbase(self, argv, options_dict):
410 |         """Start the various cloudbase processes on the namenode and slave nodes - initialize the cloudbase instance, if necessary"""
411 |         
412 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS)
413 |         opt.update(options_dict)
414 | 
415 |         self.service.start_cloudbase(options_dict,
416 |             options_dict.get("hadoop_user", "hadoop"), 
417 |             options_dict.get("ssh_user", "root"))
418 | 
419 |     def stop_cloudbase(self, argv, options_dict):
420 |         """Stop the various cloudbase processes on the namenode and slave
421 |         nodes"""
422 |         
423 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS)
424 |         opt.update(options_dict)
425 | 
426 |         self.service.stop_cloudbase(options_dict)
427 |     
428 |     def start_hadoop(self, argv, options_dict):
429 |         """Start the various processes on the namenode and slave nodes"""
430 | 
431 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS)
432 |         opt.update(options_dict)
433 | 
434 |         print "Starting hadoop..."
435 |         self.service.start_hadoop(options_dict.get("hadoop_user", "hadoop"))
436 | 
437 |     def stop_hadoop(self, argv, options_dict):
438 |         """Stop the various processes on the namenode and slave nodes"""
439 | 
440 |         x = "n"
441 |         while True:
442 |             try:
443 |                 x = raw_input("Are you sure you want to stop Hadoop? (Y/n) ").lower()
444 |                 if x in ["y", "n"]:
445 |                     break
446 |                 print "Value must be either y or n. Try again."
447 |             except KeyboardInterrupt:
448 |                 x = "n"
449 |                 print ""
450 |                 break
451 |         
452 |         if x == "n":
453 |             print "Quitting"
454 |             sys.exit(1)
455 | 
456 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS)
457 |         opt.update(options_dict)
458 | 
459 |         print "Stopping hadoop..."
460 |         self.service.stop_hadoop(options_dict.get("hadoop_user", "hadoop"))
461 | 
462 |     def start_hbase(self, argv, options_dict):
463 |         """Start the various hbase processes on the namenode and slave nodes"""
464 | 
465 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS)
466 |         opt.update(options_dict)
467 | 
468 |         print "Starting hbase..."
469 |         self.service.start_hbase(options_dict.get("hadoop_user", "hadoop"))
470 | 
471 |     def stop_hbase(self, argv, options_dict):
472 |         """Stop the various hbase processes on the namenode and slave nodes"""
473 | 
474 |         x = "n"
475 |         while True:
476 |             try:
477 |                 x = raw_input("Are you sure you want to stop HBase? (Y/n) ").lower()
478 |                 if x in ["y", "n"]:
479 |                     break
480 |                 print "Value must be either y or n. Try again."
481 |             except KeyboardInterrupt:
482 |                 x = "n"
483 |                 print ""
484 |                 break
485 |         
486 |         if x == "n":
487 |             print "Quitting"
488 |             sys.exit(1)
489 | 
490 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS)
491 |         opt.update(options_dict)
492 | 
493 |         print "Stopping hbase..."
494 |         self.service.stop_hbase(options_dict.get("hadoop_user", "hadoop"))
495 | 
496 |     def get_config_files(self, argv, options_dict):
497 |         """
498 |         Gets the given config files from the name node and writes them
499 |         to the local directory.
500 |         """
501 | 
502 |         opt, args = self.parse_options(self._command_name, argv, expected_arguments=["FILE*"], unbounded_args=True)
503 |         opt.update(options_dict)
504 | 
505 |         self.service.get_config_files(args, options_dict)
506 | 
507 |     def send_config_files(self, argv, options_dict):
508 |         """
509 |         Sends the given config file to each node in the cluster, overwriting
510 |         the file located in hadoop/conf directory.
511 |         """
512 | 
513 |         opt, args = self.parse_options(self._command_name, argv, expected_arguments=["FILE*"], unbounded_args=True)
514 |         opt.update(options_dict)
515 | 
516 |         self.service.send_config_files(args, options_dict)
517 | 
518 |     def get_hbase_config_files(self, argv, options_dict):
519 |         """
520 |         Gets the given config files from the hbase master node and 
521 |         writes them to the local directory.
522 |         """
523 | 
524 |         opt, args = self.parse_options(self._command_name, argv, expected_arguments=["FILE*"], unbounded_args=True)
525 |         opt.update(options_dict)
526 | 
527 |         self.service.get_hbase_config_files(args, options_dict)
528 | 
529 |     def send_hbase_config_files(self, argv, options_dict):
530 |         """
531 |         Sends the given config file to each node in the cluster, overwriting
532 |         the file located in hadoop/conf directory.
533 |         """
534 | 
535 |         opt, args = self.parse_options(self._command_name, argv, expected_arguments=["FILE*"], unbounded_args=True)
536 |         opt.update(options_dict)
537 | 
538 |         self.service.send_hbase_config_files(args, options_dict)
539 | 
540 |     def terminate_dead_nodes(self, argv, options_dict):
541 |         """Find and terminate dead nodes in CLUSTER."""
542 | 
543 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS)
544 |         opt.update(options_dict)
545 | 
546 |         print("Looking for dead nodes in %s" % self._cluster_name)
547 |         dead_nodes = self.service.find_dead_nodes(self._cluster_name, opt)
548 |         if not dead_nodes:
549 |             print("No dead nodes found")
550 |             return 
551 | 
552 |         print ("Found %s dead nodes" % len(dead_nodes))
553 |         self.service.terminate_nodes(dead_nodes, opt)
554 | 
555 |     def create_storage(self, argv, options_dict):
556 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS,
557 |                                        ["ROLE", "NUM_INSTANCES", "SPEC_FILE"])
558 | 
559 |         opt.update(options_dict)
560 | 
561 |         role = args[0]
562 |         number_of_instances = int(args[1])
563 |         spec_file = args[2]
564 | 
565 |         valid_roles = (self.service.NAMENODE, self.service.DATANODE)
566 |         if role not in valid_roles:
567 |             raise RuntimeError("Role must be one of '%s' or '%s'" % valid_roles)
568 | 
569 |         self.service.create_storage(role, 
570 |                                     number_of_instances,
571 |                                     opt.get('availability_zone'),
572 |                                     spec_file)
573 |         self.print_storage()
574 |     
575 |     def proxy(self, argv, options_dict):
576 |         instances = self.service.get_instances()
577 |         if not instances:
578 |             "No running instances. Aborting."
579 |             sys.exit(1)
580 | 
581 |         result = self.service.proxy(ssh_options=options_dict.get('ssh_options'),   
582 |                                     instances=instances)
583 | 
584 |         if result is None:
585 |             print "Unable to create proxy. Check logs for more information."
586 |             sys.exit(1)
587 | 
588 |         print "Proxy created..."
589 |         print """export HADOOP_CLOUD_PROXY_PID=%s;
590 | echo Proxy pid %s;""" % (result, result)
591 | 


--------------------------------------------------------------------------------
/plugins/hadoop/service.plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = hadoop
3 | Module = service
4 | 
5 | [Documentation]
6 | Author = Abe Music
7 | Website = http://github.com/digitalreasoning/PyStratus
8 | Description = A Hadoop service implementation for PyStratus
9 | 


--------------------------------------------------------------------------------
/plugins/hadoop_cassandra_hybrid/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/digitalreasoning/PyStratus/c7e25c9e7dcc5a98f8d317c0f9f0985fbf79ca59/plugins/hadoop_cassandra_hybrid/__init__.py


--------------------------------------------------------------------------------
/plugins/hadoop_cassandra_hybrid/cli.plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = hadoop_cassandra_hybrid
 3 | Module = cli
 4 | 
 5 | [Documentation]
 6 | Author = Abe Music
 7 | Website = http://github.com/digitalreasoning/PyStratus
 8 | Description = A hybrid Hadoop/Cassandra CLI implementation for PyStratus
 9 | 
10 | 


--------------------------------------------------------------------------------
/plugins/hadoop_cassandra_hybrid/cli.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import logging
  3 | import urllib
  4 | 
  5 | from cloud.plugin import CLIPlugin
  6 | from cloud.plugin import BASIC_OPTIONS
  7 | from cloud.service import InstanceTemplate
  8 | from optparse import make_option
  9 | from prettytable import PrettyTable
 10 | 
 11 | class HadoopCassandraServiceCLI(CLIPlugin):
 12 |     USAGE = """Hadoop service usage: CLUSTER COMMAND [OPTIONS]
 13 | where COMMAND and [OPTIONS] may be one of:
 14 |             
 15 |                                HADOOP COMMANDS
 16 |   ----------------------------------------------------------------------------------
 17 |   launch-master                       launch or find a master in CLUSTER
 18 |   launch-slaves NUM_SLAVES            launch NUM_SLAVES slaves in CLUSTER
 19 | 
 20 |                                CASSANDRA COMMANDS
 21 |   ----------------------------------------------------------------------------------
 22 |   start-cassandra                     starts the cassandra service on all nodes
 23 |   stop-cassandra                      stops the cassandra service on all nodes
 24 |   print-ring                          displays the cluster's ring information
 25 | 
 26 |                                CLUSTER COMMANDS
 27 |   ----------------------------------------------------------------------------------
 28 |   details                             list instances in CLUSTER
 29 |   launch-cluster NUM_SLAVES           launch a master and NUM_SLAVES slaves in 
 30 |                                         CLUSTER
 31 |   terminate-cluster                   terminate all instances in CLUSTER
 32 |   login                               log in to the master in CLUSTER over SSH
 33 |   proxy                               start a SOCKS proxy on localhost into the
 34 |                                         CLUSTER
 35 | 
 36 |                                STORAGE COMMANDS
 37 |   ----------------------------------------------------------------------------------
 38 |   list-storage                        list storage volumes for CLUSTER
 39 |   create-storage ROLE NUM_INSTANCES   create volumes for NUM_INSTANCES instances of
 40 |     SPEC_FILE                           type ROLE for CLUSTER, using SPEC_FILE
 41 |   delete-storage                      delete all storage volumes for CLUSTER
 42 | """
 43 |     
 44 |     def __init__(self):
 45 |         super(HadoopCassandraServiceCLI, self).__init__()
 46 |  
 47 |     def execute_command(self, argv, options_dict):
 48 |         if len(argv) < 2:
 49 |             self.print_help()
 50 | 
 51 |         self._cluster_name = argv[0]
 52 |         self._command_name = argv[1]
 53 | 
 54 |         # strip off the cluster name and command from argv
 55 |         argv = argv[2:]
 56 | 
 57 |         # handle all known commands and error on an unknown command
 58 |         if self._command_name == "details":
 59 |             self.print_instances()
 60 | 
 61 |         elif self._command_name == "simple-details":
 62 |             self.simple_print_instances(argv, options_dict)
 63 | 
 64 |         elif self._command_name == "proxy":
 65 |             self.proxy(argv, options_dict)
 66 | 
 67 |         elif self._command_name == "terminate-cluster":
 68 |             self.terminate_cluster(argv, options_dict)
 69 | 
 70 |         elif self._command_name == "launch-cluster":
 71 |             self.launch_cluster(argv, options_dict)
 72 | 
 73 |         elif self._command_name == "login":
 74 |             self.login(argv, options_dict)
 75 | 
 76 |         elif self._command_name == "run-command":
 77 |             self.run_command(argv, options_dict)
 78 | 
 79 |         elif self._command_name == "transfer-files":
 80 |             self.transfer_files(argv, options_dict)
 81 | 
 82 |         elif self._command_name == "create-storage":
 83 |             self.create_storage(argv, options_dict)
 84 | 
 85 |         elif self._command_name == "delete-storage":
 86 |             self.delete_storage(argv, options_dict)
 87 | 
 88 |         elif self._command_name == "list-storage":
 89 |             self.print_storage()
 90 | 
 91 |         elif self._command_name == "stop-cassandra":
 92 |             self.stop_cassandra(argv, options_dict)
 93 | 
 94 |         elif self._command_name == "start-cassandra":
 95 |             self.start_cassandra(argv, options_dict)
 96 | 
 97 |         elif self._command_name == "print-ring":
 98 |             self.print_ring(argv, options_dict)
 99 | 
100 |         else:
101 |             self.print_help()
102 | 
103 |     def launch_cluster(self, argv, options_dict):
104 |         """
105 |         """
106 | 
107 |         expected_arguments = ["NUM_SLAVES"]
108 |         opt, args = self.parse_options(self._command_name,
109 |                                        argv,
110 |                                        expected_arguments=expected_arguments)
111 |         opt.update(options_dict)
112 | 
113 |         # check for the cassandra-specific files
114 |         if opt.get('cassandra_config_file') is None:
115 |             print "ERROR: No cassandra_config_file configured. Aborting."
116 |             sys.exit(1)
117 | 
118 |         if opt.get('keyspace_definitions_file') is None:
119 |             print "WARNING: No keyspace_definitions_file configured. You can ignore this for Cassandra v0.6.x"
120 | 
121 |         # test files
122 |         for key in ['cassandra_config_file', 'keyspace_definitions_file']:
123 |             if opt.get(key) is not None:
124 |                 try:
125 |                     url = urllib.urlopen(opt.get(key))
126 |                     data = url.read()
127 |                 except: 
128 |                     raise
129 |                     print "The file defined by %s (%s) does not exist. Aborting." % (key, opt.get(key))
130 |                     sys.exit(1)
131 | 
132 |         number_of_slaves = int(args[0])
133 |         instance_templates = [
134 |             InstanceTemplate(
135 |                 (
136 |                     self.service.NAMENODE, 
137 |                     self.service.SECONDARY_NAMENODE, 
138 |                     self.service.JOBTRACKER,
139 |                     self.service.HADOOP_CASSANDRA_NODE,
140 |                 ),
141 |                 1,
142 |                 opt.get('image_id'),
143 |                 opt.get('instance_type'), 
144 |                 opt.get('key_name'),
145 |                 opt.get('public_key'), 
146 |                 opt.get('user_data_file'),
147 |                 opt.get('availability_zone'), 
148 |                 opt.get('user_packages'),
149 |                 opt.get('auto_shutdown'), 
150 |                 opt.get('env'),
151 |                 opt.get('security_groups')),
152 |             InstanceTemplate(
153 |                 (
154 |                     self.service.DATANODE, 
155 |                     self.service.TASKTRACKER,
156 |                     self.service.CASSANDRA_NODE,
157 |                 ), 
158 |                 number_of_slaves,
159 |                 opt.get('image_id'),
160 |                 opt.get('instance_type'),
161 |                 opt.get('key_name'),
162 |                 opt.get('public_key'),
163 |                 opt.get('user_data_file'),
164 |                 opt.get('availability_zone'),
165 |                 opt.get('user_packages'),
166 |                 opt.get('auto_shutdown'),
167 |                 opt.get('env'),
168 |                 opt.get('security_groups'))
169 |         ]
170 | 
171 |         for it in instance_templates:
172 |             it.add_env_strings([
173 |                 "CLUSTER_SIZE=%d" % (number_of_slaves+1)
174 |             ])
175 | 
176 |         print "Launching cluster with %d instance(s)...please wait." % (number_of_slaves+1)
177 |         jobtracker = self.service.launch_cluster(instance_templates,
178 |                                                  opt.get('client_cidr'),
179 |                                                  opt.get('config_dir'),
180 |                                                  opt.get('ssh_options'),
181 |                                                  opt.get('cassandra_config_file'),
182 |                                                  opt.get('keyspace_definitions_file'))
183 | 
184 |         if jobtracker is None:
185 |             print "An error occurred started the Hadoop service. Check the logs for more information."
186 |             sys.exit(1)
187 | 
188 |         print "Browse the cluster at http://%s/" % jobtracker.public_dns_name
189 |         self.logger.debug("Startup complete.")
190 | 
191 |     def create_storage(self, argv, options_dict):
192 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS,
193 |                                        ["ROLE", "NUM_INSTANCES", "SPEC_FILE"])
194 | 
195 |         opt.update(options_dict)
196 | 
197 |         role = args[0]
198 |         number_of_instances = int(args[1])
199 |         spec_file = args[2]
200 | 
201 |         valid_roles = (self.service.NAMENODE, self.service.DATANODE, self.service.CASSANDRA_NODE)
202 |         if role not in valid_roles:
203 |             raise RuntimeError("Role must be one of %s" % str(valid_roles))
204 | 
205 |         self.service.create_storage(role, 
206 |                                     number_of_instances,
207 |                                     opt.get('availability_zone'),
208 |                                     spec_file)
209 |         self.print_storage()
210 | 
211 |     def proxy(self, argv, options_dict):
212 |         instances = self.service.get_instances()
213 |         if not instances:
214 |             "No running instances. Aborting."
215 |             sys.exit(1)
216 | 
217 |         result = self.service.proxy(ssh_options=options_dict.get('ssh_options'),   
218 |                                     instances=instances)
219 | 
220 |         if result is None:
221 |             print "Unable to create proxy. Check logs for more information."
222 |             sys.exit(1)
223 | 
224 |         print "Proxy created..."
225 |         print """export HADOOP_CLOUD_PROXY_PID=%s;
226 | echo Proxy pid %s;""" % (result, result)
227 | 
228 |     def stop_cassandra(self, argv, options_dict):
229 |         instances = self.service.cluster.get_instances_in_role(self.service.DATANODE, "running")
230 |         if not instances:
231 |             print "No running instances. Aborting."
232 |             sys.exit(1)
233 | 
234 |         print "Stopping Cassandra service on %d instance(s)...please wait." % len(instances)
235 |         self.service.stop_cassandra(options_dict.get('ssh_options'), instances=instances)
236 | 
237 |     def start_cassandra(self, argv, options_dict):
238 |         instances = self.service.cluster.get_instances_in_role(self.service.DATANODE, "running")
239 |         if not instances:
240 |             print "No running instances. Aborting."
241 |             sys.exit(1)
242 | 
243 |         print "Starting Cassandra service on %d instance(s)...please wait." % len(instances)
244 |         self.service.start_cassandra(options_dict.get('ssh_options'), instances=instances)
245 | 
246 |     def print_ring(self, argv, options_dict):
247 |         instances = self.service.cluster.get_instances_in_role(self.service.DATANODE, "running")
248 |         if not instances:
249 |             print "No running instances. Aborting."
250 |             sys.exit(1)
251 | 
252 |         self.service.print_ring(options_dict.get('ssh_options'), instances[0])
253 | 


--------------------------------------------------------------------------------
/plugins/hadoop_cassandra_hybrid/service.plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = hadoop_cassandra_hybrid
3 | Module = service
4 | 
5 | [Documentation]
6 | Author = Abe Music
7 | Website = http://github.com/digitalreasoning/PyStratus
8 | Description = A hybrid Hadoop/Cassandra service implementation for PyStratus
9 | 


--------------------------------------------------------------------------------
/plugins/hadoop_cassandra_hybrid/service.py:
--------------------------------------------------------------------------------
  1 | from __future__ import with_statement
  2 | 
  3 | import os
  4 | import sys
  5 | import time
  6 | import subprocess
  7 | import urllib
  8 | import tempfile
  9 | import socket
 10 | import re
 11 | 
 12 | from cloud.cluster import TimeoutException
 13 | from cloud.service import InstanceTemplate
 14 | from cloud.plugin import ServicePlugin 
 15 | from cloud.util import xstr
 16 | from cloud.util import url_get
 17 | 
 18 | from yaml import load as parse_yaml
 19 | from yaml import dump as dump_yaml
 20 | 
 21 | try:
 22 |     from cElementTree import parse as parse_xml
 23 |     from cElementTree import tostring as dump_xml
 24 |     from cElementTree import Element
 25 | except:
 26 |     try:
 27 |         from xml.etree.cElementTree import parse as parse_xml
 28 |         from xml.etree.cElementTree import tostring as dump_xml
 29 |         from xml.etree.cElementTree import Element
 30 |     except:
 31 |         print "*"*80
 32 |         print "WARNING: cElementTree module does not exist. Defaulting to elementtree instead."
 33 |         print "It's recommended that you install the cElementTree module for faster XML parsing."
 34 |         print "*"*80
 35 |         from elementtree.ElementTree import parse as parse_xml
 36 |         from elementtree.ElementTree import parse as parse_xml
 37 |         from elementtree.ElementTree import Element
 38 | 
 39 | class HadoopCassandraService(ServicePlugin):
 40 |     """
 41 |     """
 42 |     NAMENODE = "hybrid_nn"
 43 |     SECONDARY_NAMENODE = "hybrid_snn"
 44 |     JOBTRACKER = "hybrid_jt"
 45 |     DATANODE = "hybrid_dn"
 46 |     TASKTRACKER = "hybrid_tt"
 47 |     CASSANDRA_NODE = "hybrid_cn"
 48 |     HADOOP_CASSANDRA_NODE = "hcn"
 49 | 
 50 |     def __init__(self):
 51 |         super(HadoopCassandraService, self).__init__()
 52 | 
 53 |     def get_roles(self):
 54 |         return [self.NAMENODE]
 55 | 
 56 |     def get_instances(self):
 57 |         """
 58 |         Return a list of tuples resembling (role_of_instance, instance)
 59 |         """
 60 |         return self.cluster.get_instances_in_role(self.NAMENODE, "running") + \
 61 |                self.cluster.get_instances_in_role(self.DATANODE, "running")
 62 | 
 63 |     def launch_cluster(self, instance_templates, client_cidr, config_dir,
 64 |                              ssh_options, cassandra_config_file,
 65 |                              cassandra_keyspace_file=None):
 66 | 
 67 |         number_of_tasktrackers = 0
 68 |         roles = []
 69 |         for it in instance_templates:
 70 |           roles.extend(it.roles)
 71 |           if self.TASKTRACKER in it.roles:
 72 |             number_of_tasktrackers += it.number
 73 | 
 74 |         singleton_hosts = []
 75 |         started_instance_ids = [] 
 76 |         expected_instance_count = sum([it.number for it in instance_templates])
 77 | 
 78 |         for instance_template in instance_templates:
 79 |             self.logger.debug("Launching %d instance(s) with role(s) %s..." % (
 80 |                 instance_template.number,
 81 |                 str(instance_template.roles),
 82 |             ))
 83 |             self.logger.debug("Instance(s) will have extra environment variables: %s" % (
 84 |                 singleton_hosts,
 85 |             ))
 86 |             instance_template.add_env_strings(singleton_hosts)
 87 |             instance_ids = self._launch_instances(instance_template)
 88 | 
 89 |             if instance_template.number == 1:
 90 |                 if len(instance_ids) != 1:
 91 |                     logger.error("Expected a single '%s' instance, but found %s.",
 92 |                                  "".join(instance_template.roles), 
 93 |                                  len(instance_ids))
 94 |                     return False
 95 |                 else:
 96 |                     # wait for the instances to start
 97 |                     self.cluster.wait_for_instances(instance_ids)
 98 |                     instance = self.get_instances()[0]
 99 | 
100 |                     for role in instance_template.roles:
101 |                         singleton_host_env = "%s_HOST=%s" % (
102 |                             self._sanitize_role_name(role),
103 |                             instance.public_dns_name,
104 |                         )
105 |                         singleton_hosts.append(singleton_host_env)
106 | 
107 |             started_instance_ids.extend(instance_ids)
108 | 
109 |         if len(started_instance_ids) != expected_instance_count:
110 |             self.logger.warn("Total number of reported instance ids (%d) " \
111 |                              "does not match total requested number (%d)" % \
112 |                              (len(started_instance_ids), instance_template.number))
113 | 
114 |         self.logger.debug("Waiting for %s instance(s) to start: %s" % \
115 |             (len(started_instance_ids), ", ".join(started_instance_ids)))
116 |         time.sleep(1)
117 |         
118 |         try:
119 |             self.cluster.wait_for_instances(started_instance_ids)
120 |         except TimeoutException:
121 |             self.logger.error("Timeout while waiting for %d instances to start." % \
122 |                               len(started_instance_ids))
123 | 
124 |         instances = self.get_instances()
125 | 
126 |         self.logger.debug("Instances started: %s" % (str(instances),))
127 | 
128 |         self._create_client_hadoop_site_file(config_dir)
129 |         self._authorize_client_ports(client_cidr)
130 |         self._attach_storage(roles)
131 |         try:
132 |             self._wait_for_hadoop(number_of_tasktrackers)
133 |         except TimeoutException:
134 |             print "Timeout while waiting for Hadoop to start. Please check logs on" + \
135 |                   " cluster."
136 | 
137 |         # cassandra specific instances and setup
138 |         cassandra_instances = self.cluster.get_instances_in_role(self.DATANODE, "running")
139 |         self._transfer_config_files(ssh_options, 
140 |                                     cassandra_config_file, 
141 |                                     cassandra_keyspace_file, 
142 |                                     instances=cassandra_instances)
143 |         self.start_cassandra(ssh_options, 
144 |                              create_keyspaces=(cassandra_keyspace_file is not None), 
145 |                              instances=cassandra_instances)
146 | 
147 |         return self._get_jobtracker()
148 | 
149 |     def _sanitize_role_name(self, role):
150 |         """
151 |         Replace characters in role name with ones allowed in bash variable names
152 |         """
153 |         return role.replace('+', '_').upper()
154 |     
155 | 
156 |     def _get_namenode(self):
157 |         instances = self.cluster.get_instances_in_role(self.NAMENODE, "running")
158 |         if not instances:
159 |           return None
160 |         return instances[0]
161 | 
162 |     def _get_jobtracker(self):
163 |         instances = self.cluster.get_instances_in_role(self.JOBTRACKER, "running")
164 |         if not instances:
165 |           return None
166 |         return instances[0]
167 |     
168 |     def _create_client_hadoop_site_file(self, config_dir):
169 |         namenode = self._get_namenode()
170 |         jobtracker = self._get_jobtracker()
171 |         cluster_dir = os.path.join(config_dir, ".hadoop", self.cluster.name)
172 |         aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
173 |         aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
174 | 
175 |         if not os.path.exists(cluster_dir):
176 |           os.makedirs(cluster_dir)
177 | 
178 |         params = {
179 |             'namenode': self._get_namenode().public_dns_name,
180 |             'jobtracker': self._get_jobtracker().public_dns_name,
181 |             'aws_access_key_id': os.environ['AWS_ACCESS_KEY_ID'],
182 |             'aws_secret_access_key': os.environ['AWS_SECRET_ACCESS_KEY']
183 |         }
184 |         self.logger.debug("hadoop-site.xml params: %s" % str(params))
185 | 
186 |         with open(os.path.join(cluster_dir, 'hadoop-site.xml'), 'w') as f:
187 |             f.write("""<?xml version="1.0"?>
188 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
189 | <!-- Put site-specific property overrides in this file. -->
190 | <configuration>
191 |     <property>
192 |         <name>hadoop.job.ugi</name>
193 |         <value>root,root</value>
194 |     </property>
195 |     <property>
196 |         <name>fs.default.name</name>
197 |         <value>hdfs://%(namenode)s:8020/</value>
198 |     </property>
199 |     <property>
200 |         <name>mapred.job.tracker</name>
201 |         <value>%(jobtracker)s:8021</value>
202 |     </property>
203 |     <property>
204 |         <name>hadoop.socks.server</name>
205 |         <value>localhost:6666</value>
206 |     </property>
207 |     <property>
208 |         <name>hadoop.rpc.socket.factory.class.default</name>
209 |         <value>org.apache.hadoop.net.SocksSocketFactory</value>
210 |     </property>
211 |     <property>
212 |         <name>fs.s3.awsAccessKeyId</name>
213 |         <value>%(aws_access_key_id)s</value>
214 |     </property>
215 |     <property>
216 |         <name>fs.s3.awsSecretAccessKey</name>
217 |         <value>%(aws_secret_access_key)s</value>
218 |     </property>
219 |     <property>
220 |         <name>fs.s3n.awsAccessKeyId</name>
221 |         <value>%(aws_access_key_id)s</value>
222 |     </property>
223 |     <property>
224 |         <name>fs.s3n.awsSecretAccessKey</name>
225 |         <value>%(aws_secret_access_key)s</value>
226 |     </property>
227 | </configuration>""" % params)
228 | 
229 |     def _authorize_client_ports(self, client_cidrs=[]):
230 |         if not client_cidrs:
231 |             self.logger.debug("No client CIDRs specified, using local address.")
232 |             client_ip = url_get('http://checkip.amazonaws.com/').strip()
233 |             client_cidrs = ("%s/32" % client_ip,)
234 |         self.logger.debug("Client CIDRs: %s", client_cidrs)
235 | 
236 |         namenode = self._get_namenode()
237 |         jobtracker = self._get_jobtracker()
238 | 
239 |         for client_cidr in client_cidrs:
240 |             # Allow access to port 80 on namenode from client
241 |             self.cluster.authorize_role(self.NAMENODE, 80, 80, client_cidr)
242 | 
243 |             # Allow access to jobtracker UI on master from client
244 |             # (so we can see when the cluster is ready)
245 |             self.cluster.authorize_role(self.JOBTRACKER, 50030, 50030, client_cidr)
246 | 
247 |         # Allow access to namenode and jobtracker via public address from each other
248 |         namenode_ip = socket.gethostbyname(namenode.public_dns_name)
249 |         jobtracker_ip = socket.gethostbyname(jobtracker.public_dns_name)
250 |         self.cluster.authorize_role(self.NAMENODE, 8020, 8020, "%s/32" % namenode_ip)
251 |         self.cluster.authorize_role(self.NAMENODE, 8020, 8020, "%s/32" % jobtracker_ip)
252 |         self.cluster.authorize_role(self.JOBTRACKER, 8021, 8021, "%s/32" % namenode_ip)
253 |         self.cluster.authorize_role(self.JOBTRACKER, 8021, 8021,
254 |                                     "%s/32" % jobtracker_ip)
255 | 
256 |     def _wait_for_hadoop(self, number, timeout=600):
257 |         wait_time = 3
258 |         start_time = time.time()
259 |         jobtracker = self._get_jobtracker()
260 |         if not jobtracker:
261 |             return
262 | 
263 |         self.logger.debug("Waiting for jobtracker to start...")
264 |         previous_running = 0
265 |         while True:
266 |             if (time.time() - start_time >= timeout):
267 |                 raise TimeoutException()
268 |             try:
269 |                 actual_running = self._number_of_tasktrackers(jobtracker.public_dns_name, 1)
270 |                 break
271 |             except IOError:
272 |                 pass
273 |             self.logger.debug("Sleeping for %d seconds..." % wait_time)
274 |             time.sleep(wait_time)
275 |         if number > 0:
276 |             self.logger.debug("Waiting for %d tasktrackers to start" % number)
277 |             while actual_running < number:
278 |                 if (time.time() - start_time >= timeout):
279 |                     raise TimeoutException()
280 |                 try:
281 |                     actual_running = self._number_of_tasktrackers(jobtracker.public_dns_name, 5, 2)
282 |                     self.logger.debug("Sleeping for %d seconds..." % wait_time)
283 |                     time.sleep(wait_time)
284 |                     previous_running = actual_running
285 |                 except IOError:
286 |                     pass
287 |         
288 |     # The optional ?type=active is a difference between Hadoop 0.18 and 0.20
289 |     _NUMBER_OF_TASK_TRACKERS = re.compile(r'<a href="machines.jsp(?:\?type=active)?">(\d+)</a>')
290 |   
291 |     def _number_of_tasktrackers(self, jt_hostname, timeout, retries=0):
292 |         url = "http://%s:50030/jobtracker.jsp" % jt_hostname
293 |         jt_page = url_get(url, timeout, retries)
294 |         m = self._NUMBER_OF_TASK_TRACKERS.search(jt_page)
295 |         if m:
296 |             return int(m.group(1))
297 |         return 0
298 | 
299 |     def proxy(self, ssh_options, instances=None):
300 |         if instances is None:
301 |             return None
302 | 
303 |         namenode = self._get_namenode()
304 |         if namenode is None:
305 |             self.logger.error("No namenode running. Aborting.")
306 |             return None
307 |         
308 |         options = '-o "ConnectTimeout 10" -o "ServerAliveInterval 60" ' \
309 |                   '-N -D 6666'
310 |         process = subprocess.Popen('ssh %s %s %s' % (
311 |                 xstr(ssh_options), 
312 |                 options, 
313 |                 namenode.public_dns_name
314 |             ),
315 |             stdin=subprocess.PIPE, 
316 |             stdout=subprocess.PIPE, 
317 |             stderr=subprocess.PIPE,
318 |             shell=True)
319 |         
320 |         return process.pid 
321 | 
322 |     def _wait_for_cassandra_install(self, instance, ssh_options):
323 |         """
324 |         Simply wait for the cassandra directory to be available so that we can begin configuring
325 |         the service before starting it
326 |         """
327 |         wait_time = 3
328 |         command = "ls /usr/local/apache-cassandra"
329 |         ssh_command = self._get_standard_ssh_command(instance, ssh_options, command)
330 |         self.logger.debug(ssh_command)
331 |         timeout = 600
332 | 
333 |         start_time = time.time()
334 |         while True:
335 |             if (time.time() - start_time >= timeout):
336 |                 raise TimeoutException()
337 |             retcode = subprocess.call(ssh_command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
338 |             if retcode == 0:
339 |                 break
340 |             self.logger.debug("Sleeping for %d seconds..." % wait_time)
341 |             time.sleep(wait_time)
342 | 
343 |     def _transfer_config_files(self, ssh_options, config_file, keyspace_file=None, 
344 |                                      instances=None):
345 |         """
346 |         """
347 |         if instances is None:
348 |             instances = self.get_instances()
349 | 
350 |         self.logger.debug("Waiting for %d Cassandra instance(s) to install..." % len(instances))
351 |         for instance in instances:
352 |             self._wait_for_cassandra_install(instance, ssh_options)
353 | 
354 |         self.logger.debug("Copying configuration files to %d Cassandra instances..." % len(instances))
355 | 
356 |         seed_ips = [str(instance.private_dns_name) for instance in instances[:2]]
357 |         tokens = self._get_evenly_spaced_tokens_for_n_instances(len(instances))
358 | 
359 |         # for each instance, generate a config file from the original file and upload it to
360 |         # the cluster node
361 |         for i in range(len(instances)):
362 |             local_file, remote_file = self._modify_config_file(instances[i], config_file, seed_ips, str(tokens[i]))
363 | 
364 |             # Upload modified config file
365 |             scp_command = 'scp %s -r %s %s:/usr/local/apache-cassandra/conf/%s' % (xstr(ssh_options),
366 |                                                      local_file, instances[i].public_dns_name, remote_file)
367 |             subprocess.call(scp_command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
368 | 
369 |             # delete temporary file
370 |             os.unlink(local_file)
371 | 
372 |         if keyspace_file:
373 |             keyspace_data = urllib.urlopen(keyspace_file).read()
374 |             fd, temp_keyspace_file = tempfile.mkstemp(prefix="keyspaces.txt_", text=True)
375 |             os.write(fd, keyspace_data)
376 |             os.close(fd)
377 | 
378 |             self.logger.debug("Copying keyspace definition file to first Cassandra instance...")
379 | 
380 |             # Upload keyspace definitions file
381 |             scp_command = 'scp %s -r %s %s:/usr/local/apache-cassandra/conf/keyspaces.txt' % \
382 |                           (xstr(ssh_options), temp_keyspace_file, instances[0].public_dns_name)
383 |             subprocess.call(scp_command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
384 | 
385 |             # remove temporary file
386 |             os.unlink(temp_keyspace_file)
387 | 
388 |     def _modify_config_file(self, instance, config_file, seed_ips, token):
389 |         # XML (0.6.x) 
390 |         if config_file.endswith(".xml"):
391 |             remote_file = "storage-conf.xml"
392 | 
393 |             xml = parse_xml(urllib.urlopen(config_file)).getroot()
394 | 
395 |             #  Seeds
396 |             seeds = xml.find("Seeds")
397 |             if seeds is not None:
398 |                 while seeds.getchildren():
399 |                     seeds.remove(seeds.getchildren()[0])
400 |             else:
401 |                 seeds = Element("Seeds")
402 |                 xml.append(seeds)
403 | 
404 |             for seed_ip in seed_ips:
405 |                 seed = Element("Seed")
406 |                 seed.text = seed_ip
407 |                 seeds.append(seed)
408 | 
409 |             # Initial token
410 |             initial_token = xml.find("InitialToken")
411 |             if initial_token is None:
412 |                 initial_token = Element("InitialToken")
413 |                 xml.append(initial_token)
414 |             initial_token.text = token
415 | 
416 |             # Logs
417 |             commit_log_directory = xml.find("CommitLogDirectory")
418 |             if commit_log_directory is None:
419 |                 commit_log_directory = Element("CommitLogDirectory")
420 |                 xml.append(commit_log_directory)
421 |             commit_log_directory.text = "/mnt/cassandra-logs"
422 | 
423 |             # Data 
424 |             data_file_directories = xml.find("DataFileDirectories")
425 |             if data_file_directories is not None:
426 |                 while data_file_directories.getchildren():
427 |                     data_file_directories.remove(data_file_directories.getchildren()[0])
428 |             else:
429 |                 data_file_directories = Element("DataFileDirectories")
430 |                 xml.append(data_file_directories)
431 |             data_file_directory = Element("DataFileDirectory")
432 |             data_file_directory.text = "/mnt/cassandra-data"
433 |             data_file_directories.append(data_file_directory)
434 | 
435 | 
436 |             # listen address
437 |             listen_address = xml.find("ListenAddress")
438 |             if listen_address is None:
439 |                 listen_address = Element("ListenAddress")
440 |                 xml.append(listen_address)
441 |             listen_address.text = ""
442 | 
443 |             # thrift address
444 |             thrift_address = xml.find("ThriftAddress")
445 |             if thrift_address is None:
446 |                 thrift_address = Element("ThriftAddress")
447 |                 xml.append(thrift_address)
448 |             thrift_address.text = ""
449 | 
450 |             fd, temp_file = tempfile.mkstemp(prefix='storage-conf.xml_', text=True)
451 |             os.write(fd, dump_xml(xml))
452 |             os.close(fd)
453 |             
454 |         # YAML (0.7.x)
455 |         elif config_file.endswith(".yaml"):
456 |             remote_file = "cassandra.yaml"
457 | 
458 |             yaml = parse_yaml(urllib.urlopen(config_file))
459 |             yaml['seeds'] = seed_ips
460 |             yaml['initial_token'] = token
461 |             yaml['data_file_directories'] = ['/mnt/cassandra-data']
462 |             yaml['commitlog_directory'] = '/mnt/cassandra-logs'
463 |             yaml['listen_address'] = str(instance.private_dns_name)
464 |             yaml['rpc_address'] = str(instance.public_dns_name)
465 | 
466 |             fd, temp_file = tempfile.mkstemp(prefix='cassandra.yaml_', text=True)
467 |             os.write(fd, dump_yaml(yaml))
468 |             os.close(fd)
469 |         else:
470 |             raise Exception("Configuration file must be one of xml or yaml") 
471 | 
472 |         return temp_file, remote_file
473 |     
474 |     def _get_evenly_spaced_tokens_for_n_instances(self, n):
475 |         return [i*(2**127/n) for i in range(1,n+1)]
476 | 
477 |     def _create_keyspaces_from_definitions_file(self, instance, ssh_options):
478 |         # TODO: Keyspaces could already exist...how do I check this?
479 |         # TODO: Can it be an arbitrary node?
480 | 
481 |         self.logger.debug("Creating keyspaces using Thrift API via keyspaces_definitions_file...")
482 | 
483 |         # test for the keyspace file first
484 |         command = "ls /usr/local/apache-cassandra/conf/keyspaces.txt"
485 |         ssh_command = self._get_standard_ssh_command(instance, ssh_options, command)
486 |         retcode = subprocess.call(ssh_command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
487 | 
488 |         if retcode != 0:
489 |             self.logger.warn("Unable to find /usr/local/apache-cassandra/conf/keyspaces.txt. Skipping keyspace generation.")
490 |             return
491 |         else:
492 |             self.logger.debug("Found keyspaces.txt...Proceeding with keyspace generation.")
493 | 
494 |         command = "/usr/local/apache-cassandra/bin/cassandra-cli --host %s --batch " \
495 |                   "< /usr/local/apache-cassandra/conf/keyspaces.txt" % instance.private_dns_name
496 |         ssh_command = self._get_standard_ssh_command(instance, ssh_options, command)
497 |         retcode = subprocess.call(ssh_command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
498 | 
499 |         # TODO: do this or not?
500 |         # remove keyspace file
501 |         #command = "rm -rf /usr/local/apache-cassandra/conf/keyspaces.txt"
502 |         #ssh_command = self._get_standard_ssh_command(instance, ssh_options, command)
503 |         #subprocess.call(ssh_command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
504 | 
505 |     def print_ring(self, ssh_options, instance=None):
506 |         if instance is None:
507 |           instance = self.get_instances()[0]
508 | 
509 |         print "\nRing configuration..."
510 |         print "NOTE: May not be accurate if the cluster just started."
511 |         command = "/usr/local/apache-cassandra/bin/nodetool -h localhost ring"
512 |         ssh_command = self._get_standard_ssh_command(instance, ssh_options, command)
513 |         subprocess.call(ssh_command, shell=True)
514 | 
515 |     def start_cassandra(self, ssh_options, create_keyspaces=False, instances=None):
516 |         if instances is None:
517 |             instances = self.get_instances()
518 | 
519 |         self.logger.debug("Starting Cassandra service on %d instance(s)..." % len(instances))
520 | 
521 |         for instance in instances:
522 |             # if checks to see if cassandra is already running 
523 |             command = "if [ ! -f /root/cassandra.pid ]; then `nohup /usr/local/apache-cassandra/bin/cassandra -p /root/cassandra.pid &> /root/cassandra.out &`; fi"
524 |             ssh_command = self._get_standard_ssh_command(instance, ssh_options, command)
525 |             retcode = subprocess.call(ssh_command, shell=True)
526 | 
527 |             if retcode != 0:
528 |                 self.logger.warn("Return code for starting Cassandra: %d" % retcode)
529 | 
530 |         # test connection
531 |         self.logger.debug("Testing connection to each Cassandra instance...")
532 | 
533 |         timeout = 600
534 |         temp_instances = instances[:]
535 |         start_time = time.time()
536 |         while len(temp_instances) > 0:
537 |             if (time.time() - start_time >= timeout):
538 |                 raise TimeoutException()
539 |             
540 |             command = "/usr/local/apache-cassandra/bin/nodetool -h %s ring" % temp_instances[-1].private_dns_name
541 |             ssh_command = self._get_standard_ssh_command(temp_instances[-1], ssh_options, command)
542 |             retcode = subprocess.call(ssh_command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
543 | 
544 |             if retcode == 0:
545 |                 temp_instances.pop()
546 |             else:
547 |                 self.logger.warn("Return code for 'nodetool ring' on '%s': %d" % (temp_instances[-1].id, retcode))
548 | 
549 |         if create_keyspaces:
550 |             self._create_keyspaces_from_definitions_file(instances[0], ssh_options)
551 |         else:
552 |             self.logger.debug("create_keyspaces is False. Skipping keyspace generation.")
553 |         
554 |         # TODO: Do I need to wait for the keyspaces to propagate before printing the ring?
555 |         # print ring after everything started
556 |         self.print_ring(ssh_options, instances[0])
557 | 
558 |         self.logger.debug("Startup complete.")
559 | 
560 |     def stop_cassandra(self, ssh_options, instances=None):
561 |         if instances is None:
562 |             instances = self.get_instances()
563 | 
564 |         for instance in instances:
565 |             command = "kill `cat /root/cassandra.pid`"
566 |             ssh_command = self._get_standard_ssh_command(instance, ssh_options, command)
567 |             retcode = subprocess.call(ssh_command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
568 | 
569 |     def login(self, instance, ssh_options):
570 |         ssh_command = self._get_standard_ssh_command(instance, ssh_options)
571 |         subprocess.call(ssh_command, shell=True)
572 | 


--------------------------------------------------------------------------------
/plugins/simple/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'jbunting'
2 |   


--------------------------------------------------------------------------------
/plugins/simple/cli.plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = simple
 3 | Module = cli
 4 | 
 5 | [Documentation]
 6 | Author = Abe Music
 7 | Website = http://github.com/digitalreasoning/PyStratus
 8 | Description = A simple CLI implementation for PyStratus
 9 | 
10 | 


--------------------------------------------------------------------------------
/plugins/simple/cli.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import logging
  4 | import urllib
  5 | 
  6 | from cloud.plugin import CLIPlugin
  7 | from cloud.plugin import BASIC_OPTIONS
  8 | from cloud.service import InstanceTemplate
  9 | from optparse import make_option
 10 | from prettytable import PrettyTable
 11 | from pprint import pprint
 12 | 
 13 | # Add options here to override what's in the clusters.cfg file
 14 | # TODO
 15 | 
 16 | class SimpleServiceCLI(CLIPlugin):
 17 |     USAGE = """Simple service usage: CLUSTER COMMAND [OPTIONS]
 18 | where COMMAND and [OPTIONS] may be one of:
 19 |             
 20 |                                APPLICATION COMMANDS
 21 |   ----------------------------------------------------------------------------------
 22 |   launch-load-balancer                launch a load balancer for CLUSTER
 23 |   launch-nodes NUM_NODES              launch NUM_NODES nodes in CLUSTER
 24 |   start-nodes                         start the nodes
 25 |   stop-nodes                          stop the nodes
 26 |   start-load-balancer                 start the load balancer
 27 |   stop-load-balancer                  stop the load balancer
 28 | 
 29 |                                CLUSTER COMMANDS
 30 |   ----------------------------------------------------------------------------------
 31 |   details                             list instances in CLUSTER
 32 |   launch-cluster NUM_NODES            launch NUM_NODES Cassandra nodes
 33 |   expand-cluster NUM_NODES            adds new nodes
 34 |   terminate-cluster                   terminate all instances in CLUSTER
 35 |   login                               log in to the master in CLUSTER over SSH
 36 | 
 37 |                                STORAGE COMMANDS
 38 |   ----------------------------------------------------------------------------------
 39 |   list-storage                        list storage volumes for CLUSTER
 40 |   create-storage NUM_INSTANCES        create volumes for NUM_INSTANCES instances
 41 |     SPEC_FILE                           for CLUSTER, using SPEC_FILE
 42 |   delete-storage                      delete all storage volumes for CLUSTER
 43 | """
 44 | #  transfer FILE DESTINATION           transfer a file to all nodes
 45 | #  execute COMMAND                     execute a command on all nodes
 46 | 
 47 |     def __init__(self):
 48 |         super(SimpleServiceCLI, self).__init__()
 49 | 
 50 |         #self._logger = logging.getLogger("CassandraServiceCLI")
 51 |  
 52 |     def execute_command(self, argv, options_dict):
 53 |         if len(argv) < 2:
 54 |             self.print_help()
 55 | 
 56 |         self._cluster_name = argv[0]
 57 |         self._command_name = argv[1]
 58 | 
 59 |         # strip off the cluster name and command from argv
 60 |         argv = argv[2:]
 61 | 
 62 |         # get spot configuration
 63 |         self._spot_config = {
 64 |                 "spot_cluster": True if os.environ.get("SPOT_CLUSTER", options_dict.get("spot_cluster", "false")).lower() == "true" else False,
 65 |                 "max_price": options_dict.get("max_price", None),
 66 |                 "launch_group": options_dict.get("launch_group", None),
 67 |          }
 68 | 
 69 |         # handle all known commands and error on an unknown command
 70 |         if self._command_name == "details":
 71 |             self.print_instances()
 72 | 
 73 |         elif self._command_name == "simple-details":
 74 |             self.simple_print_instances(argv, options_dict)
 75 | 
 76 |         elif self._command_name == "terminate-cluster":
 77 |             self.terminate_cluster(argv, options_dict)
 78 | 
 79 |         elif self._command_name == "launch-cluster":
 80 |             self.launch_cluster(argv, options_dict)
 81 | 
 82 |         elif self._command_name == "expand-cluster":
 83 |             self.expand_cluster(argv, options_dict)
 84 | 
 85 |         elif self._command_name == "login":
 86 |             self.login(argv, options_dict)
 87 | 
 88 |         elif self._command_name == "run-command":
 89 |             self.run_command(argv, options_dict)
 90 | 
 91 |         elif self._command_name == "transfer-files":
 92 |             self.transfer_files(argv, options_dict)
 93 | 
 94 |         elif self._command_name == "create-storage":
 95 |             self.create_storage(argv, options_dict)
 96 | 
 97 |         elif self._command_name == "delete-storage":
 98 |             self.delete_storage(argv, options_dict)
 99 | 
100 |         elif self._command_name == "list-storage":
101 |             self.print_storage()
102 | 
103 |         else:
104 |             self.print_help()
105 | 
106 |     def expand_cluster(self, argv, options_dict):
107 |         expected_arguments = ["NUM_INSTANCES"]
108 |         opt, args = self.parse_options(self._command_name,
109 |                                        argv,
110 |                                        expected_arguments=expected_arguments,
111 |                                        unbounded_args=True)
112 |         opt.update(options_dict)
113 | 
114 |         number_of_nodes = int(args[0])
115 |         instance_template = InstanceTemplate(
116 |             (self.service.SIMPLE_NODE,),
117 |             number_of_nodes,
118 |             opt.get('image_id'),
119 |             opt.get('instance_type'),
120 |             opt.get('key_name'),
121 |             opt.get('public_key'),
122 |             opt.get('user_data_file'),
123 |             opt.get('availability_zone'),
124 |             opt.get('user_packages'),
125 |             opt.get('auto_shutdown'),
126 |             opt.get('env'),
127 |             opt.get('security_groups'),
128 |             self._spot_config
129 |         )
130 | 
131 | #        instance_template.add_env_strings(["CLUSTER_SIZE=%d" % number_of_nodes])
132 | 
133 |         print "Expanding cluster by %d instance(s)...please wait." % number_of_nodes
134 | 
135 |         self.service.expand_cluster(instance_template,
136 |                                     opt.get('ssh_options'),opt.get('wait_dir', '/'))
137 | 
138 |     def launch_cluster(self, argv, options_dict):
139 |         """
140 |         """
141 |         expected_arguments = ["NUM_INSTANCES"]
142 |         opt, args = self.parse_options(self._command_name, 
143 |                                       argv,
144 |                                       expected_arguments=expected_arguments)
145 |         opt.update(options_dict)
146 | 
147 |         number_of_nodes = int(args[0])
148 |         instance_template = InstanceTemplate(
149 |             (self.service.SIMPLE_NODE,),
150 |             number_of_nodes,
151 |             opt.get('image_id'),
152 |             opt.get('instance_type'),
153 |             opt.get('key_name'),
154 |             opt.get('public_key'), 
155 |             opt.get('user_data_file'),
156 |             opt.get('availability_zone'), 
157 |             opt.get('user_packages'),
158 |             opt.get('auto_shutdown'), 
159 |             opt.get('env'),
160 |             opt.get('security_groups'),
161 |             self._spot_config
162 |         )
163 | 
164 |         instance_template.add_env_strings(["CLUSTER_SIZE=%d" % number_of_nodes])
165 | 
166 |         print "Launching cluster with %d instance(s)...please wait." % number_of_nodes
167 | 
168 |         self.service.launch_cluster(instance_template,
169 |                                     opt.get('ssh_options'),opt.get('wait_dir', '/'))
170 | 
171 |     def create_storage(self, argv, options_dict):
172 |         opt, args = self.parse_options(self._command_name, argv, BASIC_OPTIONS,
173 |                                        ["NUM_INSTANCES", "SPEC_FILE"])
174 |         opt.update(options_dict)
175 | 
176 |         role = self.service.SIMPLE_NODE
177 |         number_of_instances = int(args[0])
178 |         spec_file = args[1]
179 | 
180 |         # FIXME
181 |         # check_options_set(opt, ['availability_zone'])
182 | 
183 |         self.service.create_storage(role, 
184 |                                     number_of_instances,
185 |                                     opt.get('availability_zone'),
186 |                                     spec_file)
187 |         self.print_storage()
188 | 


--------------------------------------------------------------------------------
/plugins/simple/service.plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = simple
3 | Module = service
4 | 
5 | [Documentation]
6 | Author = Abe Music
7 | Website = http://github.com/digitalreasoning/PyStratus
8 | Description = A simple service implementation for PyStratus
9 | 


--------------------------------------------------------------------------------
/plugins/simple/service.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import time
  4 | import subprocess
  5 | import urllib
  6 | import tempfile
  7 | 
  8 | from cloud.cluster import TimeoutException
  9 | from cloud.service import InstanceTemplate
 10 | from cloud.plugin import ServicePlugin 
 11 | from cloud.util import xstr
 12 | 
 13 | from yaml import load as parse_yaml
 14 | from yaml import dump as dump_yaml
 15 | 
 16 | try:
 17 |     from cElementTree import parse as parse_xml
 18 |     from cElementTree import tostring as dump_xml
 19 |     from cElementTree import Element
 20 | except:
 21 |     try:
 22 |         from xml.etree.cElementTree import parse as parse_xml
 23 |         from xml.etree.cElementTree import tostring as dump_xml
 24 |         from xml.etree.cElementTree import Element
 25 |     except:
 26 |         print "*"*80
 27 |         print "WARNING: cElementTree module does not exist. Defaulting to elementtree instead."
 28 |         print "It's recommended that you install the cElementTree module for faster XML parsing."
 29 |         print "*"*80
 30 |         from elementtree.ElementTree import parse as parse_xml
 31 |         from elementtree.ElementTree import parse as parse_xml
 32 |         from elementtree.ElementTree import Element
 33 | 
 34 | class SimpleService(ServicePlugin):
 35 |     """
 36 |     """
 37 |     SIMPLE_NODE = "sn"
 38 | 
 39 |     def __init__(self):
 40 |         super(SimpleService, self).__init__()
 41 | 
 42 |     def get_roles(self):
 43 |         return [self.SIMPLE_NODE]
 44 | 
 45 |     def get_instances(self):
 46 |         return self.cluster.get_instances_in_role(self.SIMPLE_NODE, "running")
 47 | 
 48 |     def _wait_for_install(self, instance, ssh_options, wait_dir):
 49 |         """
 50 |         Simply wait for the 'wait' directory to be available so that we can begin configuring
 51 |         the service before starting it
 52 |         """
 53 |         wait_time = 3
 54 |         errcount = 0
 55 |         command = "ls %s" % wait_dir
 56 |         ssh_command = self._get_standard_ssh_command(instance, ssh_options, command)
 57 | 
 58 |         self.logger.info("Waiting for install with command %s" % ssh_command)
 59 |         while True:
 60 |             if errcount >= 10:
 61 |                 raise TimeoutException("Maximum errors exceeded.")
 62 |             try:
 63 |                 subprocess.check_output(ssh_command, shell=True, stderr=subprocess.STDOUT)
 64 |                 break
 65 |             except subprocess.CalledProcessError, e:
 66 |                 error = e.output.strip()
 67 |                 retcode = e.returncode
 68 |                 if retcode != 255:
 69 |                     print error
 70 |                     print "Return code: %d" % retcode
 71 |                 elif retcode == 255 and "connection refused" in error.lower():
 72 |                     print "Connection refused error. Typically means SSH services have not been started yet. Retrying."
 73 |                     errcount += 1
 74 |                 else:
 75 |                     print "SSH error. Cause: %s" % e.output.strip()
 76 |                     print "Return code: %d" % retcode
 77 |                     raise
 78 | 
 79 |             self.logger.debug("Sleeping for %d seconds..." % wait_time)
 80 |             time.sleep(wait_time)
 81 | 
 82 |     def expand_cluster(self, instance_template, ssh_options, wait_dir):
 83 |         instances = self.get_instances()
 84 | 
 85 |         instance_ids = self._launch_instances(instance_template)
 86 | 
 87 |         if len(instance_ids) != instance_template.number:
 88 |             self.logger.warn("Number of reported instance ids (%d) " \
 89 |                              "does not match requested number (%d)" % \
 90 |                              (len(instance_ids), instance_template.number))
 91 |         self.logger.debug("Waiting for %s instance(s) to start: %s" % \
 92 |             (instance_template.number, ", ".join(instance_ids)))
 93 |         time.sleep(1)
 94 | 
 95 |         try:
 96 |             self.cluster.wait_for_instances(instance_ids)
 97 |             self.logger.debug("%d instances started" % (instance_template.number,))
 98 |         except TimeoutException:
 99 |             self.logger.error("Timeout while waiting for %s instance to start." % \
100 |                 ",".join(instance_template.roles))
101 | 
102 |         instances = self.get_instances()
103 |         self.logger.debug("We have %d current instances...", len(instances))
104 |         new_instances = [instance for instance in instances if instance.id in instance_ids]
105 |         if(len(new_instances) != len(instance_ids)) :
106 |             raise Exception("Could only find %d new instances, expected %s" % (len(new_instances), str(instance_ids)))
107 | 
108 |         for instance in instances:
109 |             self._wait_for_install(instance, ssh_options, wait_dir)
110 |         self.logger.info("Instances started: %s" % (str(new_instances),))
111 | 
112 |         self._attach_storage(instance_template.roles)
113 | 
114 | 
115 |     def launch_cluster(self, instance_template, ssh_options, wait_dir):
116 |         """
117 |         """
118 |         if self.get_instances() :
119 |             raise Exception("This cluster is already running.  It must be terminated prior to being launched again.")
120 | 
121 |         self.expand_cluster(instance_template, ssh_options, wait_dir)
122 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from setuptools import setup, find_packages
17 | 
18 | version = __import__('cloud').VERSION
19 | 
20 | setup(name='stratus',
21 |     version=version,
22 |     description='Scripts for running various services on cloud providers',
23 |     license = 'Apache License (2.0)',
24 |     author = 'Abe Music - Digital Reasoning Systems, Inc.',
25 |     author_email = 'abe.music@digitalreasoning.com',
26 |     packages=['cloud', 'cloud.providers', 'cloud.plugins', 'cloud.plugins.cassandra', 'cloud.plugins.hadoop', 'cloud.plugins.hadoop_cassandra_hybrid', 'cloud.plugins.simple'],
27 |     package_dir = {'cloud.plugins': 'plugins'},
28 |     scripts=['stratus'],
29 |     include_package_data=True,
30 |     package_data = {'': ['*.plugin']},
31 |     install_requires = ['boto==2.0','python-dateutil==1.5','simplejson','prettytable==0.5','yapsy==1.8','fabric','PyYAML'],
32 | )
33 | 


--------------------------------------------------------------------------------