├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── flintrock.yaml │ └── infra.yaml ├── .gitignore ├── CHANGES.md ├── CONTRIBUTING.md ├── COPYRIGHT ├── LICENSE ├── MANIFEST.in ├── README.md ├── flintrock-logo.png ├── flintrock ├── __init__.py ├── __main__.py ├── config.yaml.template ├── core.py ├── ec2.py ├── exceptions.py ├── flintrock.py ├── scripts │ ├── adoptium.repo │ ├── download-package.py │ └── setup-ephemeral-storage.py ├── services.py ├── ssh.py ├── templates │ ├── hadoop │ │ └── conf │ │ │ ├── core-site.xml │ │ │ ├── hadoop-env.sh │ │ │ ├── hdfs-site.xml │ │ │ ├── masters │ │ │ └── slaves │ └── spark │ │ └── conf │ │ ├── slaves │ │ └── spark-env.sh └── util.py ├── generate-standalone-package.py ├── hook-flintrock.py ├── make-release.sh ├── pyproject.toml ├── requirements ├── developer.in ├── developer.pip ├── maintainer.in ├── maintainer.pip ├── user.in └── user.pip ├── setup.cfg ├── setup.py ├── standalone.py ├── test-infra ├── .gitignore ├── README.md ├── bastion.tf ├── delete-test-infra.sh ├── network.tf ├── provider.tf └── variables.tf └── tests ├── README.md ├── conftest.py ├── test_acceptance.py ├── test_core.py ├── test_ec2.py ├── test_flintrock.py ├── test_pyinstaller_packaging.py ├── test_scripts.py ├── test_static.py └── test_util.py /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | * Flintrock version: 6 | * Python version: 7 | * OS: 8 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | This PR makes the following changes: 2 | * 3 | * 4 | 5 | I tested this PR by... 6 | 7 | Fixes #NN. 8 | Fixes #MM. 9 | -------------------------------------------------------------------------------- /.github/workflows/flintrock.yaml: -------------------------------------------------------------------------------- 1 | name: flintrock 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: 17 | - ubuntu-20.04 18 | - macos-14 19 | python-version: 20 | # Update the artifact upload steps below if modifying 21 | # this list of Python versions. 22 | - "3.9" 23 | - "3.10" 24 | - "3.11" 25 | - "3.12" 26 | - "3.13" 27 | name: ${{ matrix.os }} / Python ${{ matrix.python-version }} 28 | steps: 29 | - uses: actions/checkout@v3 30 | - name: Set up Python 31 | uses: actions/setup-python@v3 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | - run: "pip install -r requirements/maintainer.pip" 35 | - run: "pytest" 36 | - run: python -m build 37 | - uses: actions/upload-artifact@v3 38 | # Use the latest supported Python to build a standalone package. 39 | if: ${{ matrix.python-version == '3.13' }} 40 | with: 41 | name: Flintrock Standalone - ${{ matrix.os }} 42 | path: dist/Flintrock-*-standalone-*.zip 43 | - uses: actions/upload-artifact@v3 44 | # Use the oldest supported Python to build a wheel. 
45 | if: ${{ matrix.os == 'ubuntu-20.04' && matrix.python-version == '3.9' }} 46 | with: 47 | name: Flintrock Wheel 48 | path: dist/Flintrock-*.whl 49 | -------------------------------------------------------------------------------- /.github/workflows/infra.yaml: -------------------------------------------------------------------------------- 1 | name: test-infra 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | terraform-lint: 13 | runs-on: ubuntu-20.04 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Check Terraform Formatting 17 | run: | 18 | cd test-infra 19 | terraform fmt -check -diff 20 | - name: Validate Terraform Templates 21 | run: | 22 | cd test-infra 23 | terraform init 24 | terraform validate 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Started from: https://github.com/github/gitignore/blob/master/Python.gitignore 2 | 3 | .vscode/ 4 | .metals/ 5 | config.yaml 6 | venv/ 7 | *example.py 8 | flintrock-logo/ 9 | .hypothesis/ 10 | *.prf 11 | .DS_Store 12 | .pytest_cache/ 13 | /resources/ 14 | .python-version 15 | 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | env/ 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [Unreleased] 4 | 5 | [Unreleased]: https://github.com/nchammas/flintrock/compare/v2.1.0...master 6 | 7 | ### Changed 8 | 9 | * [#383]: Dropped support for Python 3.8 and added CI build for Python 3.13. 10 | 11 | [#383]: https://github.com/nchammas/flintrock/pull/383 12 | 13 | ## [2.1.0] - 2023-11-26 14 | 15 | [2.1.0]: https://github.com/nchammas/flintrock/compare/v2.0.0...2.1.0 16 | 17 | ### Changed 18 | 19 | * [#348], [#367]: Bumped default Spark to 3.5.0 and default Hadoop to 3.3.6; dropped support for Python 3.6 and 3.7; added CI builds for Python 3.10, 3.11, and 3.12. 20 | * [#361]: Migrated from AdoptOpenJDK, which is deprecated, to Adoptium OpenJDK. 21 | * [#362], [#366]: Improved Flintrock's ability to cleanup after launch failures. 22 | * [#366]: Deprecated `--ec2-spot-request-duration`, which is not needed for one-time spot instances launched using the RunInstances API. 23 | * [#369]: Adopted `pyproject.toml` and tweaked Flintrock's Python packaging accordingly. This keeps Flintrock in line with modern Python packaging standards and should be transparent to end-users. 
24 | 25 | [#348]: https://github.com/nchammas/flintrock/pull/348 26 | [#361]: https://github.com/nchammas/flintrock/pull/361 27 | [#362]: https://github.com/nchammas/flintrock/pull/362 28 | [#366]: https://github.com/nchammas/flintrock/pull/366 29 | [#367]: https://github.com/nchammas/flintrock/pull/367 30 | [#369]: https://github.com/nchammas/flintrock/pull/369 31 | 32 | ## [2.0.0] - 2021-06-10 33 | 34 | [2.0.0]: https://github.com/nchammas/flintrock/compare/v1.0.0...v2.0.0 35 | 36 | ### Added 37 | 38 | * [#296]: Added support for launching clusters into private VPCs. This includes new infrastructure added in [#302] to support testing against private VPCs. 39 | * [#307]: Added support for Hadoop/HDFS 3.x. 40 | * [#315]: Added a new `--ec2-spot-request-duration` option to support setting the EC2 spot request duration. 41 | * [#316]: Added a new `--java-version` option and support for Java 11. 42 | * [#323]: Flintrock now automatically selects the correct build of Spark to use, based on the version of Hadoop/HDFS that you specify. 43 | * [#324]: Flintrock now supports S3 URLs as a download source for Hadoop or Spark. This makes it easy to host your own copies of the Hadoop and Spark release builds in a private bucket. 44 | 45 | [#296]: https://github.com/nchammas/flintrock/pull/296 46 | [#302]: https://github.com/nchammas/flintrock/pull/302 47 | [#307]: https://github.com/nchammas/flintrock/pull/307 48 | [#315]: https://github.com/nchammas/flintrock/pull/315 49 | [#316]: https://github.com/nchammas/flintrock/pull/316 50 | [#323]: https://github.com/nchammas/flintrock/pull/323 51 | [#324]: https://github.com/nchammas/flintrock/pull/324 52 | 53 | ### Changed 54 | 55 | * [#285]: Flintrock now configures cluster nodes to use private IP addresses for internal communication. This should improve the reliability of cluster launches and restarts. 56 | * [#304]: Fixed a bug in how `UserData` scripts are submitted to new cluster slaves. 57 | * [#311]: Changed how Flintrock manages its own security groups to reduce the likelihood of hitting any limits on the number of rules per security group. 58 | * [#326]: Switched some internals from using host names to IP addresses, which should improve Flintrock's behavior when running from an EC2 host. 59 | * [#329]: Dropped support for Python 3.5 and added automated testing for Python 3.8 and 3.9. 60 | * [#334]: Flintrock now ensures that `python3` is available on launched clusters and sets that as the default Python that PySpark will use. 61 | 62 | [#285]: https://github.com/nchammas/flintrock/pull/285 63 | [#304]: https://github.com/nchammas/flintrock/pull/304 64 | [#311]: https://github.com/nchammas/flintrock/pull/311 65 | [#326]: https://github.com/nchammas/flintrock/pull/326 66 | [#329]: https://github.com/nchammas/flintrock/pull/329 67 | [#334]: https://github.com/nchammas/flintrock/pull/334 68 | 69 | ## [1.0.0] - 2020-01-11 70 | 71 | [1.0.0]: https://github.com/nchammas/flintrock/compare/v0.11.0...v1.0.0 72 | 73 | ### Changed 74 | 75 | * [#297]: Dropped support for Python 3.4. 76 | * [#252]: Flintrock now pins all its transitive dependencies via the files under `requirements/`. This is useful for users who want to build Flintrock themselves. 
77 | 78 | [#297]: https://github.com/nchammas/flintrock/pull/297 79 | [#252]: https://github.com/nchammas/flintrock/pull/252 80 | 81 | ## [0.11.0] - 2018-12-02 82 | 83 | [0.11.0]: https://github.com/nchammas/flintrock/compare/v0.10.0...v0.11.0 84 | 85 | ### Changed 86 | 87 | * [#258], [#268]: Fixed up support for Python 3.7. 88 | * [#264]: Fixed a logging error in `flintrock describe --master-hostname-only`. 89 | * [#277]: Fixed a bug in resolving client IP addresses from behind proxy. 90 | 91 | [#258]: https://github.com/nchammas/flintrock/pull/258 92 | [#264]: https://github.com/nchammas/flintrock/pull/264 93 | [#268]: https://github.com/nchammas/flintrock/pull/268 94 | [#277]: https://github.com/nchammas/flintrock/pull/277 95 | 96 | ## [0.10.0] - 2018-07-15 97 | 98 | [0.10.0]: https://github.com/nchammas/flintrock/compare/v0.9.0...v0.10.0 99 | 100 | ### Added 101 | 102 | * [#242]: Flintrock is now available on Homebrew: 103 | ``` 104 | brew install flintrock 105 | ``` 106 | This is a community-supported distribution. 107 | 108 | [#242]: https://github.com/nchammas/flintrock/pull/242 109 | 110 | ### Changed 111 | 112 | * [#224]: Fixed a problem with some Flintrock config combinations 113 | related to Hadoop. 114 | * [#232]: When you destroy a cluster, Flintrock now waits until the 115 | instances are completely terminated before returning. 116 | * [#234]: Flintrock now tries more times by default to connect via 117 | SSH, which should provide more launch stability in certain 118 | environments. 119 | * [#246]: Fixed some bugs with `flintrock describe` that are exposed 120 | when a cluster is transitioning states (e.g. from running to 121 | terminated). 122 | * [#249]: **Flintrock now downloads both Spark and Hadoop from Apache 123 | mirrors by default.** This is a significant change. You can read the 124 | background on what prompted this change in [#238]. 125 | * [#254]: Flintrock no longer configures hadoop-aws automatically due 126 | to version incompatibilities that are difficult to resolve 127 | automatically. Instead, the README now provides additional guidance 128 | on using `s3a://`. 129 | * [#259]: Flintrock now correctly ignores tiny devices that show up 130 | on some instance types, like the M5 series on EC2. This fixes the 131 | problems Flintrock had getting HDFS to work on those instance 132 | types. 133 | 134 | [#224]: https://github.com/nchammas/flintrock/pull/224 135 | [#232]: https://github.com/nchammas/flintrock/pull/232 136 | [#234]: https://github.com/nchammas/flintrock/pull/234 137 | [#238]: https://github.com/nchammas/flintrock/pull/238 138 | [#246]: https://github.com/nchammas/flintrock/pull/246 139 | [#249]: https://github.com/nchammas/flintrock/pull/249 140 | [#254]: https://github.com/nchammas/flintrock/pull/254 141 | [#259]: https://github.com/nchammas/flintrock/pull/259 142 | 143 | ## [0.9.0] - 2017-08-06 144 | 145 | [0.9.0]: https://github.com/nchammas/flintrock/compare/v0.8.0...v0.9.0 146 | 147 | ### Added 148 | 149 | * [#178]: You can now see additional output during launch and other 150 | operations with the new `--debug` option. 151 | * [#185]: Added a new mount point under `/media/tmp` that can be used 152 | when `/tmp` is not big enough. 153 | * [#186]: You can now tag your clusters with arbitrary tags on launch 154 | using the new `--ec2-tag` option. (Remember: As with all options, 155 | you can also set this via `flintrock configure`.) 
156 | * [#191]: You can now specify the size of the root EBS volume with the 157 | new `--ec2-min-root-ebs-size-gb` option. 158 | * [#181]: You can now set the number of executors per worker with 159 | `--spark-executor-instances`. 160 | 161 | [#178]: https://github.com/nchammas/flintrock/pull/178 162 | [#185]: https://github.com/nchammas/flintrock/pull/185 163 | [#186]: https://github.com/nchammas/flintrock/pull/186 164 | [#191]: https://github.com/nchammas/flintrock/pull/191 165 | [#181]: https://github.com/nchammas/flintrock/pull/181 166 | 167 | ### Changed 168 | 169 | * [#195]: After launching a new cluster, Flintrock now shows the 170 | master address and login command. 171 | * [#196], [#197]: Fixed some bugs that were preventing Flintrock from 172 | launching Spark clusters at a specific commit. 173 | * [#204]: Flintrock now automatically retries starting the Spark and 174 | HDFS masters if it encounters common issues with bringing the 175 | cluster up. This greatly improves launch and restart reliability. 176 | * [#208]: Flintrock now provides a hint with possible causes for 177 | certain SSH errors. 178 | 179 | [#195]: https://github.com/nchammas/flintrock/pull/195 180 | [#196]: https://github.com/nchammas/flintrock/pull/196 181 | [#197]: https://github.com/nchammas/flintrock/pull/197 182 | [#204]: https://github.com/nchammas/flintrock/pull/204 183 | [#208]: https://github.com/nchammas/flintrock/pull/208 184 | 185 | ## [0.8.0] - 2017-02-11 186 | 187 | [0.8.0]: https://github.com/nchammas/flintrock/compare/v0.7.0...v0.8.0 188 | 189 | ### Added 190 | 191 | * [#180]: Accessing data on S3 from your Flintrock cluster is now much 192 | easier! Just configure Flintrock to use Hadoop 2.7+ (which is the 193 | default) and an appropriate IAM role, and you'll be able to access 194 | paths on S3 using the new `s3a://` prefix. [Check the README] for 195 | more information. 196 | * [#176], [#187]: Flintrock now supports users with non-standard home 197 | directories. 198 | 199 | [#180]: https://github.com/nchammas/flintrock/pull/180 200 | [#176]: https://github.com/nchammas/flintrock/pull/176 201 | [#187]: https://github.com/nchammas/flintrock/pull/187 202 | [Check the README]: https://github.com/nchammas/flintrock/tree/v0.8.0#accessing-data-on-s3 203 | 204 | ### Changed 205 | 206 | * [#168]: Flintrock now does a better job of cleaning up after 207 | interrupted operations. 208 | * [#179], [#184]: Flintrock can now clean up malformed Flintrock 209 | clusters. 210 | * [`6b426ae`]: We fixed an issue affecting some users of Flintrock's 211 | standalone package that caused Flintrock to intermittently throw 212 | `ImportError`s. 213 | 214 | [#168]: https://github.com/nchammas/flintrock/pull/168 215 | [#179]: https://github.com/nchammas/flintrock/pull/179 216 | [#184]: https://github.com/nchammas/flintrock/pull/184 217 | [`6b426ae`]: https://github.com/nchammas/flintrock/commit/6b426aedc7e92b434021cc09c6e7eb181fca7eef 218 | 219 | ## [0.7.0] - 2016-11-15 220 | 221 | [0.7.0]: https://github.com/nchammas/flintrock/compare/v0.6.0...v0.7.0 222 | 223 | ### Added 224 | 225 | * [#146]: Flintrock now ensures that launched clusters have Java 8 or 226 | higher installed. 227 | * [#149]: You can now specify an [EC2 user data] script to use on launch 228 | with the new `--ec2-user-data` option. 
229 | 230 | [#146]: https://github.com/nchammas/flintrock/pull/146 231 | [#149]: https://github.com/nchammas/flintrock/pull/149 232 | [EC2 user data]: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html 233 | 234 | ### Changed 235 | 236 | * [#154], [#155], [#156]: Flintrock now provides friendly error messages 237 | when it encounters common configuration or setup problems. 238 | 239 | [#154]: https://github.com/nchammas/flintrock/pull/154 240 | [#155]: https://github.com/nchammas/flintrock/pull/155 241 | [#156]: https://github.com/nchammas/flintrock/pull/156 242 | 243 | ## [0.6.0] - 2016-08-28 244 | 245 | [0.6.0]: https://github.com/nchammas/flintrock/compare/v0.5.0...v0.6.0 246 | 247 | ### Added 248 | 249 | * [#115]: Flintrock can now resize existing clusters with the new 250 | `add-slaves` and `remove-slaves` commands. 251 | 252 | [#115]: https://github.com/nchammas/flintrock/pull/115 253 | 254 | ### Changed 255 | 256 | * [#115]: If you lost your master somehow, Flintrock can now still 257 | destroy the cluster. 258 | * [#115]: You can no longer launch clusters with 0 slaves. The 259 | implementation was broken. We may fix and add this capability back 260 | in the future. 261 | 262 | ## [0.5.0] - 2016-07-20 263 | 264 | [0.5.0]: https://github.com/nchammas/flintrock/compare/v0.4.0...v0.5.0 265 | 266 | ### Added 267 | 268 | * [#118]: You can now specify `--hdfs-download-source` (or the 269 | equivalent in your config file) to tell Flintrock to download Hadoop 270 | from a specific URL when launching your cluster. 271 | * [#125]: You can now specify `--spark-download-source` (or the 272 | equivalent in your config file) to tell Flintrock to download Spark 273 | from a specific URL when launching your cluster. 274 | * [#112]: You can now specify `--ec2-security-group` to associate 275 | additional security groups with your cluster on launch. 276 | 277 | [#118]: https://github.com/nchammas/flintrock/pull/118 278 | [#125]: https://github.com/nchammas/flintrock/pull/125 279 | [#112]: https://github.com/nchammas/flintrock/pull/112 280 | 281 | ### Changed 282 | 283 | * [#103], [#114]: Flintrock now opens port 6066 and 7077 so local 284 | clients like Apache Zeppelin can connect directly to the Spark 285 | master on the cluster. 286 | * [#122]: Flintrock now automatically adds executables like 287 | `spark-submit`, `pyspark`, and `hdfs` to the default `PATH`, so 288 | they're available to call right when you login to the cluster. 289 | 290 | [#103]: https://github.com/nchammas/flintrock/pull/103 291 | [#114]: https://github.com/nchammas/flintrock/pull/114 292 | [#122]: https://github.com/nchammas/flintrock/pull/122 293 | 294 | ## [0.4.0] - 2016-03-27 295 | 296 | [0.4.0]: https://github.com/nchammas/flintrock/compare/v0.3.0...v0.4.0 297 | 298 | ### Added 299 | 300 | * [#98], [#99]: You can now specify `latest` for `--spark-git-commit` 301 | and Flintrock will automatically build Spark on your cluster at the 302 | latest commit. This feature is only available for Spark repos 303 | hosted on GitHub. 304 | * [#94]: Flintrock now supports launching clusters into non-default 305 | VPCs. 306 | 307 | [#94]: https://github.com/nchammas/flintrock/pull/94 308 | [#98]: https://github.com/nchammas/flintrock/pull/98 309 | [#99]: https://github.com/nchammas/flintrock/pull/99 310 | 311 | ### Changed 312 | 313 | * [#86]: Flintrock now correctly catches when spot requests fail and 314 | bubbles up an appropriate error message. 315 | * [#93], [#97]: Fixed the ability to build Spark from git. 
(It was 316 | broken for recent commits.) 317 | * [#96], [#100]: Flintrock launches should now work correctly whether 318 | the default Python on the cluster is Python 2.7 or Python 3.4+. 319 | 320 | [#86]: https://github.com/nchammas/flintrock/pull/86 321 | [#93]: https://github.com/nchammas/flintrock/pull/93 322 | [#96]: https://github.com/nchammas/flintrock/pull/96 323 | [#97]: https://github.com/nchammas/flintrock/pull/97 324 | [#100]: https://github.com/nchammas/flintrock/pull/100 325 | 326 | ## [0.3.0] - 2016-02-14 327 | 328 | [0.3.0]: https://github.com/nchammas/flintrock/compare/v0.2.0...v0.3.0 329 | 330 | ### Changed 331 | 332 | * [`eca59fc`], [`3cf6ee6`]: Tweaked a few things so that Flintrock 333 | can launch 200+ node clusters without hitting certain limits. 334 | 335 | [`eca59fc`]: https://github.com/nchammas/flintrock/commit/eca59fc0052874d9aa48b7d4d7d79192b5e609d1 336 | [`3cf6ee6`]: https://github.com/nchammas/flintrock/commit/3cf6ee64162ceaac6429d79c3bc6ef25988eaa8e 337 | 338 | ## [0.2.0] - 2016-02-07 339 | 340 | [0.2.0]: https://github.com/nchammas/flintrock/compare/v0.1.0...v0.2.0 341 | 342 | ### Added 343 | 344 | * [`b00fd12`]: Added `--assume-yes` option to the `launch` command. 345 | Use `--assume-yes` to tell Flintrock to automatically destroy the 346 | cluster if there are problems during launch. 347 | 348 | [`b00fd12`]: https://github.com/nchammas/flintrock/commit/b00fd128f36e0a05dafca69b26c4d1b190fa42c9 349 | 350 | ### Changed 351 | 352 | * [#69]: Automatically retry Hadoop download from flaky Apache 353 | mirrors. 354 | * [`0df7004`]: Delete unneeded security group after a cluster is 355 | destroyed. 356 | * [`244f734`]: Default HDFS not to install. Going forward, Spark will 357 | be the only service that Flintrock installs by default. Defaults can 358 | easily be changed via Flintrock's config file. 359 | * [`de33412`]: Flintrock installs services, not modules. The 360 | terminology has been updated accordingly throughout the code and 361 | docs. Update your config file to use `services` instead of 362 | `modules`. **Warning**: Flintrock will have problems managing 363 | existing clusters that were launched with versions of Flintrock from 364 | before this change. 365 | * [#73]: Major refactoring of Flintrock internals. 366 | * [#74]: Flintrock now catches common configuration problems upfront 367 | and provides simple error messages, instead of barfing out errors 368 | from EC2 or launching broken clusters. 369 | * [`bf766ba`]: Fixed a bug in how Flintrock polls SSH availability 370 | from Linux. Cluster launches now work from Linux as intended. 371 | 372 | [#69]: https://github.com/nchammas/flintrock/pull/69 373 | [`0df7004`]: https://github.com/nchammas/flintrock/commit/0df70043f3da215fe699165bc961bd0c4ba4ea88 374 | [`244f734`]: https://github.com/nchammas/flintrock/commit/244f7345696d1b8cec1d1b575a304b9bd9a77840 375 | [`de33412`]: https://github.com/nchammas/flintrock/commit/de3341221ca8d57f5a465b13f07c8e266ae11a59 376 | [#73]: https://github.com/nchammas/flintrock/pull/73 377 | [#74]: https://github.com/nchammas/flintrock/pull/74 378 | [`bf766ba`]: https://github.com/nchammas/flintrock/commit/bf766ba48f12a8752c2e32f9b3daf29501c30866 379 | 380 | ## [0.1.0] - 2015-12-11 381 | 382 | [0.1.0]: https://github.com/nchammas/flintrock/releases/tag/v0.1.0 383 | 384 | * Initial release. 
385 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | There are many ways to contribute to Flintrock. 4 | 5 | ## Contributing Thanks 6 | 7 | When we put our time and enthusiasm into an open source project like this, we hope that somewhere out there we are putting a smile on someone's face. 8 | 9 | Most of the time we'll never know, though. When people reach out within an open source community, it's typically to report a problem, ask for help, or share an idea. 10 | 11 | That's a bummer, because hearing first-hand that we made a positive impact on someone else's day, even if it's minor, can be a huge boost of joy and motivation. 12 | 13 | Don't underestimate the power of a thank you. If Flintrock helped you in some way, share your story, even if it's "trivial", and know that at times this can be the most valuable way to contribute to the project. 14 | 15 | 16 | ## Contributing Money 17 | 18 | Most projects have various kinds of tests to make sure things are working correctly. The most valuable test for an orchestration tool like Flintrock is a full acceptance test, since the fundamental thing Flintrock does is manage remote resources. 19 | 20 | This means that as Flintrock developers we are always launching and destroying instances on some cloud provider, which costs money. Any money you contribute will go towards paying those bills. 21 | 22 | We're still figuring out how best to accept donations for these purposes, but [Amazon Allowance](http://www.amazon.com/b?ie=UTF8&node=11453461011) looks promising. 23 | 24 | 25 | ## Contributing Bug Reports 26 | 27 | When reporting a bug, do your best to provide a [short, self contained, and correct example](http://sscce.org/) of the problem you are seeing. Bug reports will otherwise likely be ignored, unless they are really easy to reproduce. 28 | 29 | In addition to reporting bugs, you can also confirm or deny existing bug reports. This helps us prioritize bug fixes and understand if certain bugs are limited to certain configurations. 30 | 31 | 32 | ## Contributing Feature Requests 33 | 34 | ### Describe your problem first, not just your solution 35 | 36 | What are you trying to do? Explain the root problem clearly. **This is more important than describing your proposed solution.** 37 | 38 | When we understand your feature request in the context of what you are really trying to do, we can better evaluate any proposed solutions and perhaps even come up with a better solution that you might not see. 39 | 40 | Describing your original problem or use case will also help us avoid the [X-Y Problem](http://mywiki.wooledge.org/XyProblem), which can waste a lot of everyone's time. 41 | 42 | If you see an existing feature request that you are interested in, chime in. Your input will help us flesh out the request and understand how much demand there is for it. 43 | 44 | 45 | ## Contributing Code 46 | 47 | Sometimes, you just wanna write some code. Just keep these guidelines in mind before you do that if you want your code contribution accepted. 48 | 49 | ### License 50 | 51 | Unless you explicitly tell us otherwise, when you contribute code you affirm that the contribution is your original work and that you license it to the project under the project's [license](LICENSE). 52 | 53 | Please make sure that you are OK with our license's terms before contributing code. 
54 | 55 | ### Setup 56 | 57 | If you agree to our license, the next thing you'll want to do is get Flintrock's source code and install its development dependencies. 58 | 59 | ```sh 60 | git clone https://github.com/nchammas/flintrock 61 | cd flintrock 62 | 63 | python3 -m venv venv 64 | source venv/bin/activate 65 | 66 | pip3 install -r requirements/developer.pip 67 | ``` 68 | 69 | When you `git pull` the latest changes, don't forget to also rerun the `pip install` step so that Flintrock's dependencies stay up-to-date. 70 | 71 | ### Trivial bug fixes or changes 72 | 73 | If you're making a small change, go right ahead and open that pull request. There's no need to coordinate beforehand. 74 | 75 | ### New features, non-trivial changes 76 | 77 | There are a few things you should do before diving in to write a new feature or implement some non-trivial change. 78 | 79 | ### Changing dependencies 80 | 81 | If you are changing anything about Flintrock's dependencies, be sure to update the compiled requirements using [pip-tools] and the lowest version of Python that Flintrock supports (Python 3.9): 82 | 83 | [pip-tools]: https://github.com/jazzband/pip-tools 84 | 85 | ```shell 86 | function update-deps() { 87 | pip install -U "pip-tools==7.3.0" 88 | 89 | pip-compile -U requirements/user.in -o requirements/user.pip 90 | pip-compile -U requirements/developer.in -o requirements/developer.pip 91 | pip-compile -U requirements/maintainer.in -o requirements/maintainer.pip 92 | 93 | # Uncomment whichever set of requirements makes sense for you. 94 | # pip-sync requirements/user.pip 95 | # pip-sync requirements/developer.pip 96 | # pip-sync requirements/maintainer.pip 97 | } 98 | 99 | update-deps 100 | ``` 101 | 102 | `pip-compile` takes the provided set of input requirements, like `user.in`, and compiles them into a full list of pinned transitive dependencies, like `user.pip`. This is similar to a lock file. `pip-sync` ensures that the current active virtual environment has exactly the dependencies listed in the provided pip file, no more and no less. 103 | 104 | #### Coordinate first 105 | 106 | Coordinating first means starting a discussion with the core developers to get a sense of how to approach the problem you want to work on. 107 | 108 | If you don't do this and just submit a pull request out of the blue, there is a good chance you will write something that is unwanted, either because it doesn't fit the project, or because it was implemented in an undesirable way. 109 | 110 | This doesn't mean that you need to wait for some official blessing before doing any interesting work. It just means that your chances of getting your work merged rise considerably when that work has had some input from those closest to the project. 111 | 112 | #### Weigh the maintenance burden 113 | 114 | Programming can be like intercourse. A neat new feature can be cranked out after a passionate night of coding, but -- if accepted into the project -- it has to be maintained for years, often at much greater cumulative cost than what the initial implementation took. 115 | 116 | When building something new, don't just consider the value it will provide. Consider also how much work it will take to keep it working over the years. Is it worth it in the long run? This is doubly important if you don't see yourself sticking around to take care of your baby. How easy will it be for others to take responsibility for your work? 
117 | 118 | #### Capture one idea in one pull request 119 | 120 | *Note: This section is largely a summary of the [guidance given here](https://secure.phabricator.com/book/phabflavor/article/recommendations_on_revision_control/) by Evan Priestley of the Phabricator project.* 121 | 122 | Make sure each pull request you submit captures a single coherent idea. This limits the scope of any given pull request and makes it much easier for a reviewer to understand what you are doing and give precise feedback. Don't mix logically independent changes in the same request if they can be submitted separately. 123 | 124 | #### Expect many revisions 125 | 126 | If you are adding or touching lots of code, then be prepared to go through many rounds of revisions before your pull request is accepted. This is normal, especially as you are still getting acquainted with the project's standards and style. 127 | 128 | ### Test your changes 129 | 130 | Whether your changes are big or small, you'll want to test them. Flintrock includes [tests](./tests/) which you should use. 131 | 132 | ### Don't expand the support matrix 133 | 134 | We will generally reject contributions that expand the number of operating systems, configurations, or languages that Flintrock supports, because they impose a large maintenance burden on the project over its lifespan. In some cases this might mean rejecting contributions that could significantly expand the project's potential user base. 135 | 136 | We accept this tradeoff because we have seen popular open source projects go to decay because their maintenance burden grew large enough to kill the fun of the project for the core developers. 137 | 138 | Small open source projects like Flintrock, which do not have the backing of a company, run on the free time and interest of contributors. Keeping the project's maintenance burden as small as possible, sometimes at the cost of reach, makes it more likely that contributors will continue to take interest in the project for a long time. This better serves our user base over the long run. 139 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright 2015 Nicholas Chammas 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use the files in this repository except in compliance with 5 | the License. 6 | 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024 Nicholas Chammas 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # See: https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html 2 | graft flintrock 3 | 4 | include README.md 5 | include CHANGES.md 6 | include COPYRIGHT 7 | include LICENSE 8 | 9 | global-exclude *.py[cod] __pycache__ .DS_Store 10 | global-exclude config.yaml 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Flintrock logo](https://raw.githubusercontent.com/nchammas/flintrock/master/flintrock-logo.png) 2 | 3 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/nchammas/flintrock/blob/master/LICENSE) 4 | [![Build Status](https://github.com/nchammas/flintrock/actions/workflows/flintrock.yaml/badge.svg)](https://github.com/nchammas/flintrock/actions) 5 | 6 | Flintrock is a command-line tool for launching [Apache Spark](http://spark.apache.org/) clusters. 7 | 8 | 9 | ## Flintrock around the web 10 | 11 | Flintrock has been featured in a few talks, guides, and papers around the web. 12 | 13 | * Talks: 14 | * [Flintrock: A faster, better spark-ec2](https://www.youtube.com/watch?v=3aeIpOGrJOA) ([slides](http://www.slideshare.net/SparkSummit/flintrock-a-faster-better-sparkec2-by-nicholas-chammas)) 15 | * Guides: 16 | * Running Spark on a Cluster: The Basics (using Flintrock) 17 | * [Part 1: Start a Spark Cluster and Use the spark-shell](http://heather.miller.am/blog/launching-a-spark-cluster-part-1.html) 18 | * [Part 2: Dependencies, S3, and Deploying via spark-submit](http://heather.miller.am/blog/launching-a-spark-cluster-part-2.html) 19 | * [Spark with Jupyter on AWS](https://github.com/PiercingDan/spark-Jupyter-AWS) 20 | * [Building a data science platform for R&D, part 2 – Deploying Spark on AWS using Flintrock](https://alexioannides.com/2016/08/18/building-a-data-science-platform-for-rd-part-2-deploying-spark-on-aws-using-flintrock/) 21 | * [AWS EC2를 활용 스파크 클러스터 생성](http://statkclee.github.io/ml/ml-aws-ec2-flintrock.html) 22 | * Papers: 23 | * ["Birds in the Clouds": Adventures in Data Engineering](https://arxiv.org/pdf/1710.08521.pdf) 24 | 25 | 26 | ## Usage 27 | 28 | Here's a quick way to launch a cluster on EC2, assuming you already have an [AWS account set up](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/get-set-up-for-amazon-ec2.html). Flintrock works best with Amazon Linux. You can get the latest AMI IDs [from here](https://aws.amazon.com/amazon-linux-2/release-notes/). 
29 | 30 | ```sh 31 | flintrock launch test-cluster \ 32 | --num-slaves 1 \ 33 | --spark-version 3.5.0 \ 34 | --ec2-key-name key_name \ 35 | --ec2-identity-file /path/to/key.pem \ 36 | --ec2-ami ami-0588935a949f9ff17 \ 37 | --ec2-user ec2-user 38 | ``` 39 | 40 | If you [persist these options to a file](#configurable-cli-defaults), you'll be able to do the same thing much more concisely: 41 | 42 | ```sh 43 | flintrock configure 44 | # Save your preferences via the opened editor, then... 45 | flintrock launch test-cluster 46 | ``` 47 | 48 | Once you're done using a cluster, don't forget to destroy it with: 49 | 50 | ```sh 51 | flintrock destroy test-cluster 52 | ``` 53 | 54 | Other things you can do with Flintrock include: 55 | 56 | ```sh 57 | flintrock login test-cluster 58 | flintrock describe test-cluster 59 | flintrock add-slaves test-cluster --num-slaves 2 60 | flintrock remove-slaves test-cluster --num-slaves 1 61 | flintrock run-command test-cluster 'sudo yum install -y package' 62 | flintrock copy-file test-cluster /local/path /remote/path 63 | ``` 64 | 65 | To see what else Flintrock can do, or to see detailed help for a specific command, try: 66 | 67 | ```sh 68 | flintrock --help 69 | flintrock <command> --help 70 | ``` 71 | 72 | That's not all. Flintrock has a few more [features](#features) that you may find interesting. 73 | 74 | ### Accessing data on S3 75 | 76 | We recommend you access data on S3 from your Flintrock cluster by following 77 | these steps: 78 | 79 | 1. Set up an [IAM Role](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html) 80 | that grants access to S3 as desired. Reference this role when you launch 81 | your cluster using the `--ec2-instance-profile-name` option (or its 82 | equivalent in your `config.yaml` file). 83 | 2. Reference S3 paths in your Spark code using the `s3a://` prefix. `s3a://` is 84 | backwards compatible with `s3n://` and replaces both `s3n://` and `s3://`. 85 | The Hadoop project [recommends using `s3a://`](https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html#S3A) 86 | since it is actively developed, supports larger files, and offers 87 | better performance. 88 | 3. Make sure Flintrock is configured to use Hadoop/HDFS 2.7+. Earlier 89 | versions of Hadoop do not have solid implementations of `s3a://`. 90 | Flintrock's default is Hadoop 3.3.6, so you don't need to do anything 91 | here if you're using a vanilla configuration. 92 | 4. Call Spark with the hadoop-aws package to enable `s3a://`. For example: 93 | ```sh 94 | spark-submit --packages org.apache.hadoop:hadoop-aws:3.3.6 my-app.py 95 | pyspark --packages org.apache.hadoop:hadoop-aws:3.3.6 96 | ``` 97 | If you have issues using the package, consult the [hadoop-aws troubleshooting 98 | guide](http://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html) 99 | and try adjusting the version. As a rule of thumb, you should match the version 100 | of hadoop-aws to the version of Hadoop that Spark was built against (which is 101 | typically Hadoop 3.2 or 2.7), even if the version of Hadoop that you're deploying to 102 | your Flintrock cluster is different. 103 | 104 | With this approach you don't need to copy around your AWS credentials 105 | or pass them into your Spark programs. As long as the assigned IAM role 106 | allows it, Spark will be able to read and write data to S3 simply by 107 | referencing the appropriate path (e.g. `s3a://bucket/path/to/file`). 
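As a rough end-to-end sketch of the steps above (the instance profile name `my-s3-access-role` and the bucket `my-bucket` are placeholders; substitute your own):

```sh
# Launch a cluster whose instances carry an IAM role that grants the
# S3 access your jobs need. The profile name here is a placeholder.
flintrock launch test-cluster \
    --ec2-instance-profile-name my-s3-access-role

# On the cluster, start Spark with the hadoop-aws package and reference
# s3a:// paths directly; no credentials need to be copied or passed in.
pyspark --packages org.apache.hadoop:hadoop-aws:3.3.6
# >>> spark.read.text("s3a://my-bucket/path/to/file").count()
```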
108 | 109 | 110 | ## Installation 111 | 112 | Before using Flintrock, take a quick look at the 113 | [copyright](https://github.com/nchammas/flintrock/blob/master/COPYRIGHT) 114 | notice and [license](https://github.com/nchammas/flintrock/blob/master/LICENSE) 115 | and make sure you're OK with their terms. 116 | 117 | **Flintrock requires Python 3.9 or newer**, unless you are using one 118 | of our **standalone packages**. Flintrock has been thoroughly tested 119 | only on OS X, but it should run on all POSIX systems. 120 | A motivated contributor should be able to add 121 | [Windows support](https://github.com/nchammas/flintrock/issues/46) 122 | without too much trouble, too. 123 | 124 | ### Release version 125 | 126 | To get the latest release of Flintrock, simply install it with [pip][pip]. 127 | 128 | Since Flintrock is a command-line application rather than a library, you may prefer to 129 | install it using [pipx][pipx], which automatically takes care of installing Flintrock to 130 | an isolated virtual environment for you. 131 | 132 | [pip]: https://pip.pypa.io/en/stable/ 133 | [pipx]: https://pypa.github.io/pipx/ 134 | 135 | ``` 136 | pipx install flintrock 137 | ``` 138 | 139 | This will install Flintrock and place it on your path. You should be good to go now! 140 | 141 | You'll probably want to get started with the following two commands: 142 | 143 | ```sh 144 | flintrock --help 145 | flintrock configure 146 | ``` 147 | 148 | ### Standalone version (Python not required!) 149 | 150 | We used to publish standalone versions of Flintrock that don't require you to have Python 151 | installed on your machine. Since Flintrock 2.1.0, we have stopped publishing these 152 | standalone builds. 153 | 154 | If you used these standalone packages, please [chime in on this issue][standalone] and 155 | share a bit about your environment and use case. 156 | 157 | [standalone]: https://github.com/nchammas/flintrock/issues/370 158 | 159 | ### Community-supported distributions 160 | 161 | Flintrock is also available via the following package managers: 162 | 163 | * [Homebrew](https://brew.sh): `brew install flintrock` 164 | 165 | These packages are not supported by the core contributors and **may be out of date**. Please reach out to the relevant communities directly if you have trouble using these distributions to install Flintrock. You can always find the latest release of Flintrock [on GitHub](https://github.com/nchammas/flintrock/releases/latest) and [on PyPI](https://pypi.org/project/Flintrock/). 166 | 167 | ### Development version 168 | 169 | If you like living on the edge, install the development version of Flintrock: 170 | 171 | ```sh 172 | pipx install git+https://github.com/nchammas/flintrock 173 | ``` 174 | 175 | If you want to [contribute](https://github.com/nchammas/flintrock/blob/master/CONTRIBUTING.md), follow the instructions in our contributing guide on [how to install Flintrock](https://github.com/nchammas/flintrock/blob/master/CONTRIBUTING.md#contributing-code). 176 | 177 | ## Use Cases 178 | 179 | ### Experimentation 180 | 181 | If you want to play around with Spark, develop a prototype application, run a one-off job, or otherwise just experiment, Flintrock is the fastest way to get you a working Spark cluster. 182 | 183 | ### Performance testing 184 | 185 | Flintrock exposes many options of its underlying providers (e.g. 
EBS-optimized volumes on EC2) which makes it easy to create a cluster with predictable performance for [Spark performance testing](https://github.com/databricks/spark-perf). 186 | 187 | ### Automated pipelines 188 | 189 | Most people will use Flintrock interactively from the command line, but Flintrock is also designed to be used as part of an automated pipeline. Flintrock's exit codes are carefully chosen; it offers options to disable interactive prompts; and when appropriate it prints output in YAML, which is both human- and machine-friendly. 190 | 191 | 192 | ## Anti-Use Cases 193 | 194 | There are some things that Flintrock specifically *does not* support. 195 | 196 | ### Managing permanent infrastructure 197 | 198 | Flintrock is not for managing long-lived clusters, or any infrastructure that serves as a permanent part of some environment. 199 | 200 | For starters, Flintrock provides no guarantee that clusters launched with one version of Flintrock can be managed by another version of Flintrock, and no considerations are made for any long-term use cases. 201 | 202 | If you are looking for ways to manage permanent infrastructure, look at tools like [Terraform](https://www.terraform.io/), [Ansible](http://www.ansible.com/), or [Ubuntu Juju](http://www.ubuntu.com/cloud/tools/juju). You might also find a service like [Databricks](https://databricks.com/product/databricks) useful if you're looking for someone else to host and manage Spark for you. Amazon also offers [Spark on EMR](https://aws.amazon.com/elasticmapreduce/details/spark/). 203 | 204 | ### Launching non-Spark-related services 205 | 206 | Flintrock is meant for launching Spark clusters that include closely related services like HDFS. 207 | 208 | Flintrock is not for launching external datasources (e.g. Cassandra), or other services that are not closely integrated with Spark (e.g. Tez). 209 | 210 | If you are looking for an easy way to launch other services from the Hadoop ecosystem, look at the [Apache Bigtop](http://bigtop.apache.org/) project. 211 | 212 | ### Launching out-of-date services 213 | 214 | Flintrock will always take advantage of new features of Spark and related services to make the process of launching a cluster faster, simpler, and easier to maintain. If that means dropping support for launching older versions of a service, then we will generally make that tradeoff. 215 | 216 | 217 | ## Features 218 | 219 | ### Polished CLI 220 | 221 | Flintrock has a clean command-line interface. 222 | 223 | ```sh 224 | flintrock --help 225 | flintrock describe 226 | flintrock destroy --help 227 | flintrock launch test-cluster --num-slaves 10 228 | ``` 229 | 230 | ### Configurable CLI Defaults 231 | 232 | Flintrock lets you persist your desired configuration to a YAML file so that you don't have to keep typing out the same options over and over at the command line. 233 | 234 | To setup and edit the default config file, run this: 235 | 236 | ```sh 237 | flintrock configure 238 | ``` 239 | 240 | You can also point Flintrock to a non-default config file by using the `--config` option. 
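For example, here is a minimal sketch, assuming an alternate config file saved at a path of your choosing (`~/flintrock-staging.yaml` below is just an illustration):

```sh
# In this sketch, --config is passed to flintrock itself, before the subcommand.
flintrock --config ~/flintrock-staging.yaml launch test-cluster
```

This can be handy if you keep separate configs for, say, different regions or cluster profiles.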
241 | 242 | #### Sample `config.yaml` 243 | 244 | ```yaml 245 | provider: ec2 246 | 247 | services: 248 | spark: 249 | version: 3.5.0 250 | 251 | launch: 252 | num-slaves: 1 253 | 254 | providers: 255 | ec2: 256 | key-name: key_name 257 | identity-file: /path/to/.ssh/key.pem 258 | instance-type: m5.large 259 | region: us-east-1 260 | ami: ami-0588935a949f9ff17 261 | user: ec2-user 262 | ``` 263 | 264 | With a config file like that, you can now launch a cluster with just this: 265 | 266 | ```sh 267 | flintrock launch test-cluster 268 | ``` 269 | 270 | And if you want, you can even override individual options in your config file at the command line: 271 | 272 | ```sh 273 | flintrock launch test-cluster \ 274 | --num-slaves 10 \ 275 | --ec2-instance-type r5.xlarge 276 | ``` 277 | 278 | ### Fast Launches 279 | 280 | Flintrock is really fast. It can launch a 100-node cluster in about three minutes (give or take a few seconds due to AWS's normal performance variability). 281 | 282 | ### Advanced Storage Setup 283 | 284 | Flintrock automatically configures any available [ephemeral storage](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html) on the cluster and makes it available to installed services like HDFS and Spark. This storage is fast and is perfect for use as a temporary store by those services. 285 | 286 | ### Tests 287 | 288 | Flintrock comes with a set of automated, end-to-end [tests](https://github.com/nchammas/flintrock/tree/master/tests). These tests help us develop Flintrock with confidence and guarantee a certain level of quality. 289 | 290 | ### Low-level Provider Options 291 | 292 | Flintrock exposes low-level provider options (e.g. [instance-initiated shutdown behavior](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/terminating-instances.html#Using_ChangingInstanceInitiatedShutdownBehavior)) so you can control the details of how your cluster is setup if you want. 293 | 294 | ### No Custom Machine Image Dependencies 295 | 296 | Flintrock is built and tested against vanilla Amazon Linux and CentOS. You can easily launch Flintrock clusters using your own custom machine images built from either of those distributions. 297 | 298 | 299 | ## Anti-Features 300 | 301 | ### Support for out-of-date versions of Python, EC2 APIs, etc. 302 | 303 | Supporting multiple versions of anything is tough. There's more surface area to cover for testing, and over the long term the maintenance burden of supporting something non-current with bug fixes and workarounds really adds up. 304 | 305 | There are projects that support stuff across a wide cut of language or API versions. For example, Spark supports multiple versions of Java, Scala, R, and Python. The people behind these projects are gods. They take on an immense maintenance burden for the benefit and convenience of their users. 306 | 307 | We here at project Flintrock are much more modest in our abilities. We are best able to serve the project over the long term when we limit ourselves to supporting a small but widely applicable set of configurations. 308 | 309 | 310 | ## Motivation 311 | 312 | *Note: The explanation here is provided from the perspective of Flintrock's original author, Nicholas Chammas.* 313 | 314 | I got started with Spark by using [spark-ec2](https://github.com/amplab/spark-ec2). It's one of the biggest reasons I found Spark so accessible. I didn't need to spend time upfront working through some setup guide before I could work on a "real" problem. 
Instead, with a simple spark-ec2 command I was able to launch a large, working cluster and get straight to business. 315 | 316 | As I became a heavy user of spark-ec2, several limitations stood out and became an increasing pain. They provided me with the motivation for this project. 317 | 318 | Among those limitations, the most frustrating ones were: 319 | 320 | * **Slow launches**: spark-ec2 cluster launch times increase linearly with the number of slaves being created. For example, it takes spark-ec2 **[over an hour](https://issues.apache.org/jira/browse/SPARK-5189)** to launch a cluster with 100 slaves. ([SPARK-4325](https://issues.apache.org/jira/browse/SPARK-4325), [SPARK-5189](https://issues.apache.org/jira/browse/SPARK-5189)) 321 | * **No support for configuration files**: spark-ec2 does not support reading options from a config file, so users are always forced to type them in at the command line. ([SPARK-925](https://issues.apache.org/jira/browse/SPARK-925)) 322 | * **Un-resizable clusters**: Adding or removing slaves from an existing spark-ec2 cluster is not possible. ([SPARK-2008](https://issues.apache.org/jira/browse/SPARK-2008)) 323 | * **Custom machine images**: spark-ec2 uses custom machine images, making it difficult for users to bring their own image. And since the process of updating those machine images is not automated, they have not been updated in years. ([SPARK-3821](https://issues.apache.org/jira/browse/SPARK-3821)) 324 | 325 | I built Flintrock to address all of these shortcomings, which it does. 326 | 327 | ### Why build Flintrock when we have EMR? 328 | 329 | I started work on Flintrock months before [EMR added support for Spark](https://aws.amazon.com/blogs/aws/new-apache-spark-on-amazon-emr/). It's likely that, had I considered building Flintrock a year later than I did, I would have decided against it. 330 | 331 | Now that Flintrock exists, many users appreciate the lower cost of running Flintrock clusters as compared to EMR, as well as Flintrock's simpler interface. And for my part, I enjoy working on Flintrock in my free time. 332 | 333 | ### Why didn't you build Flintrock on top of an orchestration tool? 334 | 335 | People have asked me whether I considered building Flintrock on top of Ansible, Terraform, Docker, or something else. I looked into some of these things back when Flintrock was just an idea in my head and decided against using any of them for two basic reasons: 336 | 337 | 1. **Fun**: I didn't have any experience with these tools, and it looked both simple enough and more fun to build something "from scratch". 338 | 2. **Focus**: I wanted a single-purpose tool with a very limited focus, not a module or set of scripts that were part of a sprawling framework that did a lot of different things. 339 | 340 | These are not necessarily the right reasons to build "from scratch", but they were my reasons. If you are already comfortable with any of the popular orchestration tools out there, you may find it more attractive to use them rather than add a new standalone tool to your toolchain. 341 | 342 | 343 | ## About the Flintrock Logo 344 | 345 | The [Flintrock logo](https://github.com/nchammas/flintrock/blob/master/flintrock-logo.png) was created using [Highbrow Cafetorium JNL](http://www.myfonts.com/fonts/jnlevine/highbrow-cafetorium/) and [this icon](https://thenounproject.com/term/stars/40856/). Licenses to use both the font and icon were purchased from their respective owners. 
346 | -------------------------------------------------------------------------------- /flintrock-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nchammas/flintrock/7056d20a7f2dee78dd8c89c19d538458cc13288a/flintrock-logo.png -------------------------------------------------------------------------------- /flintrock/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.2.0.dev0' 2 | -------------------------------------------------------------------------------- /flintrock/__main__.py: -------------------------------------------------------------------------------- 1 | # See: https://docs.python.org/3/library/__main__.html 2 | import sys 3 | 4 | # Flintrock modules 5 | from .flintrock import main 6 | 7 | if __name__ == '__main__': 8 | sys.exit(main()) 9 | -------------------------------------------------------------------------------- /flintrock/config.yaml.template: -------------------------------------------------------------------------------- 1 | services: 2 | spark: 3 | version: 3.5.0 4 | # git-commit: latest # if not 'latest', provide a full commit SHA; e.g. d6dc12ef0146ae409834c78737c116050961f350 5 | # git-repository: # optional; defaults to https://github.com/apache/spark 6 | # optional; defaults to download from a dynamically selected Apache mirror 7 | # - can be http, https, or s3 URL 8 | # - must contain a {v} template corresponding to the version 9 | # - Spark must be pre-built 10 | # - files must be named according to the release pattern shown here: https://dist.apache.org/repos/dist/release/spark/ 11 | # download-source: "https://www.example.com/files/spark/{v}/" 12 | # download-source: "s3://some-bucket/spark/{v}/" 13 | # executor-instances: 1 14 | hdfs: 15 | version: 3.3.6 16 | # optional; defaults to download from a dynamically selected Apache mirror 17 | # - can be http, https, or s3 URL 18 | # - must contain a {v} template corresponding to the version 19 | # - files must be named according to the release pattern shown here: https://dist.apache.org/repos/dist/release/hadoop/common/ 20 | # download-source: "https://www.example.com/files/hadoop/{v}/" 21 | # download-source: "http://www-us.apache.org/dist/hadoop/common/hadoop-{v}/" 22 | # download-source: "s3://some-bucket/hadoop/{v}/" 23 | 24 | provider: ec2 25 | 26 | providers: 27 | ec2: 28 | key-name: key_name 29 | identity-file: /path/to/key.pem 30 | instance-type: m5.large 31 | region: us-east-1 32 | # availability-zone: 33 | ami: ami-0588935a949f9ff17 # Amazon Linux 2, us-east-1 34 | user: ec2-user 35 | # ami: ami-61bbf104 # CentOS 7, us-east-1 36 | # user: centos 37 | # spot-price: 38 | # vpc-id: 39 | # subnet-id: 40 | # placement-group: 41 | # security-groups: 42 | # - group-name1 43 | # - group-name2 44 | # instance-profile-name: 45 | # tags: 46 | # - key1,value1 47 | # - key2, value2 # leading/trailing spaces are trimmed 48 | # - key3, # value will be empty 49 | # min-root-ebs-size-gb: 50 | tenancy: default # default | dedicated 51 | ebs-optimized: no # yes | no 52 | instance-initiated-shutdown-behavior: terminate # terminate | stop 53 | # user-data: /path/to/userdata/script 54 | # authorize-access-from: 55 | # - 10.0.0.42/32 56 | # - sg-xyz4654564xyz 57 | 58 | launch: 59 | num-slaves: 1 60 | # install-hdfs: True 61 | # install-spark: False 62 | # java-version: 8 63 | 64 | debug: false 65 | -------------------------------------------------------------------------------- 
/flintrock/core.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import functools 3 | import json 4 | import os 5 | import posixpath 6 | import shlex 7 | import sys 8 | import logging 9 | from concurrent.futures import FIRST_EXCEPTION 10 | 11 | # External modules 12 | import paramiko 13 | 14 | # Flintrock modules 15 | from .ssh import get_ssh_client, ssh_check_output, ssh, SSHKeyPair 16 | from .exceptions import SSHError 17 | 18 | FROZEN = getattr(sys, 'frozen', False) 19 | 20 | if FROZEN: 21 | THIS_DIR = sys._MEIPASS 22 | else: 23 | THIS_DIR = os.path.dirname(os.path.realpath(__file__)) 24 | 25 | SCRIPTS_DIR = os.path.join(THIS_DIR, 'scripts') 26 | 27 | 28 | logger = logging.getLogger('flintrock.core') 29 | 30 | 31 | class StorageDirs: 32 | def __init__(self, *, root, ephemeral, persistent): 33 | self.root = root 34 | self.ephemeral = ephemeral 35 | self.persistent = persistent 36 | 37 | 38 | # TODO: Implement concept of ClusterNode. (?) That way we can 39 | # define a cluster as having several nodes, and implement 40 | # actions as `for node in nodes: node.action()`. 41 | # NOTE: We take both IP addresses and host names because we 42 | # don't understand why Spark doesn't accept IP addresses 43 | # in its config, yet we prefer IP addresses when 44 | # connecting to hosts to avoid single-threaded DNS lookups. 45 | # See: https://github.com/nchammas/flintrock/issues/43 46 | # See: http://www.dalkescientific.com/writings/diary/archive/2012/01/19/concurrent.futures.html 47 | class FlintrockCluster: 48 | def __init__( 49 | self, 50 | *, 51 | name, 52 | ssh_key_pair=None, 53 | storage_dirs=StorageDirs(root=None, ephemeral=None, persistent=None), 54 | ): 55 | self.name = name 56 | self.ssh_key_pair = ssh_key_pair 57 | self.storage_dirs = storage_dirs 58 | self.java_version = None 59 | self.services = [] 60 | 61 | @property 62 | def master_ip(self) -> str: 63 | """ 64 | The IP address of the master. 65 | 66 | Providers must override this property since it is typically derived from 67 | an underlying object, like an EC2 instance. 68 | """ 69 | raise NotImplementedError 70 | 71 | @property 72 | def master_host(self) -> str: 73 | """ 74 | The hostname of the master. 75 | 76 | Providers must override this property since it is typically derived from 77 | an underlying object, like an EC2 instance. 78 | """ 79 | raise NotImplementedError 80 | 81 | @property 82 | def private_network(self) -> bool: 83 | """ 84 | Indicate if this cluster runs on a private network. 85 | 86 | Providers must override this property since it is typically derived from 87 | an underlying object, like the VPC subnet of an EC2 Instance. 88 | """ 89 | raise NotImplementedError 90 | 91 | @property 92 | def slave_ips(self) -> 'List[str]': 93 | """ 94 | A list of the IP addresses of the slaves. 95 | 96 | Providers must override this property since it is typically derived from 97 | an underlying object, like an EC2 instance. 98 | """ 99 | raise NotImplementedError 100 | 101 | @property 102 | def slave_hosts(self) -> 'List[str]': 103 | """ 104 | A list of the hostnames of the slaves. 105 | 106 | Providers must override this property since it is typically derived from 107 | an underlying object, like an EC2 instance. 108 | """ 109 | raise NotImplementedError 110 | 111 | @property 112 | def num_masters(self) -> int: 113 | """ 114 | How many masters the cluster has. 
115 | 116 | This normally just equals 1, but in cases where the cluster master 117 | has been destroyed this should return 0. 118 | 119 | Providers must override this property. 120 | """ 121 | raise NotImplementedError 122 | 123 | @property 124 | def num_slaves(self) -> int: 125 | """ 126 | How many slaves the cluster has. 127 | 128 | This is typically just len(self.slave_ips), but we need a separate 129 | property because slave IPs are not available when the cluster is 130 | stopped, and sometimes in that situation we still want to know how 131 | many slaves there are. 132 | 133 | Providers must override this property. 134 | """ 135 | raise NotImplementedError 136 | 137 | def load_manifest(self, *, user: str, identity_file: str): 138 | """ 139 | Load a cluster's manifest from the master. This will populate information 140 | about installed services and configured storage. 141 | 142 | Providers shouldn't need to override this method. 143 | """ 144 | if not self.master_ip: 145 | return 146 | 147 | master_ssh_client = get_ssh_client( 148 | user=user, 149 | host=self.master_ip, 150 | identity_file=identity_file, 151 | wait=True, 152 | print_status=False) 153 | 154 | with master_ssh_client: 155 | manifest_raw = ssh_check_output( 156 | client=master_ssh_client, 157 | command=""" 158 | cat "$HOME/.flintrock-manifest.json" 159 | """) 160 | # TODO: Would it be better if storage (ephemeral and otherwise) was 161 | # implemented as a Flintrock service and tracked in the manifest? 162 | ephemeral_dirs_raw = ssh_check_output( 163 | client=master_ssh_client, 164 | # It's generally safer to avoid using ls: 165 | # http://mywiki.wooledge.org/ParsingLs 166 | command=""" 167 | shopt -s nullglob 168 | for f in /media/ephemeral*; do 169 | echo "$f" 170 | done 171 | """) 172 | 173 | manifest = json.loads(manifest_raw) 174 | 175 | self.ssh_key_pair = SSHKeyPair( 176 | public=manifest['ssh_key_pair']['public'], 177 | private=manifest['ssh_key_pair']['private'], 178 | ) 179 | 180 | self.java_version = manifest['java_version'] 181 | 182 | services = [] 183 | for [service_name, manifest] in manifest['services']: 184 | # TODO: Expose the classes being used here. 185 | service = globals()[service_name](**manifest) 186 | services.append(service) 187 | self.services = services 188 | 189 | storage_dirs = StorageDirs( 190 | root='/media/root', 191 | ephemeral=sorted(ephemeral_dirs_raw.splitlines()), 192 | persistent=None) 193 | self.storage_dirs = storage_dirs 194 | 195 | def destroy_check(self): 196 | """ 197 | Check that the cluster is in a state in which it can be destroyed. 198 | 199 | Providers should override this method since we have no way to perform 200 | this check in a provider-agnostic way. 201 | """ 202 | pass 203 | 204 | def destroy(self): 205 | """ 206 | Destroy the cluster and any resources created specifically to support 207 | it. 208 | 209 | Providers should override this method since we have no way to destroy a 210 | cluster in a provider-agnostic way. 211 | 212 | Nonetheless, this method should be called before the underlying provider 213 | destroys the nodes. That way, if we ever add cleanup logic here to destroy 214 | resources external to the cluster it will get executed correctly. 215 | """ 216 | pass 217 | 218 | def start_check(self): 219 | """ 220 | Check that the cluster is in a state in which it can be started. 221 | 222 | The interface can use this method to decide whether it needs to prompt 223 | the user for confirmation. If the cluster cannot be started (e.g. 
224 | because it's already running) then we don't want to show a prompt. 225 | 226 | Providers should override this method since we have no way to perform 227 | this check in a provider-agnostic way. 228 | """ 229 | pass 230 | 231 | def start(self, *, user: str, identity_file: str): 232 | """ 233 | Start up all the services installed on the cluster. 234 | 235 | This method assumes that the nodes constituting cluster were just 236 | started up by the provider (e.g. EC2, GCE, etc.) they're hosted on 237 | and are running. 238 | """ 239 | self.load_manifest(user=user, identity_file=identity_file) 240 | 241 | partial_func = functools.partial( 242 | start_node, 243 | services=self.services, 244 | user=user, 245 | identity_file=identity_file, 246 | cluster=self) 247 | hosts = [self.master_ip] + self.slave_ips 248 | 249 | run_against_hosts(partial_func=partial_func, hosts=hosts) 250 | 251 | master_ssh_client = get_ssh_client( 252 | user=user, 253 | host=self.master_ip, 254 | identity_file=identity_file) 255 | 256 | with master_ssh_client: 257 | for service in self.services: 258 | service.configure_master( 259 | ssh_client=master_ssh_client, 260 | cluster=self) 261 | 262 | for service in self.services: 263 | service.health_check(master_host=self.master_ip) 264 | 265 | def stop_check(self): 266 | """ 267 | Check that the cluster is in a state in which it can be stopped. 268 | 269 | Providers should override this method since we have no way to perform 270 | this check in a provider-agnostic way. 271 | """ 272 | pass 273 | 274 | def stop(self): 275 | """ 276 | Prepare the cluster to be stopped by the underlying provider. 277 | 278 | There's currently nothing to do here, but this method should be called 279 | before the underlying provider stops the nodes. 280 | """ 281 | pass 282 | 283 | def add_slaves_check(self): 284 | pass 285 | 286 | def add_slaves(self, *, user: str, identity_file: str, new_hosts: list): 287 | """ 288 | Add new slaves to the cluster. 289 | 290 | Providers should implement this with the following signature: 291 | 292 | add_slaves(self, *, user: str, identity_file: str, num_slaves: int, **provider_specific_options) 293 | 294 | This method should be called after the new hosts are online and have been 295 | added to the cluster's internal list. 296 | """ 297 | hosts = [self.master_ip] + self.slave_ips 298 | partial_func = functools.partial( 299 | add_slaves_node, 300 | java_version=self.java_version, 301 | services=self.services, 302 | user=user, 303 | identity_file=identity_file, 304 | cluster=self, 305 | new_hosts=new_hosts) 306 | run_against_hosts(partial_func=partial_func, hosts=hosts) 307 | 308 | master_ssh_client = get_ssh_client( 309 | user=user, 310 | host=self.master_ip, 311 | identity_file=identity_file) 312 | with master_ssh_client: 313 | for service in self.services: 314 | service.configure_master( 315 | ssh_client=master_ssh_client, 316 | cluster=self) 317 | 318 | def remove_slaves(self, *, user: str, identity_file: str): 319 | """ 320 | Remove some slaves from the cluster. 321 | 322 | Providers should implement this method with the following signature: 323 | 324 | remove_slaves(self, *, user: str, identity_file: str, num_slaves: int) 325 | 326 | This method should be called after the provider has removed the slaves 327 | from the cluster's internal list but before the instances themselves 328 | have been terminated. 329 | 330 | This method simply makes sure that the rest of the cluster knows that 331 | the relevant slaves are no longer part of the cluster. 
332 | """ 333 | self.load_manifest(user=user, identity_file=identity_file) 334 | 335 | partial_func = functools.partial( 336 | remove_slaves_node, 337 | user=user, 338 | identity_file=identity_file, 339 | services=self.services, 340 | cluster=self) 341 | hosts = [self.master_ip] + self.slave_ips 342 | 343 | run_against_hosts(partial_func=partial_func, hosts=hosts) 344 | 345 | def run_command_check(self): 346 | """ 347 | Check that the cluster is in a state that supports running commands. 348 | 349 | Providers should override this method since we have no way to perform 350 | this check in a provider-agnostic way. 351 | """ 352 | pass 353 | 354 | def run_command( 355 | self, 356 | *, 357 | master_only: bool, 358 | user: str, 359 | identity_file: str, 360 | command: tuple): 361 | """ 362 | Run a shell command on each node of an existing cluster. 363 | 364 | If master_only is True, then run the comand on the master only. 365 | """ 366 | if master_only: 367 | target_hosts = [self.master_ip] 368 | else: 369 | target_hosts = [self.master_ip] + self.slave_ips 370 | 371 | partial_func = functools.partial( 372 | run_command_node, 373 | user=user, 374 | identity_file=identity_file, 375 | command=command) 376 | hosts = target_hosts 377 | 378 | run_against_hosts(partial_func=partial_func, hosts=hosts) 379 | 380 | def copy_file_check(self): 381 | """ 382 | Check that the cluster is in a state in which files can be copied to 383 | it. 384 | 385 | Providers should override this method since we have no way to perform 386 | this check in a provider-agnostic way. 387 | """ 388 | pass 389 | 390 | def copy_file( 391 | self, 392 | *, 393 | master_only: bool, 394 | user: str, 395 | identity_file: str, 396 | local_path: str, 397 | remote_path: str): 398 | """ 399 | Copy a file to each node of an existing cluster. 400 | 401 | If master_only is True, then copy the file to the master only. 402 | """ 403 | if master_only: 404 | target_hosts = [self.master_ip] 405 | else: 406 | target_hosts = [self.master_ip] + self.slave_ips 407 | 408 | partial_func = functools.partial( 409 | copy_file_node, 410 | user=user, 411 | identity_file=identity_file, 412 | local_path=local_path, 413 | remote_path=remote_path) 414 | hosts = target_hosts 415 | 416 | run_against_hosts(partial_func=partial_func, hosts=hosts) 417 | 418 | def login( 419 | self, 420 | *, 421 | user: str, 422 | identity_file: str): 423 | """ 424 | Interactively SSH into the cluster master. 425 | """ 426 | ssh( 427 | host=self.master_ip, 428 | user=user, 429 | identity_file=identity_file) 430 | 431 | 432 | def generate_template_mapping( 433 | *, 434 | cluster: FlintrockCluster, 435 | # If we add additional services later on we may want to refactor 436 | # this to take a list of services and dynamically pull the service 437 | # name. 438 | spark_executor_instances: int, 439 | hadoop_version: str, 440 | spark_version: str 441 | ) -> dict: 442 | """ 443 | Generate a template mapping from a FlintrockCluster instance that we can use 444 | to fill in template parameters. 
445 | """ 446 | hadoop_root_dir = posixpath.join(cluster.storage_dirs.root, 'hadoop') 447 | hadoop_ephemeral_dirs = ','.join( 448 | posixpath.join(path, 'hadoop') 449 | for path in cluster.storage_dirs.ephemeral 450 | ) 451 | spark_root_dir = posixpath.join(cluster.storage_dirs.root, 'spark') 452 | spark_ephemeral_dirs = ','.join( 453 | posixpath.join(path, 'spark') 454 | for path in cluster.storage_dirs.ephemeral 455 | ) 456 | 457 | template_mapping = { 458 | 'master_ip': cluster.master_ip, 459 | 'master_host': cluster.master_host, 460 | 'master_private_host': cluster.master_private_host, 461 | 'slave_ips': '\n'.join(cluster.slave_ips), 462 | 'slave_hosts': '\n'.join(cluster.slave_hosts), 463 | 'slave_private_hosts': '\n'.join(cluster.slave_private_hosts), 464 | 465 | 'hadoop_version': hadoop_version, 466 | 'hadoop_short_version': '.'.join(hadoop_version.split('.')[:2]), 467 | 'spark_version': spark_version, 468 | 'spark_short_version': '.'.join(spark_version.split('.')[:2]) if '.' in spark_version else spark_version, 469 | 470 | 'spark_executor_instances': spark_executor_instances, 471 | 472 | 'hadoop_root_dir': hadoop_root_dir, 473 | 'hadoop_ephemeral_dirs': hadoop_ephemeral_dirs, 474 | 'spark_root_dir': spark_root_dir, 475 | 'spark_ephemeral_dirs': spark_ephemeral_dirs, 476 | 477 | # If ephemeral storage is available, it replaces the root volume, which is 478 | # typically persistent. We don't want to mix persistent and ephemeral 479 | # storage since that causes problems after cluster stop/start; some volumes 480 | # have leftover data, whereas others start fresh. 481 | 'hadoop_root_ephemeral_dirs': hadoop_ephemeral_dirs if hadoop_ephemeral_dirs else hadoop_root_dir, 482 | 'spark_root_ephemeral_dirs': spark_ephemeral_dirs if spark_ephemeral_dirs else spark_root_dir, 483 | } 484 | 485 | return template_mapping 486 | 487 | 488 | # TODO: Cache these files. (?) They are being read potentially tens or 489 | # hundreds of times. Maybe it doesn't matter because the files 490 | # are so small. 491 | def get_formatted_template(*, path: str, mapping: dict) -> str: 492 | with open(path) as f: 493 | formatted = f.read().format(**mapping) 494 | return formatted 495 | 496 | 497 | def run_against_hosts(*, partial_func: functools.partial, hosts: list): 498 | """ 499 | Run a function asynchronously against each of the provided hosts. 500 | 501 | This function assumes that partial_func accepts `host` as a keyword argument. 502 | """ 503 | with concurrent.futures.ThreadPoolExecutor(len(hosts)) as executor: 504 | futures = { 505 | executor.submit(functools.partial(partial_func, host=host)) 506 | for host in hosts 507 | } 508 | concurrent.futures.wait(futures, return_when=FIRST_EXCEPTION) 509 | for future in futures: 510 | future.result() 511 | 512 | 513 | def get_installed_java_version(client: paramiko.client.SSHClient): 514 | """ 515 | :return: the major version (5,6,7,8...) of the currently installed Java or None if not installed 516 | """ 517 | possible_cmds = [ 518 | "$JAVA_HOME/bin/java -version", 519 | "java -version" 520 | ] 521 | 522 | for command in possible_cmds: 523 | try: 524 | output = ssh_check_output( 525 | client=client, 526 | command=command) 527 | tokens = output.split() 528 | # First line of the output is like: 'openjdk version "1.8.0_252"' or 'openjdk version "11.0.7" 2020-04-14' 529 | # Get the version string and strip out the first two parts of the 530 | # version as an int: 7, 8, 9, 10... 
531 | if len(tokens) >= 3: 532 | version_parts = tokens[2].strip('"').split(".") 533 | if len(version_parts) >= 2: 534 | if version_parts[0] == "1": 535 | # Java 6, 7 or 8 536 | return int(version_parts[1]) 537 | else: 538 | # Java 9+ 539 | return int(version_parts[0]) 540 | except SSHError: 541 | pass 542 | 543 | return None 544 | 545 | 546 | def ensure_java(client: paramiko.client.SSHClient, java_version: int): 547 | """ 548 | Ensures that Java is available on the machine and that it has a 549 | version of at least java_version. 550 | 551 | The specified version of Java will be installed if it does not 552 | exist or the existing version has a major version lower than java_version. 553 | 554 | :param client: 555 | :param java_version: 556 | minimum version of Java required 557 | :return: 558 | """ 559 | host = client.get_transport().getpeername()[0] 560 | installed_java_version = get_installed_java_version(client) 561 | 562 | if installed_java_version == java_version: 563 | logger.info("Java {j} is already installed, skipping Java install".format(j=installed_java_version)) 564 | return 565 | 566 | if installed_java_version and installed_java_version > java_version: 567 | logger.warning(""" 568 | Existing Java {j} installation is newer than the configured version {java_version}. 569 | Your applications will be executed with Java {j}. 570 | Please choose a different AMI if this does not work for you. 571 | """.format(j=installed_java_version, java_version=java_version)) 572 | return 573 | 574 | if installed_java_version and installed_java_version < java_version: 575 | logger.info(""" 576 | Existing Java {j} will be upgraded to Adoptium OpenJDK {java_version} 577 | """.format(j=installed_java_version, java_version=java_version)) 578 | 579 | # We will install Adoptium OpenJDK because it gives us access to Java 8 through 15 580 | # Right now, Amazon Extras only provides Corretto Java 8, 11 and 15 581 | logger.info("[{h}] Installing Adoptium OpenJDK Java {j}...".format(h=host, j=java_version)) 582 | 583 | install_adoptium_repo(client) 584 | java_package = "temurin-{j}-jdk".format(j=java_version) 585 | ssh_check_output( 586 | client=client, 587 | command=""" 588 | set -e 589 | 590 | # Install Java first to protect packages that depend on Java from being removed. 591 | sudo yum install -q -y {jp} 592 | 593 | # Remove any older versions of Java to force the default Java to the requested version. 594 | # We don't use /etc/alternatives because it does not seem to update links in /usr/lib/jvm correctly, 595 | # and we don't just rely on JAVA_HOME because some programs use java directly in the PATH. 596 | sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk 597 | 598 | sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/{jp} >> /etc/environment" 599 | source /etc/environment 600 | """.format(jp=java_package)) 601 | 602 | 603 | def install_adoptium_repo(client): 604 | """ 605 | Installs the adoptium.repo file into /etc/yum.repos.d/ 606 | """ 607 | with client.open_sftp() as sftp: 608 | sftp.put( 609 | localpath=os.path.join(SCRIPTS_DIR, 'adoptium.repo'), 610 | remotepath='/tmp/adoptium.repo') 611 | ssh_check_output( 612 | client=client, 613 | command=""" 614 | # Use sudo to install the repo file 615 | sudo mv /tmp/adoptium.repo /etc/yum.repos.d/ 616 | """ 617 | ) 618 | 619 | 620 | def setup_node( 621 | *, 622 | # Change this to take host, user, and identity_file? 623 | # Add some kind of caching for SSH connections so that they 624 | # can be looked up by host and reused? 
625 | ssh_client: paramiko.client.SSHClient, 626 | services: list, 627 | java_version: int, 628 | cluster: FlintrockCluster): 629 | """ 630 | Setup a new node. 631 | 632 | Cluster methods like provision_node() and add_slaves_node() should 633 | delegate the main work of setting up new nodes to this function. 634 | """ 635 | host = ssh_client.get_transport().getpeername()[0] 636 | ssh_check_output( 637 | client=ssh_client, 638 | command=""" 639 | set -e 640 | 641 | echo {private_key} > "$HOME/.ssh/id_rsa" 642 | echo {public_key} >> "$HOME/.ssh/authorized_keys" 643 | 644 | chmod 400 "$HOME/.ssh/id_rsa" 645 | """.format( 646 | private_key=shlex.quote(cluster.ssh_key_pair.private), 647 | public_key=shlex.quote(cluster.ssh_key_pair.public))) 648 | 649 | with ssh_client.open_sftp() as sftp: 650 | sftp.put( 651 | localpath=os.path.join(SCRIPTS_DIR, 'setup-ephemeral-storage.py'), 652 | remotepath='/tmp/setup-ephemeral-storage.py') 653 | 654 | logger.info("[{h}] Configuring ephemeral storage...".format(h=host)) 655 | # TODO: Print some kind of warning if storage is large, since formatting 656 | # will take several minutes (~4 minutes for 2TB). 657 | storage_dirs_raw = ssh_check_output( 658 | client=ssh_client, 659 | command=""" 660 | set -e 661 | python /tmp/setup-ephemeral-storage.py 662 | rm -f /tmp/setup-ephemeral-storage.py 663 | """) 664 | storage_dirs = json.loads(storage_dirs_raw) 665 | 666 | cluster.storage_dirs.root = storage_dirs['root'] 667 | cluster.storage_dirs.ephemeral = storage_dirs['ephemeral'] 668 | 669 | # TODO: Move Python and Java setup to new service under services.py. 670 | # New service to cover Python/Scala/Java: LanguageRuntimes (name?) 671 | ssh_check_output( 672 | client=ssh_client, 673 | command=( 674 | """ 675 | set -e 676 | sudo yum install -y python3 677 | """ 678 | ) 679 | ) 680 | ensure_java(ssh_client, java_version) 681 | 682 | for service in services: 683 | try: 684 | service.install( 685 | ssh_client=ssh_client, 686 | cluster=cluster, 687 | ) 688 | except Exception as e: 689 | raise Exception( 690 | "Failed to install {}." 691 | .format(type(service).__name__) 692 | ) from e 693 | 694 | 695 | def provision_cluster( 696 | *, 697 | cluster: FlintrockCluster, 698 | java_version: int, 699 | services: list, 700 | user: str, 701 | identity_file: str): 702 | """ 703 | Connect to a freshly launched cluster and install the specified services. 704 | """ 705 | partial_func = functools.partial( 706 | provision_node, 707 | java_version=java_version, 708 | services=services, 709 | user=user, 710 | identity_file=identity_file, 711 | cluster=cluster) 712 | hosts = [cluster.master_ip] + cluster.slave_ips 713 | 714 | run_against_hosts(partial_func=partial_func, hosts=hosts) 715 | 716 | master_ssh_client = get_ssh_client( 717 | user=user, 718 | host=cluster.master_ip, 719 | identity_file=identity_file) 720 | 721 | with master_ssh_client: 722 | manifest = { 723 | 'java_version': java_version, 724 | 'services': [[type(m).__name__, m.manifest] for m in services], 725 | 'ssh_key_pair': cluster.ssh_key_pair._asdict(), 726 | } 727 | # The manifest tells us how the cluster is configured. We'll need this 728 | # when we resize the cluster or restart it. 
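        # Note: 'services' is stored as a list of [class name, manifest] pairs
        # (e.g. ["HDFS", {...}] or ["Spark", {...}]), which load_manifest() later
        # uses to reconstruct the service objects by name.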
729 | ssh_check_output( 730 | client=master_ssh_client, 731 | command=""" 732 | echo {m} > "$HOME/.flintrock-manifest.json" 733 | chmod go-rw "$HOME/.flintrock-manifest.json" 734 | """.format( 735 | m=shlex.quote(json.dumps(manifest, indent=4, sort_keys=True)) 736 | )) 737 | 738 | for service in services: 739 | service.configure_master( 740 | ssh_client=master_ssh_client, 741 | cluster=cluster) 742 | 743 | for service in services: 744 | service.health_check(master_host=cluster.master_ip) 745 | 746 | 747 | def provision_node( 748 | *, 749 | java_version: int, 750 | services: list, 751 | user: str, 752 | host: str, 753 | identity_file: str, 754 | cluster: FlintrockCluster): 755 | """ 756 | Connect to a freshly launched node, set it up for SSH access, configure ephemeral 757 | storage, and install the specified services. 758 | 759 | This method is role-agnostic; it runs on both the cluster master and slaves. 760 | This method is meant to be called asynchronously. 761 | """ 762 | client = get_ssh_client( 763 | user=user, 764 | host=host, 765 | identity_file=identity_file, 766 | wait=True) 767 | 768 | with client: 769 | setup_node( 770 | ssh_client=client, 771 | services=services, 772 | java_version=java_version, 773 | cluster=cluster) 774 | for service in services: 775 | service.configure( 776 | ssh_client=client, 777 | cluster=cluster) 778 | 779 | 780 | def start_node( 781 | *, 782 | services: list, 783 | user: str, 784 | host: str, 785 | identity_file: str, 786 | cluster: FlintrockCluster): 787 | """ 788 | Connect to an existing node that has just been started up again and prepare it for 789 | work. 790 | 791 | This method is role-agnostic; it runs on both the cluster master and slaves. 792 | This method is meant to be called asynchronously. 793 | """ 794 | ssh_client = get_ssh_client( 795 | user=user, 796 | host=host, 797 | identity_file=identity_file, 798 | wait=True) 799 | 800 | with ssh_client: 801 | # TODO: Consider consolidating ephemeral storage code under a dedicated 802 | # Flintrock service. 803 | if cluster.storage_dirs.ephemeral: 804 | ssh_check_output( 805 | client=ssh_client, 806 | command=""" 807 | sudo chown "{u}:{u}" {d} 808 | """.format( 809 | u=user, 810 | d=' '.join(cluster.storage_dirs.ephemeral))) 811 | 812 | for service in services: 813 | service.configure( 814 | ssh_client=ssh_client, 815 | cluster=cluster) 816 | 817 | 818 | def add_slaves_node( 819 | *, 820 | user: str, 821 | host: str, 822 | identity_file: str, 823 | java_version: int, 824 | services: list, 825 | cluster: FlintrockCluster, 826 | new_hosts: list): 827 | """ 828 | If the node is new, set it up. If not, just reconfigure it to recognize 829 | the newly added nodes. 830 | 831 | This method is role-agnostic; it runs on both the cluster master and slaves. 832 | This method is meant to be called asynchronously. 833 | """ 834 | is_new_host = host in new_hosts 835 | 836 | client = get_ssh_client( 837 | user=user, 838 | host=host, 839 | identity_file=identity_file, 840 | wait=is_new_host) 841 | 842 | with client: 843 | if is_new_host: 844 | setup_node( 845 | ssh_client=client, 846 | services=services, 847 | java_version=java_version, 848 | cluster=cluster) 849 | 850 | for service in services: 851 | service.configure( 852 | ssh_client=client, 853 | cluster=cluster) 854 | 855 | 856 | def remove_slaves_node( 857 | *, 858 | user: str, 859 | host: str, 860 | identity_file: str, 861 | services: list, 862 | cluster: FlintrockCluster): 863 | """ 864 | Update the services on a node to remove the provided slaves. 
865 | 866 | This method is role-agnostic; it runs on both the cluster master and slaves. 867 | This method is meant to be called asynchronously. 868 | """ 869 | ssh_client = get_ssh_client( 870 | user=user, 871 | host=host, 872 | identity_file=identity_file) 873 | 874 | for service in services: 875 | service.configure( 876 | ssh_client=ssh_client, 877 | cluster=cluster) 878 | 879 | 880 | def run_command_node(*, user: str, host: str, identity_file: str, command: tuple): 881 | """ 882 | Run a shell command on a node. 883 | 884 | This method is role-agnostic; it runs on both the cluster master and slaves. 885 | This method is meant to be called asynchronously. 886 | """ 887 | ssh_client = get_ssh_client( 888 | user=user, 889 | host=host, 890 | identity_file=identity_file) 891 | 892 | logger.info("[{h}] Running command...".format(h=host)) 893 | 894 | command_str = ' '.join(command) 895 | 896 | with ssh_client: 897 | ssh_check_output( 898 | client=ssh_client, 899 | command=command_str) 900 | 901 | logger.info("[{h}] Command complete.".format(h=host)) 902 | 903 | 904 | def copy_file_node( 905 | *, 906 | user: str, 907 | host: str, 908 | identity_file: str, 909 | local_path: str, 910 | remote_path: str): 911 | """ 912 | Copy a file to the specified remote path on a node. 913 | 914 | This method is role-agnostic; it runs on both the cluster master and slaves. 915 | This method is meant to be called asynchronously. 916 | """ 917 | ssh_client = get_ssh_client( 918 | user=user, 919 | host=host, 920 | identity_file=identity_file) 921 | 922 | with ssh_client: 923 | remote_dir = posixpath.dirname(remote_path) 924 | 925 | try: 926 | ssh_check_output( 927 | client=ssh_client, 928 | command=""" 929 | test -d {path} 930 | """.format(path=shlex.quote(remote_dir))) 931 | except Exception as e: 932 | # TODO: Catch more specific exception. 933 | raise Exception("Remote directory does not exist: {d}".format(d=remote_dir)) 934 | 935 | with ssh_client.open_sftp() as sftp: 936 | logger.info("[{h}] Copying file...".format(h=host)) 937 | 938 | sftp.put(localpath=local_path, remotepath=remote_path) 939 | 940 | logger.info("[{h}] Copy complete.".format(h=host)) 941 | 942 | 943 | # This is necessary down here since we have a circular import dependency between 944 | # core.py and services.py. I've thought about how to remove this circular dependency, 945 | # but for now this seems like what we need to go with. 946 | # Flintrock modules 947 | from .services import HDFS, Spark # Used by start_cluster() # noqa 948 | -------------------------------------------------------------------------------- /flintrock/exceptions.py: -------------------------------------------------------------------------------- 1 | class NothingToDo(Exception): 2 | pass 3 | 4 | 5 | class UsageError(Exception): 6 | pass 7 | 8 | 9 | class UnsupportedProviderError(UsageError): 10 | def __init__(self, provider: str): 11 | super().__init__( 12 | "This provider is not supported: {p}".format(p=provider)) 13 | self.provider = provider 14 | 15 | 16 | class Error(Exception): 17 | pass 18 | 19 | 20 | class ClusterNotFound(Error): 21 | pass 22 | 23 | 24 | class ClusterAlreadyExists(Error): 25 | pass 26 | 27 | 28 | class ClusterInvalidState(Error): 29 | def __init__(self, *, attempted_command: str, state: str): 30 | super().__init__( 31 | "Cluster is in state '{s}'. 
Cannot execute {c}.".format( 32 | c=attempted_command, 33 | s=state)) 34 | self.attempted_command = attempted_command 35 | self.state = state 36 | 37 | 38 | class SSHError(Error): 39 | def __init__(self, *, host: str, message: str): 40 | super().__init__( 41 | "[{h}] {m}".format(h=host, m=message)) 42 | self.host = host 43 | self.message = message 44 | 45 | 46 | class InterruptedEC2Operation(Error): 47 | def __init__(self, *, instances: list): 48 | super().__init__( 49 | "Operation aborted." 50 | ) 51 | self.instances = instances 52 | -------------------------------------------------------------------------------- /flintrock/scripts/adoptium.repo: -------------------------------------------------------------------------------- 1 | # Source: https://adoptium.net/installation/linux/#_centosrhelfedora_instructions 2 | 3 | [Adoptium] 4 | name=Adoptium 5 | baseurl=https://packages.adoptium.net/artifactory/rpm/amazonlinux/$releasever/$basearch 6 | enabled=1 7 | gpgcheck=1 8 | gpgkey=https://packages.adoptium.net/artifactory/api/gpg/key/public 9 | -------------------------------------------------------------------------------- /flintrock/scripts/download-package.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import errno 5 | import os.path 6 | import sys 7 | import subprocess 8 | import time 9 | 10 | MAX_TRIES = 5 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('url') 16 | parser.add_argument('destination_dir') 17 | args = parser.parse_args() 18 | return (args.url, args.destination_dir) 19 | 20 | 21 | if __name__ == '__main__': 22 | url, destination_dir = parse_args() 23 | 24 | try: 25 | os.makedirs(destination_dir, mode=0o755) 26 | except OSError as e: 27 | if e.errno == errno.EEXIST: 28 | pass 29 | else: 30 | raise 31 | 32 | download_path = '{}.download'.format(os.path.basename(destination_dir)) 33 | 34 | tries = 0 35 | while True: 36 | try: 37 | if url.startswith('s3://'): 38 | subprocess.check_call(['aws', 's3', 'cp', url, download_path]) 39 | else: 40 | subprocess.check_call(['curl', '--location', '--output', download_path, url]) 41 | subprocess.check_call(['gzip', '--test', download_path]) 42 | subprocess.check_call(['tar', 'xzf', download_path, '-C', destination_dir, '--strip-components=1']) 43 | subprocess.check_call(['rm', download_path]) 44 | except subprocess.CalledProcessError as e: 45 | print(e, file=sys.stderr) 46 | if tries < MAX_TRIES: 47 | tries += 1 48 | time.sleep(1) 49 | else: 50 | print( 51 | "Failed to download and unpack '{url}' after {tries} tries." 52 | .format( 53 | url=url, 54 | tries=MAX_TRIES, 55 | ), 56 | file=sys.stderr, 57 | ) 58 | sys.exit(1) 59 | else: 60 | break 61 | -------------------------------------------------------------------------------- /flintrock/scripts/setup-ephemeral-storage.py: -------------------------------------------------------------------------------- 1 | """ 2 | Setup ephemeral storage on a newly launched Linux host. 3 | 4 | This script was developed against EC2, where ephemeral volumes are by 5 | default haphazardly and inconsistently mounted. Therefore, we unmount 6 | all volumes that we detect and explicitly format and remount them as 7 | we please. 8 | 9 | The resulting structure we create is as follows: 10 | 11 | /media 12 | /root: The instance's root volume. 13 | /ephemeral[0-N]: Instance store volumes. 14 | /persistent[0-N]: EBS volumes. 
15 | /tmp: A temporary directory with lots of space. 16 | 17 | WARNING: Be conscious about what this script prints to stdout, as that 18 | output is parsed by Flintrock. 19 | """ 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import json 24 | import platform 25 | import subprocess 26 | import sys 27 | 28 | from collections import namedtuple 29 | 30 | # Taken from: http://man7.org/linux/man-pages/man5/fstab.5.html 31 | Mount = namedtuple( 32 | 'Mount', [ 33 | 'device_name', 34 | 'mount_point', 35 | 'filesystem_type', 36 | 'mount_options', 37 | 'dump', 38 | 'pass_number' 39 | ]) 40 | 41 | BlockDevice = namedtuple( 42 | 'BlockDevice', sorted([ 43 | 'kname', 44 | 'mountpoint', 45 | 'size', 46 | ])) 47 | BlockDevice.__new__.__defaults__ = (None, ) * len(BlockDevice._fields) 48 | 49 | 50 | def device_pairs_to_tuple(pairs): 51 | device_dict = {} 52 | for pair in pairs: 53 | key, value = pair.split('=') 54 | key = key.lower() 55 | value = value.strip('"').lower() 56 | device_dict.update({key: value}) 57 | return BlockDevice(**device_dict) 58 | 59 | 60 | def get_non_root_block_devices(): 61 | """ 62 | Get all the non-root block devices available to the host. 63 | 64 | These are the devices we're going to format and mount for use. 65 | """ 66 | block_devices_raw = subprocess.check_output([ 67 | 'lsblk', 68 | '--ascii', 69 | '--pairs', 70 | '--bytes', 71 | '--paths', 72 | '--output', 'KNAME,MOUNTPOINT,SIZE', 73 | # --inverse and --nodeps make sure that 74 | # 1) we get the mount points for devices that have holder devices 75 | # 2) we don't get the holder devices themselves 76 | '--inverse', 77 | '--nodeps', 78 | '--noheadings', 79 | ]).decode('utf-8') 80 | block_devices = [ 81 | device_pairs_to_tuple(line.split()) 82 | for line in block_devices_raw.splitlines() 83 | ] 84 | non_root_block_devices = [ 85 | device for device in block_devices 86 | if device.mountpoint != '/' 87 | ] 88 | # Skip tiny devices, like the 1M devices that show up on 89 | # m5 instances on EC2. 90 | # See: https://github.com/nchammas/flintrock/issues/256 91 | non_trivial_non_root_block_devices = [ 92 | device for device in non_root_block_devices 93 | if int(device.size) >= 1024 ** 3 94 | ] 95 | return non_trivial_non_root_block_devices 96 | 97 | 98 | def unmount_devices(devices): 99 | """ 100 | Unmount the provided devices. 101 | """ 102 | with open('/proc/mounts') as m: 103 | mounts = [Mount(*line.split()) for line in m.read().splitlines()] 104 | 105 | for mount in mounts: 106 | if mount.device_name in [d.kname for d in devices]: 107 | subprocess.check_output(['sudo', 'umount', mount.device_name]) 108 | 109 | 110 | def format_devices(devices): 111 | """ 112 | Create an ext4 filesystem on the provided devices. 
113 | """ 114 | format_processes = [] 115 | for device in devices: 116 | p = subprocess.Popen([ 117 | 'sudo', 'mkfs.ext4', 118 | '-F', 119 | '-E', 120 | 'lazy_itable_init=0,lazy_journal_init=0', 121 | device.kname], 122 | stdout=subprocess.PIPE, 123 | stderr=subprocess.PIPE) 124 | format_processes.append(p) 125 | 126 | for p in format_processes: 127 | stdout_raw, stderr_raw = p.communicate() 128 | stdout, stderr = stdout_raw.decode('utf-8'), stderr_raw.decode('utf-8') # noqa 129 | return_code = p.returncode 130 | if return_code != 0: 131 | raise Exception( 132 | "Format process returned non-zero exit code: {code}\n{error}" 133 | .format( 134 | code=return_code, 135 | error=stderr)) 136 | 137 | 138 | def mount_devices(devices): 139 | """ 140 | Mount the provided devices at the provided mount points. 141 | 142 | Additionally, add the appropriate entries to /etc/fstab so that the mounts 143 | persist across cluster stop/start. 144 | """ 145 | for device in devices: 146 | subprocess.check_output([ 147 | 'sudo', 'mkdir', '-p', device.mountpoint]) 148 | 149 | # Replace any existing fstab entries with our own. 150 | subprocess.check_output( 151 | """ 152 | grep -v -e "^{device_name}" /etc/fstab | sudo tee /etc/fstab 153 | """.format(device_name=device.kname), 154 | shell=True) 155 | subprocess.check_output( 156 | """ 157 | echo "{fstab_entry}" | sudo tee -a /etc/fstab 158 | """.format(fstab_entry=' '.join([ 159 | device.kname, 160 | device.mountpoint, 161 | 'ext4', 162 | 'defaults,users,noatime', 163 | '0', 164 | '0'])), 165 | shell=True) 166 | 167 | subprocess.check_output([ 168 | 'sudo', 'mount', '--source', device.kname]) 169 | # NOTE: `mount` changes the mount point owner to root, so we have 170 | # to set it to what we want here, after `mount` runs. 171 | subprocess.check_output( 172 | 'sudo chown "$(logname):$(logname)" {m}'.format(m=device.mountpoint), 173 | shell=True) 174 | 175 | 176 | def create_root_dir(): 177 | """ 178 | Create a folder that services like HDFS and Spark can refer to to access 179 | local storage on the root volume. 180 | """ 181 | path = '/media/root' 182 | subprocess.check_output([ 183 | 'sudo', 'mkdir', '-p', path]) 184 | subprocess.check_output( 185 | 'sudo chown "$(logname):$(logname)" {p}'.format(p=path), 186 | shell=True) 187 | return path 188 | 189 | 190 | def create_tmp_dir(target): 191 | """ 192 | Create a folder that services can use as a temporary directory for big files. 193 | """ 194 | path = '/media/tmp' 195 | subprocess.check_output([ 196 | 'sudo', 'ln', '-s', target, path]) 197 | subprocess.check_output( 198 | 'sudo chown "$(logname):$(logname)" {p}'.format(p=path), 199 | shell=True) 200 | return path 201 | 202 | 203 | if __name__ == '__main__': 204 | if sys.version_info < (2, 7) or ((3, 0) <= sys.version_info < (3, 4)): 205 | raise Exception( 206 | "This script is only supported on Python 2.7+ and 3.4+. " 207 | "You are running Python {v}.".format(v=platform.python_version())) 208 | 209 | non_root_block_devices = get_non_root_block_devices() 210 | 211 | # NOTE: For now we are assuming that all non-root devices are ephemeral devices. 212 | # We're going to assign them the mount points we want them to have once we're 213 | # done with the unmount -> format -> mount cycle. 
214 | ephemeral_devices = [] 215 | for (num, device) in enumerate(sorted(non_root_block_devices, key=lambda d: d.kname)): 216 | ephemeral_devices.append( 217 | BlockDevice( 218 | kname=device.kname, 219 | mountpoint='/media/ephemeral' + str(num))) 220 | 221 | unmount_devices(ephemeral_devices) 222 | format_devices(ephemeral_devices) 223 | mount_devices(ephemeral_devices) 224 | 225 | root_dir = create_root_dir() 226 | if ephemeral_devices: 227 | tmp_dir = ephemeral_devices[0].mountpoint 228 | else: 229 | tmp_dir = '/tmp' 230 | create_tmp_dir(tmp_dir) 231 | 232 | print(json.dumps( 233 | { 234 | 'root': root_dir, 235 | 'ephemeral': [d.mountpoint for d in ephemeral_devices] 236 | })) 237 | -------------------------------------------------------------------------------- /flintrock/services.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shlex 4 | import socket 5 | import sys 6 | import urllib.error 7 | import urllib.request 8 | import logging 9 | 10 | # External modules 11 | import paramiko 12 | 13 | # Flintrock modules 14 | from .core import ( 15 | FlintrockCluster, 16 | generate_template_mapping, 17 | get_formatted_template, 18 | ) 19 | from .ssh import ssh_check_output 20 | from .util import spark_hadoop_build_version 21 | 22 | FROZEN = getattr(sys, 'frozen', False) 23 | 24 | if FROZEN: 25 | THIS_DIR = sys._MEIPASS 26 | else: 27 | THIS_DIR = os.path.dirname(os.path.realpath(__file__)) 28 | 29 | SCRIPTS_DIR = os.path.join(THIS_DIR, 'scripts') 30 | 31 | 32 | logger = logging.getLogger('flintrock.services') 33 | 34 | 35 | # TODO: Move this back to ec2.py. EC2-specific login should not live here. 36 | class SecurityGroupRule: 37 | def __init__( 38 | self, 39 | ip_protocol, 40 | from_port, 41 | to_port, 42 | src_group=None, 43 | cidr_ip=None, 44 | ): 45 | if src_group and cidr_ip: 46 | raise ValueError( 47 | "src_group and cidr_ip are mutually exclusive. Specify one or the other. " 48 | "See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.SecurityGroup.authorize_ingress" 49 | ) 50 | 51 | if not src_group and not cidr_ip: 52 | raise ValueError("One of src_group or cidr_ip must be specified.") 53 | 54 | self.ip_protocol = ip_protocol 55 | self.from_port = from_port 56 | self.to_port = to_port 57 | # We set the default values to empty string so calls to boto3 accept unset parameters. 58 | # See: https://github.com/boto/boto3/issues/331 59 | self.src_group = src_group if src_group else '' 60 | self.cidr_ip = cidr_ip if cidr_ip else '' 61 | 62 | def __str__(self): 63 | return str(vars(self)) 64 | 65 | 66 | class FlintrockService: 67 | """ 68 | This is an abstract class. Implementations of this class capture all the logic 69 | required to fully install and manage services like Spark on Flintrock clusters. 70 | """ 71 | 72 | def __init__(self): 73 | """ 74 | This is the only method signature that implementations don't have to follow. 75 | Use this method to set properties like the service version or download source 76 | which the rest of the methods here will need to do their work. 77 | """ 78 | raise NotImplementedError 79 | 80 | def install( 81 | self, 82 | ssh_client: paramiko.client.SSHClient, 83 | cluster: FlintrockCluster): 84 | """ 85 | Install the service on a node via the provided SSH client. This typically 86 | means downloading a software package and maybe even building it if necessary. 87 | 88 | This method is role-agnostic; it runs on both the cluster master and slaves. 
89 | This method is meant to be called asynchronously. 90 | """ 91 | raise NotImplementedError 92 | 93 | def configure( 94 | self, 95 | ssh_client: paramiko.client.SSHClient, 96 | cluster: FlintrockCluster): 97 | """ 98 | Configure the installed service on a node via the provided SSH client. This 99 | typically means using templates to create configuration files on the node. 100 | 101 | This method is role-agnostic; it runs on both the cluster master and slaves. 102 | This method is meant to be called asynchronously. 103 | """ 104 | raise NotImplementedError 105 | 106 | def configure_master( 107 | self, 108 | ssh_client: paramiko.client.SSHClient, 109 | cluster: FlintrockCluster): 110 | """ 111 | Configure the service master on a node via the provided SSH client after the 112 | role-agnostic configuration in configure() is complete. Start the master and 113 | slaves. 114 | 115 | This method is meant to be called once on the cluster master. 116 | This method is meant to be called asynchronously. 117 | """ 118 | raise NotImplementedError 119 | 120 | def configure_slave( 121 | self, 122 | ssh_client: paramiko.client.SSHClient, 123 | cluster: FlintrockCluster): 124 | """ 125 | Configure a service slave on a node via the provided SSH client after the 126 | role-agnostic configuration in configure() is complete. 127 | 128 | This method is meant to be called once on each cluster slave. 129 | This method is meant to be called asynchronously. 130 | """ 131 | raise NotImplementedError 132 | 133 | def health_check( 134 | self, 135 | master_host: str): 136 | """ 137 | Check that the service is up and running by querying the cluster master. 138 | """ 139 | raise NotImplementedError 140 | 141 | def get_security_group_rules(self, flintrock_client_cidr: str, flintrock_client_group: str): 142 | """ 143 | Return the EC2 SecurityGroupRules required by this service. 144 | """ 145 | raise NotImplementedError 146 | 147 | 148 | class HDFS(FlintrockService): 149 | def __init__(self, *, version, download_source): 150 | self.version = version 151 | self.download_source = download_source 152 | self.name_node_ui_port = 50070 if version < '3.0' else 9870 153 | self.manifest = {'version': version, 'download_source': download_source} 154 | 155 | def install( 156 | self, 157 | ssh_client: paramiko.client.SSHClient, 158 | cluster: FlintrockCluster, 159 | ): 160 | logger.info( 161 | "[{h}] Installing HDFS..." 162 | .format(h=ssh_client.get_transport().getpeername()[0]) 163 | ) 164 | 165 | with ssh_client.open_sftp() as sftp: 166 | sftp.put( 167 | localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'), 168 | remotepath='/tmp/download-package.py') 169 | 170 | logger.debug( 171 | "[{h}] Downloading Hadoop from: {s}" 172 | .format( 173 | h=ssh_client.get_transport().getpeername()[0], 174 | s=self.download_source, 175 | ) 176 | ) 177 | 178 | ssh_check_output( 179 | client=ssh_client, 180 | command=""" 181 | set -e 182 | 183 | python /tmp/download-package.py "{download_source}" "hadoop" 184 | 185 | for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do 186 | sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)" 187 | done 188 | 189 | echo "export HADOOP_LIBEXEC_DIR='$(pwd)/hadoop/libexec'" >> .bashrc 190 | """.format( 191 | # version=self.version, 192 | download_source=self.download_source.format(v=self.version), 193 | )) 194 | 195 | def configure( 196 | self, 197 | ssh_client: paramiko.client.SSHClient, 198 | cluster: FlintrockCluster): 199 | # TODO: os.walk() through these files. 
200 | template_paths = [ 201 | 'hadoop/conf/masters', 202 | 'hadoop/conf/slaves', 203 | 'hadoop/conf/hadoop-env.sh', 204 | 'hadoop/conf/core-site.xml', 205 | 'hadoop/conf/hdfs-site.xml', 206 | ] 207 | 208 | ssh_check_output( 209 | client=ssh_client, 210 | command="mkdir -p hadoop/conf", 211 | ) 212 | 213 | for template_path in template_paths: 214 | ssh_check_output( 215 | client=ssh_client, 216 | command=""" 217 | echo {f} > {p} 218 | """.format( 219 | f=shlex.quote( 220 | get_formatted_template( 221 | path=os.path.join(THIS_DIR, "templates", template_path), 222 | mapping=generate_template_mapping( 223 | cluster=cluster, 224 | hadoop_version=self.version, 225 | # Hadoop doesn't need to know what 226 | # Spark version we're using. 227 | spark_version='', 228 | spark_executor_instances=0, 229 | ))), 230 | p=shlex.quote(template_path))) 231 | 232 | # TODO: Convert this into start_master() and split master- or slave-specific 233 | # stuff out of configure() into configure_master() and configure_slave(). 234 | def configure_master( 235 | self, 236 | ssh_client: paramiko.client.SSHClient, 237 | cluster: FlintrockCluster): 238 | host = ssh_client.get_transport().getpeername()[0] 239 | logger.info("[{h}] Configuring HDFS master...".format(h=host)) 240 | 241 | ssh_check_output( 242 | client=ssh_client, 243 | command=""" 244 | # `|| true` because on cluster restart this command will fail. 245 | ./hadoop/bin/hdfs namenode -format -nonInteractive || true 246 | """) 247 | 248 | # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/157 249 | attempt_limit = 3 250 | for attempt in range(attempt_limit): 251 | try: 252 | ssh_check_output( 253 | client=ssh_client, 254 | command=""" 255 | ./hadoop/sbin/stop-dfs.sh 256 | ./hadoop/sbin/start-dfs.sh 257 | 258 | master_ui_response_code=0 259 | while [ "$master_ui_response_code" -ne 200 ]; do 260 | sleep 1 261 | master_ui_response_code="$( 262 | curl \ 263 | --location --head --silent \ 264 | --output /dev/null \ 265 | --write-out "%{{http_code}}" \ 266 | {m}:{p} 267 | )" 268 | done 269 | """.format(m=shlex.quote(cluster.master_private_host), p=self.name_node_ui_port), 270 | timeout_seconds=90 271 | ) 272 | break 273 | except socket.timeout as e: 274 | logger.debug( 275 | "Timed out waiting for HDFS master to come up.{}" 276 | .format(" Trying again..." if attempt < attempt_limit - 1 else "") 277 | ) 278 | else: 279 | raise Exception("Timed out waiting for HDFS master to come up.") 280 | 281 | def health_check(self, master_host: str): 282 | # This info is not helpful as a detailed health check, but it gives us 283 | # an up / not up signal.
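# For example, against a healthy Hadoop 3.x cluster a request like
#   curl 'http://<master>:9870/webhdfs/v1/?op=GETCONTENTSUMMARY'
# returns a small JSON ContentSummary document, so any HTTP or JSON error below
# is treated as "not up". (Hadoop 2.x serves this on port 50070 instead; see
# name_node_ui_port in __init__ above.)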
284 | hdfs_master_ui = 'http://{m}:{p}/webhdfs/v1/?op=GETCONTENTSUMMARY'.format(m=master_host, p=self.name_node_ui_port) 285 | 286 | try: 287 | json.loads( 288 | urllib.request 289 | .urlopen(hdfs_master_ui) 290 | .read() 291 | .decode('utf-8')) 292 | logger.info("HDFS online.") 293 | except Exception as e: 294 | raise Exception("HDFS health check failed.") from e 295 | 296 | def get_security_group_rules(self, flintrock_client_cidr: str=None, flintrock_client_group: str=None): 297 | return [ 298 | SecurityGroupRule( 299 | ip_protocol='tcp', 300 | from_port=self.name_node_ui_port, 301 | to_port=self.name_node_ui_port, 302 | cidr_ip=flintrock_client_cidr, 303 | src_group=flintrock_client_group, 304 | ) 305 | ] 306 | 307 | 308 | class Spark(FlintrockService): 309 | def __init__( 310 | self, 311 | *, 312 | spark_executor_instances: int, 313 | version: str=None, 314 | hadoop_version: str, 315 | download_source: str=None, 316 | git_commit: str=None, 317 | git_repository: str=None 318 | ): 319 | # TODO: Convert these checks into something that throws a proper exception. 320 | # Perhaps reuse logic from CLI. 321 | assert bool(version) ^ bool(git_commit) 322 | if git_commit: 323 | assert git_repository 324 | 325 | self.spark_executor_instances = spark_executor_instances 326 | self.version = version 327 | self.hadoop_version = hadoop_version 328 | self.download_source = download_source 329 | self.git_commit = git_commit 330 | self.git_repository = git_repository 331 | 332 | self.manifest = { 333 | 'version': version, 334 | 'spark_executor_instances': spark_executor_instances, 335 | 'hadoop_version': hadoop_version, 336 | 'download_source': download_source, 337 | 'git_commit': git_commit, 338 | 'git_repository': git_repository} 339 | 340 | def install( 341 | self, 342 | ssh_client: paramiko.client.SSHClient, 343 | cluster: FlintrockCluster, 344 | ): 345 | logger.info( 346 | "[{h}] Installing Spark..." 
347 | .format(h=ssh_client.get_transport().getpeername()[0]) 348 | ) 349 | 350 | if self.version: 351 | with ssh_client.open_sftp() as sftp: 352 | sftp.put( 353 | localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'), 354 | remotepath='/tmp/download-package.py') 355 | 356 | logger.debug( 357 | "[{h}] Downloading Spark from: {s}" 358 | .format( 359 | h=ssh_client.get_transport().getpeername()[0], 360 | s=self.download_source, 361 | ) 362 | ) 363 | 364 | ssh_check_output( 365 | client=ssh_client, 366 | command=""" 367 | python /tmp/download-package.py "{download_source}" "spark" 368 | """.format( 369 | # version=self.version, 370 | download_source=self.download_source.format(v=self.version), 371 | )) 372 | else: 373 | ssh_check_output( 374 | client=ssh_client, 375 | command=""" 376 | set -e 377 | sudo yum install -y git 378 | sudo yum install -y java-devel 379 | """) 380 | 381 | logger.debug( 382 | "[{h}] Cloning Spark at {c} from: {s}" 383 | .format( 384 | h=ssh_client.get_transport().getpeername()[0], 385 | c=self.git_commit, 386 | s=self.git_repository, 387 | ) 388 | ) 389 | 390 | ssh_check_output( 391 | client=ssh_client, 392 | command=""" 393 | set -e 394 | git clone {repo} spark 395 | cd spark 396 | git reset --hard {commit} 397 | if [ -e "make-distribution.sh" ]; then 398 | ./make-distribution.sh -Phadoop-{hadoop_short_version} 399 | else 400 | ./dev/make-distribution.sh -Phadoop-{hadoop_short_version} 401 | fi 402 | """.format( 403 | repo=shlex.quote(self.git_repository), 404 | commit=shlex.quote(self.git_commit), 405 | hadoop_short_version=spark_hadoop_build_version(self.hadoop_version), 406 | )) 407 | ssh_check_output( 408 | client=ssh_client, 409 | command=""" 410 | set -e 411 | for f in $(find spark/bin -type f -executable -not -name '*.cmd'); do 412 | sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)" 413 | done 414 | echo "export SPARK_HOME='$(pwd)/spark'" >> .bashrc 415 | """) 416 | 417 | def configure( 418 | self, 419 | ssh_client: paramiko.client.SSHClient, 420 | cluster: FlintrockCluster): 421 | 422 | template_paths = [ 423 | 'spark/conf/spark-env.sh', 424 | 'spark/conf/slaves', 425 | ] 426 | 427 | ssh_check_output( 428 | client=ssh_client, 429 | command="mkdir -p spark/conf", 430 | ) 431 | 432 | for template_path in template_paths: 433 | ssh_check_output( 434 | client=ssh_client, 435 | command=""" 436 | echo {f} > {p} 437 | """.format( 438 | f=shlex.quote( 439 | get_formatted_template( 440 | path=os.path.join(THIS_DIR, "templates", template_path), 441 | mapping=generate_template_mapping( 442 | cluster=cluster, 443 | spark_executor_instances=self.spark_executor_instances, 444 | hadoop_version=self.hadoop_version, 445 | spark_version=self.version or self.git_commit, 446 | ))), 447 | p=shlex.quote(template_path))) 448 | 449 | # TODO: Convert this into start_master() and split master- or slave-specific 450 | # stuff out of configure() into configure_master() and configure_slave(). 451 | # start_slave() can block until slave is fully up; that way we don't need 452 | # a sleep() before starting the master. 
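# A start_slave() along those lines could, for instance, poll the worker web UI
# (port 8081 by default) the same way configure_master() below polls the master
# UI on port 8080, looping on curl until it returns HTTP 200. That is only a
# sketch of the idea, not something the current code does.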
453 | def configure_master( 454 | self, 455 | ssh_client: paramiko.client.SSHClient, 456 | cluster: FlintrockCluster): 457 | host = ssh_client.get_transport().getpeername()[0] 458 | logger.info("[{h}] Configuring Spark master...".format(h=host)) 459 | 460 | # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/129 461 | attempt_limit = 3 462 | for attempt in range(attempt_limit): 463 | try: 464 | ssh_check_output( 465 | client=ssh_client, 466 | # Maybe move this shell script out to some separate 467 | # file/folder for the Spark service. 468 | command=""" 469 | spark/sbin/start-all.sh 470 | 471 | master_ui_response_code=0 472 | while [ "$master_ui_response_code" -ne 200 ]; do 473 | sleep 1 474 | master_ui_response_code="$( 475 | curl --head --silent --output /dev/null \ 476 | --write-out "%{{http_code}}" {m}:8080 477 | )" 478 | done 479 | """.format(m=shlex.quote(cluster.master_private_host)), 480 | timeout_seconds=90 481 | ) 482 | break 483 | except socket.timeout as e: 484 | logger.debug( 485 | "Timed out waiting for Spark master to come up.{}" 486 | .format(" Trying again..." if attempt < attempt_limit - 1 else "") 487 | ) 488 | else: 489 | raise Exception("Timed out waiting for Spark master to come up.") 490 | 491 | def health_check(self, master_host: str): 492 | spark_master_ui = 'http://{m}:8080/json/'.format(m=master_host) 493 | 494 | try: 495 | json.loads( 496 | urllib.request 497 | .urlopen(spark_master_ui) 498 | .read() 499 | .decode('utf-8') 500 | ) 501 | # TODO: Don't print here. Return this and let the caller print. 502 | logger.info("Spark online.") 503 | except Exception as e: 504 | # TODO: Catch a more specific problem known to be related to Spark not 505 | # being up; provide a slightly better error message, and don't 506 | # dump a large stack trace on the user. 
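# (For example, urllib.error.URLError and json.JSONDecodeError are the
# likely failure modes when the master is not yet serving /json/, so those
# could be caught specifically instead of the blanket Exception above.)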
507 | raise Exception("Spark health check failed.") from e 508 | 509 | def get_security_group_rules(self, flintrock_client_cidr: str=None, flintrock_client_group: str=None): 510 | return [ 511 | SecurityGroupRule( 512 | ip_protocol='tcp', 513 | from_port=8080, 514 | to_port=8081, 515 | cidr_ip=flintrock_client_cidr, 516 | src_group=flintrock_client_group, 517 | ), 518 | SecurityGroupRule( 519 | ip_protocol='tcp', 520 | from_port=4040, 521 | to_port=4050, 522 | cidr_ip=flintrock_client_cidr, 523 | src_group=flintrock_client_group, 524 | ), 525 | SecurityGroupRule( 526 | ip_protocol='tcp', 527 | from_port=7077, 528 | to_port=7077, 529 | cidr_ip=flintrock_client_cidr, 530 | src_group=flintrock_client_group, 531 | ), 532 | # Spark REST Server 533 | SecurityGroupRule( 534 | ip_protocol='tcp', 535 | from_port=6066, 536 | to_port=6066, 537 | cidr_ip=flintrock_client_cidr, 538 | src_group=flintrock_client_group, 539 | ), 540 | ] 541 | -------------------------------------------------------------------------------- /flintrock/ssh.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | import socket 4 | import subprocess 5 | import tempfile 6 | import time 7 | import logging 8 | from collections import namedtuple 9 | 10 | # External modules 11 | import paramiko 12 | 13 | # Flintrock modules 14 | from .util import get_subprocess_env 15 | from .exceptions import SSHError 16 | 17 | SSHKeyPair = namedtuple('KeyPair', ['public', 'private']) 18 | 19 | 20 | logger = logging.getLogger('flintrock.ssh') 21 | 22 | 23 | def generate_ssh_key_pair() -> SSHKeyPair: 24 | """ 25 | Generate an SSH key pair that the cluster can use for intra-cluster 26 | communication. 27 | """ 28 | with tempfile.TemporaryDirectory() as tempdir: 29 | subprocess.check_call( 30 | [ 31 | 'ssh-keygen', 32 | '-q', 33 | '-t', 'rsa', 34 | '-N', '', 35 | '-f', os.path.join(tempdir, 'flintrock_rsa'), 36 | '-C', 'flintrock', 37 | ], 38 | env=get_subprocess_env(), 39 | ) 40 | 41 | with open(file=os.path.join(tempdir, 'flintrock_rsa')) as private_key_file: 42 | private_key = private_key_file.read() 43 | 44 | with open(file=os.path.join(tempdir, 'flintrock_rsa.pub')) as public_key_file: 45 | public_key = public_key_file.read() 46 | 47 | return namedtuple('KeyPair', ['public', 'private'])(public_key, private_key) 48 | 49 | 50 | def get_ssh_client( 51 | *, 52 | user: str, 53 | host: str, 54 | identity_file: str, 55 | wait: bool=False, 56 | print_status: bool=None) -> paramiko.client.SSHClient: 57 | """ 58 | Get an SSH client for the provided host, waiting as necessary for SSH to become 59 | available. 
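When `wait` is true, the connection is retried up to 100 times with a short
sleep between attempts; otherwise only a few attempts are made before giving up.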
60 | """ 61 | if print_status is None: 62 | print_status = wait 63 | 64 | client = paramiko.client.SSHClient() 65 | 66 | client.load_system_host_keys() 67 | client.set_missing_host_key_policy(paramiko.client.AutoAddPolicy()) 68 | 69 | if wait: 70 | tries = 100 71 | else: 72 | # It's greater than 1 as a band-aid for this issue: 73 | # https://github.com/nchammas/flintrock/issues/198 74 | tries = 3 75 | 76 | while tries > 0: 77 | try: 78 | tries -= 1 79 | client.connect( 80 | username=user, 81 | hostname=host, 82 | key_filename=identity_file, 83 | look_for_keys=False, 84 | timeout=3) 85 | if print_status: 86 | logger.info("[{h}] SSH online.".format(h=host)) 87 | break 88 | except socket.timeout as e: 89 | logger.debug("[{h}] SSH timeout.".format(h=host)) 90 | time.sleep(5) 91 | except paramiko.ssh_exception.NoValidConnectionsError as e: 92 | if any(error.errno != errno.ECONNREFUSED for error in e.errors.values()): 93 | raise 94 | logger.debug("[{h}] SSH exception: {e}".format(h=host, e=e)) 95 | time.sleep(5) 96 | # We get this exception during startup with CentOS but not Amazon Linux, 97 | # for some reason. 98 | except paramiko.ssh_exception.AuthenticationException as e: 99 | logger.debug("[{h}] SSH AuthenticationException.".format(h=host)) 100 | time.sleep(5) 101 | except paramiko.ssh_exception.SSHException as e: 102 | raise SSHError( 103 | host=host, 104 | message="SSH protocol error. Possible causes include using " 105 | "the wrong key file or username.", 106 | ) from e 107 | else: 108 | raise SSHError( 109 | host=host, 110 | message="Could not connect via SSH.") 111 | 112 | return client 113 | 114 | 115 | def ssh_check_output( 116 | client: paramiko.client.SSHClient, 117 | command: str, 118 | timeout_seconds: int=None, 119 | ): 120 | """ 121 | Run a command via the provided SSH client and return the output captured 122 | on stdout. 123 | 124 | Raise an exception if the command returns a non-zero code. 125 | """ 126 | stdin, stdout, stderr = client.exec_command( 127 | command, 128 | get_pty=True, 129 | timeout=timeout_seconds) 130 | 131 | # NOTE: Paramiko doesn't clearly document this, but we must read() before 132 | # calling recv_exit_status(). 133 | # See: https://github.com/paramiko/paramiko/issues/448#issuecomment-159481997 134 | stdout_output = stdout.read().decode('utf8').rstrip('\n') 135 | stderr_output = stderr.read().decode('utf8').rstrip('\n') 136 | exit_status = stdout.channel.recv_exit_status() 137 | 138 | if exit_status: 139 | # TODO: Return a custom exception that includes the return code. 140 | # See: https://docs.python.org/3/library/subprocess.html#subprocess.check_output 141 | # NOTE: We are losing the output order here since output from stdout and stderr 142 | # may be interleaved. 143 | raise SSHError( 144 | host=client.get_transport().getpeername()[0], 145 | message=stdout_output + stderr_output) 146 | 147 | return stdout_output 148 | 149 | 150 | def ssh(*, user: str, host: str, identity_file: str): 151 | """ 152 | SSH into a host for interactive use. 
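Unlike the helpers above, this shells out to the local `ssh` binary rather than
using paramiko, so the user gets a normal interactive terminal.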
153 | """ 154 | subprocess.call( 155 | [ 156 | 'ssh', 157 | '-o', 'StrictHostKeyChecking=no', 158 | '-i', identity_file, 159 | '{u}@{h}'.format(u=user, h=host), 160 | ], 161 | env=get_subprocess_env(), 162 | ) 163 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/core-site.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> 3 | 4 | <configuration> 5 | <property> 6 | <name>hadoop.tmp.dir</name> 7 | <value>{hadoop_root_ephemeral_dirs}</value> 8 | </property> 9 | 10 | <property> 11 | <name>fs.defaultFS</name> 12 | <value>hdfs://{master_private_host}:9000</value> 13 | </property> 14 | </configuration> 15 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/hadoop-env.sh: -------------------------------------------------------------------------------- 1 | export HADOOP_HOME="$HOME/hadoop" 2 | export HADOOP_SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5" 3 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> 3 | 4 | <configuration> 5 | <property> 6 | <name>dfs.blocksize</name> 7 | <value>134217728</value> 8 | </property> 9 | 10 | <property> 11 | <name>dfs.datanode.data.dir</name> 12 | <value>{hadoop_root_ephemeral_dirs}</value> 13 | </property> 14 | </configuration> 15 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/masters: -------------------------------------------------------------------------------- 1 | {master_private_host} 2 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/slaves: -------------------------------------------------------------------------------- 1 | {slave_private_hosts} 2 | -------------------------------------------------------------------------------- /flintrock/templates/spark/conf/slaves: -------------------------------------------------------------------------------- 1 | {slave_private_hosts} 2 | -------------------------------------------------------------------------------- /flintrock/templates/spark/conf/spark-env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export SPARK_LOCAL_DIRS="{spark_root_ephemeral_dirs}" 4 | 5 | # Standalone cluster options 6 | export SPARK_EXECUTOR_INSTANCES="{spark_executor_instances}" 7 | export SPARK_EXECUTOR_CORES="$(($(nproc) / {spark_executor_instances}))" 8 | export SPARK_WORKER_CORES="$(nproc)" 9 | 10 | export SPARK_MASTER_HOST="{master_private_host}" 11 | 12 | # TODO: Make this dependent on HDFS install. 13 | export HADOOP_CONF_DIR="$HOME/hadoop/conf" 14 | 15 | # TODO: Make this non-EC2-specific. 16 | # Bind Spark's web UIs to this machine's public EC2 hostname 17 | export SPARK_PUBLIC_DNS="$(curl --silent http://169.254.169.254/latest/meta-data/public-hostname)" 18 | 19 | # TODO: Set a high ulimit for large shuffles 20 | # Need to find a way to do this, since "sudo ulimit..." doesn't fly. 21 | # Probably need to edit some Linux config file. 22 | # ulimit -n 1000000 23 | 24 | # Should this be made part of a Python service somehow? 25 | export PYSPARK_PYTHON="python3" 26 | -------------------------------------------------------------------------------- /flintrock/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | FROZEN = getattr(sys, 'frozen', False) 5 | 6 | 7 | def get_subprocess_env() -> dict: 8 | """ 9 | Get the environment we want to use when making subprocess calls.
10 | This takes care of details that affect subprocess calls made from 11 | PyInstaller-packaged versions of Flintrock. 12 | 13 | For more information see: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations 14 | """ 15 | env = dict(os.environ) 16 | if FROZEN: 17 | env['LD_LIBRARY_PATH'] = env.get('LD_LIBRARY_PATH_ORIG', '') 18 | return env 19 | 20 | 21 | def spark_hadoop_build_version(hadoop_version: str) -> str: 22 | """ 23 | Given a Hadoop version, determine the Hadoop build of Spark to use. 24 | """ 25 | hadoop_version = tuple(map(int, hadoop_version.split('.'))) 26 | if hadoop_version < (2, 7): 27 | return 'hadoop2.6' 28 | elif (2, 7) <= hadoop_version < (3, 0): 29 | return 'hadoop2.7' 30 | elif (3, 0) <= hadoop_version: 31 | return 'hadoop3.2' 32 | -------------------------------------------------------------------------------- /generate-standalone-package.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import shutil 4 | import subprocess 5 | 6 | from flintrock import __version__ as flintrock_version 7 | 8 | THIS_DIR = os.path.dirname(os.path.realpath(__file__)) 9 | 10 | if __name__ == '__main__': 11 | operating_system = platform.system() 12 | if operating_system.lower() == 'darwin': 13 | operating_system = 'macOS' 14 | machine_type = platform.machine() 15 | 16 | subprocess.run( 17 | [ 18 | 'pyinstaller', 19 | '--noconfirm', 20 | '--clean', 21 | '--name', 'flintrock', 22 | '--additional-hooks-dir', '.', 23 | # This hidden import is introduced by botocore. 24 | # We won't need this when this issue is resolved: 25 | # https://github.com/pyinstaller/pyinstaller/issues/1844 26 | '--hidden-import', 'html.parser', 27 | # This hidden import is also introduced by botocore. 
28 | # It appears to be related to this issue: 29 | # https://github.com/pyinstaller/pyinstaller/issues/1935 30 | '--hidden-import', 'configparser', 31 | 'standalone.py' 32 | ], 33 | check=True) 34 | 35 | shutil.make_archive( 36 | base_name=os.path.join( 37 | THIS_DIR, 'dist', 38 | 'Flintrock-{v}-standalone-{os}-{m}'.format( 39 | v=flintrock_version, 40 | os=operating_system, 41 | m=machine_type)), 42 | format='zip', 43 | root_dir=os.path.join(THIS_DIR, 'dist', 'flintrock')) 44 | -------------------------------------------------------------------------------- /hook-flintrock.py: -------------------------------------------------------------------------------- 1 | datas = [ 2 | ('flintrock/scripts', './scripts'), 3 | ('flintrock/templates', './templates'), 4 | ('flintrock/config.yaml.template', './'), 5 | ] 6 | -------------------------------------------------------------------------------- /make-release.sh: -------------------------------------------------------------------------------- 1 | # Update: 2 | # - Default Spark version: https://spark.apache.org/downloads.html 3 | # - Default Hadoop version: https://hadoop.apache.org/releases.html 4 | # - Default Amazon Linux 2 EBS AMI: https://aws.amazon.com/amazon-linux-2/release-notes/ 5 | aws ec2 describe-images \ 6 | --owners amazon \ 7 | --filters \ 8 | "Name=name,Values=amzn2-ami-hvm-*-gp2" \ 9 | "Name=root-device-type,Values=ebs" \ 10 | "Name=virtualization-type,Values=hvm" \ 11 | "Name=architecture,Values=x86_64" \ 12 | --query \ 13 | 'reverse(sort_by(Images, &CreationDate))[:100].{CreationDate:CreationDate,ImageId:ImageId,Name:Name,Description:Description}' 14 | # - Dependencies: pip list --outdated 15 | # Run full acceptance tests 16 | # - Run private VPC tests too 17 | # Update Flintrock version 18 | # - flintrock/__init__.py 19 | # - README blurb about standalone version 20 | # Update CHANGES 21 | # - Check: https://github.com/nchammas/flintrock/pulls?q=is%3Apr+is%3Aclosed+label%3A%22needs+changelog%22 22 | # - Update "Unreleased" section. "Nothing notable yet." 23 | # Tag release on GitHub 24 | # - https://github.com/nchammas/flintrock/releases 25 | # - vX.Y.Z 26 | # - "Here's what's new in X.Y.Z." 27 | 28 | trash dist/ build/ Flintrock.egg-info/ 29 | 30 | python -m build 31 | 32 | # python setup.py register -r https://testpypi.python.org/pypi 33 | 34 | # Test PyPI upload 35 | twine upload dist/* --repository pypitest 36 | open https://test.pypi.org/project/Flintrock/ 37 | 38 | # Production PyPI upload 39 | twine upload dist/* --repository pypi 40 | open https://pypi.org/project/Flintrock/ 41 | 42 | python generate-standalone-package.py 43 | 44 | # Upload release builds to GitHub 45 | open dist/ 46 | # - Wheel 47 | # - macOS standalone package (x86 _and_ arm64?) 
48 | # - Linux standalone package (built by CI) 49 | # Update version to next.dev0 50 | 51 | # --- 52 | 53 | # Test release via pip 54 | deactivate 55 | trash venv 56 | python3 -m venv venv 57 | source venv/bin/activate 58 | 59 | python3 -m pip install --extra-index-url https://testpypi.python.org/simple flintrock 60 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Minimal pyproject file per: https://packaging.python.org/en/latest/guides/modernize-setup-py-project/ 2 | [build-system] 3 | # Minimum setuptools version that supports version in setup.cfg per: https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ 4 | requires = ["setuptools >= 46.4.0"] 5 | build-backend = "setuptools.build_meta" 6 | -------------------------------------------------------------------------------- /requirements/developer.in: -------------------------------------------------------------------------------- 1 | -r user.pip 2 | pytest >= 3.5.0 3 | pytest-cov >= 2.5.1 4 | flake8 == 6.1.0 5 | # PyYAML # requirement already covered by setup.py 6 | -------------------------------------------------------------------------------- /requirements/developer.pip: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.9 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements/developer.pip requirements/developer.in 6 | # 7 | -e file:.#egg=Flintrock 8 | # via -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 9 | bcrypt==4.2.1 10 | # via 11 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 12 | # paramiko 13 | boto3==1.29.4 14 | # via 15 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 16 | # flintrock 17 | botocore==1.32.4 18 | # via 19 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 20 | # boto3 21 | # flintrock 22 | # s3transfer 23 | cffi==1.17.1 24 | # via 25 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 26 | # cryptography 27 | # pynacl 28 | click==8.1.7 29 | # via 30 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 31 | # flintrock 32 | coverage[toml]==7.6.9 33 | # via 34 | # coverage 35 | # pytest-cov 36 | cryptography==44.0.0 37 | # via 38 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 39 | # paramiko 40 | exceptiongroup==1.2.2 41 | # via pytest 42 | flake8==6.1.0 43 | # via -r requirements/developer.in 44 | iniconfig==2.0.0 45 | # via pytest 46 | jmespath==1.0.1 47 | # via 48 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 49 | # boto3 50 | # botocore 51 | mccabe==0.7.0 52 | # via flake8 53 | packaging==24.2 54 | # via pytest 55 | paramiko==3.4.0 56 | # via 57 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 58 | # flintrock 59 | pluggy==1.5.0 60 | # via pytest 61 | pycodestyle==2.11.1 62 | # via flake8 63 | pycparser==2.22 64 | # via 65 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 66 | # cffi 67 | pyflakes==3.1.0 68 | # via flake8 69 | pynacl==1.5.0 70 | # via 71 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 72 | # paramiko 73 | pytest==8.3.4 74 | # via 75 | # -r requirements/developer.in 76 | # 
pytest-cov 77 | pytest-cov==6.0.0 78 | # via -r requirements/developer.in 79 | python-dateutil==2.9.0.post0 80 | # via 81 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 82 | # botocore 83 | pyyaml==6.0.2 84 | # via 85 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 86 | # flintrock 87 | s3transfer==0.7.0 88 | # via 89 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 90 | # boto3 91 | six==1.17.0 92 | # via 93 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 94 | # python-dateutil 95 | tomli==2.2.1 96 | # via 97 | # coverage 98 | # pytest 99 | urllib3==1.26.20 100 | # via 101 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 102 | # botocore 103 | -------------------------------------------------------------------------------- /requirements/maintainer.in: -------------------------------------------------------------------------------- 1 | -r developer.pip 2 | wheel >= 0.31.0 3 | twine == 4.0.2 4 | PyInstaller == 6.11.1 5 | build >= 1.0.3, < 2.0.0 6 | -------------------------------------------------------------------------------- /requirements/maintainer.pip: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.9 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements/maintainer.pip requirements/maintainer.in 6 | # 7 | -e file:.#egg=Flintrock 8 | # via -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 9 | altgraph==0.17.4 10 | # via 11 | # macholib 12 | # pyinstaller 13 | backports-tarfile==1.2.0 14 | # via jaraco-context 15 | bcrypt==4.2.1 16 | # via 17 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 18 | # paramiko 19 | boto3==1.29.4 20 | # via 21 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 22 | # flintrock 23 | botocore==1.32.4 24 | # via 25 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 26 | # boto3 27 | # flintrock 28 | # s3transfer 29 | build==1.2.2.post1 30 | # via -r requirements/maintainer.in 31 | certifi==2024.8.30 32 | # via requests 33 | cffi==1.17.1 34 | # via 35 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 36 | # cryptography 37 | # pynacl 38 | charset-normalizer==3.4.0 39 | # via requests 40 | click==8.1.7 41 | # via 42 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 43 | # flintrock 44 | coverage[toml]==7.6.9 45 | # via 46 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 47 | # coverage 48 | # pytest-cov 49 | cryptography==44.0.0 50 | # via 51 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 52 | # paramiko 53 | docutils==0.21.2 54 | # via readme-renderer 55 | exceptiongroup==1.2.2 56 | # via 57 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 58 | # pytest 59 | flake8==6.1.0 60 | # via -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 61 | idna==3.10 62 | # via requests 63 | importlib-metadata==8.5.0 64 | # via 65 | # build 66 | # keyring 67 | # pyinstaller 68 | # pyinstaller-hooks-contrib 69 | # twine 70 | iniconfig==2.0.0 71 | # via 72 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 73 | # pytest 74 | 
jaraco-classes==3.4.0 75 | # via keyring 76 | jaraco-context==6.0.1 77 | # via keyring 78 | jaraco-functools==4.1.0 79 | # via keyring 80 | jmespath==1.0.1 81 | # via 82 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 83 | # boto3 84 | # botocore 85 | keyring==25.5.0 86 | # via twine 87 | macholib==1.16.3 88 | # via pyinstaller 89 | markdown-it-py==3.0.0 90 | # via rich 91 | mccabe==0.7.0 92 | # via 93 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 94 | # flake8 95 | mdurl==0.1.2 96 | # via markdown-it-py 97 | more-itertools==10.5.0 98 | # via 99 | # jaraco-classes 100 | # jaraco-functools 101 | nh3==0.2.19 102 | # via readme-renderer 103 | packaging==24.2 104 | # via 105 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 106 | # build 107 | # pyinstaller 108 | # pyinstaller-hooks-contrib 109 | # pytest 110 | paramiko==3.4.0 111 | # via 112 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 113 | # flintrock 114 | pkginfo==1.12.0 115 | # via twine 116 | pluggy==1.5.0 117 | # via 118 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 119 | # pytest 120 | pycodestyle==2.11.1 121 | # via 122 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 123 | # flake8 124 | pycparser==2.22 125 | # via 126 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 127 | # cffi 128 | pyflakes==3.1.0 129 | # via 130 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 131 | # flake8 132 | pygments==2.18.0 133 | # via 134 | # readme-renderer 135 | # rich 136 | pyinstaller==6.11.1 137 | # via -r requirements/maintainer.in 138 | pyinstaller-hooks-contrib==2024.10 139 | # via pyinstaller 140 | pynacl==1.5.0 141 | # via 142 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 143 | # paramiko 144 | pyproject-hooks==1.2.0 145 | # via build 146 | pytest==8.3.4 147 | # via 148 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 149 | # pytest-cov 150 | pytest-cov==6.0.0 151 | # via -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 152 | python-dateutil==2.9.0.post0 153 | # via 154 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 155 | # botocore 156 | pyyaml==6.0.2 157 | # via 158 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 159 | # flintrock 160 | readme-renderer==44.0 161 | # via twine 162 | requests==2.32.3 163 | # via 164 | # requests-toolbelt 165 | # twine 166 | requests-toolbelt==1.0.0 167 | # via twine 168 | rfc3986==2.0.0 169 | # via twine 170 | rich==13.9.4 171 | # via twine 172 | s3transfer==0.7.0 173 | # via 174 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 175 | # boto3 176 | six==1.17.0 177 | # via 178 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 179 | # python-dateutil 180 | tomli==2.2.1 181 | # via 182 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 183 | # build 184 | # coverage 185 | # pytest 186 | twine==4.0.2 187 | # via -r requirements/maintainer.in 188 | typing-extensions==4.12.2 189 | # via rich 190 | urllib3==1.26.20 191 | # via 192 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 193 | # botocore 194 | # 
requests 195 | # twine 196 | wheel==0.45.1 197 | # via -r requirements/maintainer.in 198 | zipp==3.21.0 199 | # via importlib-metadata 200 | 201 | # The following packages are considered to be unsafe in a requirements file: 202 | # setuptools 203 | -------------------------------------------------------------------------------- /requirements/user.in: -------------------------------------------------------------------------------- 1 | # Notes: 2 | # - Run pip from Flintrock's root directory, not from the 3 | # directory containing this file. 4 | # - The `-e .` syntax lets us reuse the requirements already 5 | # specified under `install_requires` in setup.py. 6 | # See: https://caremad.io/2013/07/setup-vs-requirement/ 7 | # - The #egg= syntax is a workaround for pip-tools. 8 | # See: https://github.com/jazzband/pip-tools/issues/204#issuecomment-550051424 9 | --editable file:.#egg=Flintrock 10 | -------------------------------------------------------------------------------- /requirements/user.pip: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.9 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements/user.pip requirements/user.in 6 | # 7 | -e file:.#egg=Flintrock 8 | # via -r requirements/user.in 9 | bcrypt==4.2.1 10 | # via paramiko 11 | boto3==1.29.4 12 | # via flintrock 13 | botocore==1.32.4 14 | # via 15 | # boto3 16 | # flintrock 17 | # s3transfer 18 | cffi==1.17.1 19 | # via 20 | # cryptography 21 | # pynacl 22 | click==8.1.7 23 | # via flintrock 24 | cryptography==44.0.0 25 | # via paramiko 26 | jmespath==1.0.1 27 | # via 28 | # boto3 29 | # botocore 30 | paramiko==3.4.0 31 | # via flintrock 32 | pycparser==2.22 33 | # via cffi 34 | pynacl==1.5.0 35 | # via paramiko 36 | python-dateutil==2.9.0.post0 37 | # via botocore 38 | pyyaml==6.0.2 39 | # via flintrock 40 | s3transfer==0.7.0 41 | # via boto3 42 | six==1.17.0 43 | # via python-dateutil 44 | urllib3==1.26.20 45 | # via botocore 46 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # See: https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ 2 | [metadata] 3 | version = attr: flintrock.__version__ 4 | 5 | [tool:pytest] 6 | norecursedirs = venv 7 | addopts = 8 | --verbose 9 | --cov flintrock 10 | --cov-report html 11 | -rs 12 | # --exitfirst 13 | 14 | [flake8] 15 | max-line-length = 100 16 | exclude = venv, build, dist 17 | ignore = 18 | E501 19 | E252 20 | F821 21 | F841 22 | W503 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | # from flintrock import __version__ 3 | 4 | 5 | with open('README.md') as f: 6 | long_description = f.read() 7 | 8 | setuptools.setup( 9 | name='Flintrock', 10 | # Moved to setup.cfg to avoid import of flintrock during installation of 11 | # flintrock. This used to work, but becomes a problem with isolated builds 12 | # and new pip behavior triggered by pyproject.toml. 
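# The version is instead read by setuptools from flintrock/__init__.py via the
# `version = attr: flintrock.__version__` setting in setup.cfg.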
13 | # version=__version__, 14 | description='A command-line tool for launching Apache Spark clusters.', 15 | long_description=long_description, 16 | long_description_content_type="text/markdown", 17 | url='https://github.com/nchammas/flintrock', 18 | author='Nicholas Chammas', 19 | author_email='nicholas.chammas@gmail.com', 20 | license='Apache License 2.0', 21 | python_requires='>= 3.9', 22 | 23 | # See: https://pypi.python.org/pypi?%3Aaction=list_classifiers 24 | classifiers=[ 25 | 'Development Status :: 5 - Production/Stable', 26 | 27 | 'Intended Audience :: Developers', 28 | 'Intended Audience :: Science/Research', 29 | 30 | 'Topic :: Utilities', 31 | 'Environment :: Console', 32 | 'Operating System :: MacOS :: MacOS X', 33 | 'Operating System :: POSIX', 34 | 35 | 'License :: OSI Approved :: Apache Software License', 36 | 37 | 'Programming Language :: Python :: 3', 38 | 'Programming Language :: Python :: 3 :: Only', 39 | ], 40 | keywords=['Apache Spark'], 41 | 42 | packages=setuptools.find_packages(), 43 | include_package_data=True, 44 | 45 | # We pin dependencies because sometimes projects do not 46 | # strictly follow semantic versioning, so new "feature" 47 | # releases end up making backwards-incompatible changes. 48 | # Sometimes, new releases even introduce bugs which 49 | # totally break Flintrock. 50 | # For example: https://github.com/paramiko/paramiko/issues/615 51 | install_requires=[ 52 | 'boto3 == 1.29.4', 53 | 'botocore == 1.32.4', 54 | 'click == 8.1.7', 55 | 'paramiko == 3.4.0', 56 | 'PyYAML == 6.0.2', 57 | ], 58 | 59 | entry_points={ 60 | 'console_scripts': [ 61 | 'flintrock = flintrock.__main__:main', 62 | ], 63 | }, 64 | ) 65 | -------------------------------------------------------------------------------- /standalone.py: -------------------------------------------------------------------------------- 1 | """ 2 | A standalone script for use by PyInstaller. 3 | 4 | Users should not be running this script. 5 | """ 6 | 7 | import sys 8 | from flintrock.flintrock import main 9 | 10 | if __name__ == '__main__': 11 | sys.exit(main()) 12 | -------------------------------------------------------------------------------- /test-infra/.gitignore: -------------------------------------------------------------------------------- 1 | .terraform* 2 | terraform.tfstate* 3 | terraform.tfvars 4 | -------------------------------------------------------------------------------- /test-infra/README.md: -------------------------------------------------------------------------------- 1 | # Private VPC Test Infrastructure 2 | 3 | The Terraform templates in this directory manage private VPC infrastructure that Flintrock contributors can use to test their changes. 4 | 5 | ## Set Up 6 | 7 | After [installing Terraform][install], you can spin up a private VPC along with associated infrastructure like a NAT gateway: 8 | 9 | [install]: https://developer.hashicorp.com/terraform/install 10 | 11 | ```sh 12 | terraform init 13 | terraform apply 14 | ``` 15 | 16 | You can provide the required variables to this command by creating a `terraform.tfvars` file. The variables you need to define are listed in `variables.tf`. 17 | 18 | Once the `apply` command completes, you'll see some output like this: 19 | 20 | ``` 21 | Apply complete! Resources: 12 added, 0 changed, 0 destroyed. 22 | 23 | Outputs: 24 | 25 | bastion_ip = 18.205.7.24 26 | ``` 27 | 28 | SSH into your bastion host. You'll spin up Flintrock clusters from here. 
A virtual environment and Flintrock config file will already be setup for you based on the variables you provided to Terraform during infrastructure creation: 29 | 30 | ```sh 31 | ssh ec2-user@18.205.7.24 32 | source venv/bin/activate 33 | less /home/ec2-user/.config/flintrock/config.yaml 34 | ``` 35 | 36 | All you need to do is pick a version of Flintrock to install and then you can begin your tests against a private VPC! 37 | 38 | ```sh 39 | pip install https://github.com/nchammas/flintrock/archive/master.zip 40 | flintrock launch test-cluster 41 | flintrock login test-cluster 42 | flintrock destroy test-cluster 43 | ``` 44 | 45 | ## Tear Down 46 | 47 | A NAT gateway is expensive to keep up all the time, so you'll want to tear down the infrastructure when you're done. Be sure to first tear down any Flintrock clusters you launched into the test VPC. 48 | 49 | ```sh 50 | ./delete-test-infra.sh 51 | ``` 52 | 53 | This script calls `terraform destroy` after clearing out some infrastructure that Flintrock creates inside the private VPC. 54 | -------------------------------------------------------------------------------- /test-infra/bastion.tf: -------------------------------------------------------------------------------- 1 | data "http" "myip" { 2 | url = "https://icanhazip.com" 3 | } 4 | 5 | resource "aws_security_group" "ssh" { 6 | name = "flintrock-bastion-ssh" 7 | vpc_id = aws_vpc.main.id 8 | 9 | ingress { 10 | from_port = 22 11 | to_port = 22 12 | protocol = "tcp" 13 | cidr_blocks = ["${chomp(data.http.myip.response_body)}/32"] 14 | } 15 | 16 | egress { 17 | from_port = 0 18 | to_port = 0 19 | protocol = "-1" 20 | cidr_blocks = ["0.0.0.0/0"] 21 | } 22 | } 23 | 24 | resource "aws_instance" "bastion" { 25 | ami = "ami-0a887e401f7654935" 26 | instance_type = "t2.nano" 27 | key_name = var.ec2_key_name 28 | subnet_id = aws_subnet.public.id 29 | associate_public_ip_address = true 30 | vpc_security_group_ids = [aws_security_group.ssh.id] 31 | 32 | tags = { 33 | Name = "flintrock-bastion" 34 | } 35 | 36 | connection { 37 | host = self.public_ip 38 | user = "ec2-user" 39 | private_key = file(var.ssh_key_path) 40 | } 41 | 42 | provisioner "remote-exec" { 43 | inline = [ 44 | "mkdir -p /home/ec2-user/.aws/", 45 | ] 46 | } 47 | 48 | provisioner "file" { 49 | source = var.aws_credentials_path 50 | destination = "/home/ec2-user/.aws/credentials" 51 | } 52 | 53 | provisioner "file" { 54 | source = var.ssh_key_path 55 | destination = "/home/ec2-user/.ssh/${var.ec2_key_name}.pem" 56 | } 57 | 58 | provisioner "remote-exec" { 59 | inline = [ 60 | "chmod go-rwx /home/ec2-user/.ssh/${var.ec2_key_name}.pem", 61 | ] 62 | } 63 | 64 | provisioner "remote-exec" { 65 | inline = [ 66 | "mkdir -p /home/ec2-user/.config/flintrock/", 67 | ] 68 | } 69 | 70 | provisioner "file" { 71 | source = var.flintrock_config_path 72 | destination = "/home/ec2-user/.config/flintrock/config.yaml" 73 | } 74 | 75 | provisioner "remote-exec" { 76 | inline = [ 77 | "sudo yum install -y git", 78 | "sudo yum install -y gcc make patch zlib-devel bzip2 bzip2-devel readline-devel sqlite sqlite-devel openssl11-devel tk-devel libffi-devel xz-devel", 79 | "curl https://pyenv.run | bash", 80 | ".pyenv/bin/pyenv install 3.9", 81 | ".pyenv/bin/pyenv global 3.9", 82 | ".pyenv/bin/pyenv exec python -m venv /home/ec2-user/venv", 83 | "/home/ec2-user/venv/bin/pip install PyYAML", 84 | ] 85 | } 86 | 87 | provisioner "remote-exec" { 88 | inline = [ 89 | <<-EOM 90 | /home/ec2-user/venv/bin/python << EO_PYTHON 91 | import yaml 92 | with 
open('/home/ec2-user/.config/flintrock/config.yaml') as f: 93 | config = yaml.safe_load(f) 94 | config['providers']['ec2']['key-name'] = '${var.ec2_key_name}' 95 | config['providers']['ec2']['identity-file'] = '/home/ec2-user/.ssh/${var.ec2_key_name}.pem' 96 | config['providers']['ec2']['vpc-id'] = '${aws_vpc.main.id}' 97 | config['providers']['ec2']['subnet-id'] = '${aws_subnet.private.id}' 98 | config['providers']['ec2']['authorize-access-from'] = ['${self.private_ip}'] 99 | with open('/home/ec2-user/.config/flintrock/config.yaml', 'w') as f: 100 | yaml.dump(config, f, indent=2) 101 | EO_PYTHON 102 | EOM 103 | ] 104 | } 105 | } 106 | 107 | output "bastion_ip" { 108 | value = aws_instance.bastion.public_ip 109 | } 110 | -------------------------------------------------------------------------------- /test-infra/delete-test-infra.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | set -x 4 | 5 | # Apparently you can't simply do `terraform state show aws_vpc.main.id`. 6 | vpc_id="$( 7 | terraform show -json \ 8 | | jq --raw-output ' 9 | .values.root_module.resources[] 10 | | select(.type == "aws_vpc" and .name == "main") 11 | | .values.id 12 | ' 13 | )" 14 | 15 | security_group_ids=($( 16 | aws ec2 describe-security-groups \ 17 | --filters "Name=vpc-id,Values=$vpc_id" "Name=group-name,Values=flintrock" \ 18 | --query "SecurityGroups[*].{ID:GroupId}" \ 19 | | jq --raw-output '.[] | .ID' 20 | )) 21 | 22 | for sg_id in "${security_group_ids[@]}"; do 23 | aws ec2 delete-security-group --group-id "$sg_id" 24 | done 25 | 26 | terraform destroy 27 | -------------------------------------------------------------------------------- /test-infra/network.tf: -------------------------------------------------------------------------------- 1 | resource "aws_vpc" "main" { 2 | cidr_block = "172.31.0.0/16" 3 | enable_dns_hostnames = true 4 | 5 | tags = { 6 | Name = "flintrock-test" 7 | } 8 | } 9 | 10 | resource "aws_subnet" "public" { 11 | vpc_id = aws_vpc.main.id 12 | cidr_block = "172.31.0.0/24" 13 | availability_zone = "us-east-1a" 14 | map_public_ip_on_launch = true 15 | 16 | tags = { 17 | Name = "flintrock-test-public" 18 | } 19 | } 20 | 21 | resource "aws_subnet" "private" { 22 | vpc_id = aws_vpc.main.id 23 | cidr_block = "172.31.1.0/24" 24 | availability_zone = "us-east-1a" 25 | map_public_ip_on_launch = false 26 | 27 | tags = { 28 | Name = "flintrock-test-private" 29 | } 30 | } 31 | 32 | resource "aws_eip" "nat" { 33 | domain = "vpc" 34 | 35 | tags = { 36 | Name = "flintrock-test-nat-ip" 37 | } 38 | } 39 | 40 | resource "aws_nat_gateway" "private_gateway" { 41 | allocation_id = aws_eip.nat.id 42 | subnet_id = aws_subnet.public.id 43 | 44 | tags = { 45 | Name = "flintrock-test-private-gateway" 46 | } 47 | } 48 | 49 | resource "aws_internet_gateway" "main" { 50 | vpc_id = aws_vpc.main.id 51 | 52 | tags = { 53 | Name = "flintrock-test-gateway" 54 | } 55 | } 56 | 57 | resource "aws_route_table" "public" { 58 | vpc_id = aws_vpc.main.id 59 | 60 | route { 61 | cidr_block = "0.0.0.0/0" 62 | gateway_id = aws_internet_gateway.main.id 63 | } 64 | 65 | tags = { 66 | Name = "flintrock-test-public" 67 | } 68 | } 69 | 70 | resource "aws_route_table" "private" { 71 | vpc_id = aws_vpc.main.id 72 | 73 | route { 74 | cidr_block = "0.0.0.0/0" 75 | nat_gateway_id = aws_nat_gateway.private_gateway.id 76 | } 77 | 78 | tags = { 79 | Name = "flintrock-test-private" 80 | } 81 | } 82 | 83 | resource "aws_route_table_association" "public" { 84 | subnet_id = 
aws_subnet.public.id 85 | route_table_id = aws_route_table.public.id 86 | } 87 | 88 | resource "aws_route_table_association" "private" { 89 | subnet_id = aws_subnet.private.id 90 | route_table_id = aws_route_table.private.id 91 | } 92 | -------------------------------------------------------------------------------- /test-infra/provider.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | aws = { 4 | source = "hashicorp/aws" 5 | version = "~> 5" 6 | } 7 | http = { 8 | source = "hashicorp/http" 9 | version = "~> 3" 10 | } 11 | } 12 | } 13 | 14 | provider "aws" { 15 | region = "us-east-1" 16 | } 17 | -------------------------------------------------------------------------------- /test-infra/variables.tf: -------------------------------------------------------------------------------- 1 | variable "ec2_key_name" { 2 | type = string 3 | } 4 | 5 | variable "ssh_key_path" { 6 | type = string 7 | } 8 | 9 | variable "aws_credentials_path" { 10 | type = string 11 | } 12 | 13 | variable "flintrock_config_path" { 14 | type = string 15 | } 16 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Flintrock Tests 2 | 3 | Use the tests in this directory to help you catch bugs as you work on Flintrock. 4 | 5 | The instructions here assume the following things: 6 | 7 | 1. You've read through our [guide on contributing code](../CONTRIBUTING.md#contributing-code) and installed Flintrock's development dependencies. 8 | 2. You're working from Flintrock's root directory. 9 | 3. You're running Python 3.9+. 10 | 4. You've already setup your Flintrock config file and can launch clusters. 11 | 12 | To run all of Flintrock's tests that don't require AWS credentials, just run: 13 | 14 | ```sh 15 | pytest 16 | ``` 17 | 18 | This is probably what you want to do most of the time. 19 | 20 | To run all of Flintrock's tests, including the ones that require AWS credentials (like acceptance tests), run this: 21 | 22 | ```sh 23 | USE_AWS_CREDENTIALS=true pytest # will launch real clusters! 24 | ``` 25 | 26 | Acceptance tests launch and manipulate real clusters to test Flintrock's various commands and make sure installed services like Spark are working correctly. 27 | 28 | Some things you should keep in mind when running the full test suite with your AWS credentials: 29 | 30 | * **Running the full test suite costs money** (less than $1 for the full test run) since it launches and manipulates real clusters. 31 | * **A failed test run may leave behind running clusters**. You'll need to destroy these manually. 32 | * The full test suite takes a while to run (~30-60 minutes). 33 | * Though the tests that use your AWS credentials are disabled by default, you can explicitly disable them by setting `USE_AWS_CREDENTIALS=""`. Setting that variable to `false` or to any non-empty string won't work. 34 | 35 | Relatively speaking, acceptance tests are expensive, but they are the most valuable type of test for an orchestration tool like Flintrock. Use them judiciously. 
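If you want to exercise just one slice of the suite while iterating, standard `pytest` test selection works here as you would expect: point it at a single file or filter by test name.

```sh
pytest tests/test_ec2.py
pytest -k validate_tags
```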
36 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | import uuid 5 | from collections import OrderedDict 6 | 7 | # Flintrock 8 | from flintrock.core import StorageDirs 9 | 10 | # External 11 | import pytest 12 | 13 | HADOOP_VERSION = '3.3.6' 14 | SPARK_VERSION = '3.5.0' 15 | SPARK_GIT_COMMIT = 'ce5ddad990373636e94071e7cef2f31021add07b' # 3.5.0 16 | JAVA_VERSION = '11' 17 | 18 | 19 | class Dummy(): 20 | pass 21 | 22 | 23 | aws_credentials_required = ( 24 | pytest.mark.skipif( 25 | not bool(os.environ.get('USE_AWS_CREDENTIALS')), 26 | reason="USE_AWS_CREDENTIALS not set")) 27 | 28 | 29 | @pytest.fixture(scope='session') 30 | def project_root_dir(): 31 | return os.path.dirname( 32 | os.path.dirname( 33 | os.path.realpath(__file__) 34 | ) 35 | ) 36 | 37 | 38 | @pytest.fixture(scope='session') 39 | def dummy_cluster(): 40 | storage_dirs = StorageDirs( 41 | root='/media/root', 42 | ephemeral=['/media/eph1', '/media/eph2'], 43 | persistent=None, 44 | ) 45 | 46 | cluster = Dummy() 47 | cluster.name = 'test' 48 | cluster.storage_dirs = storage_dirs 49 | cluster.master_ip = '10.0.0.1' 50 | cluster.master_host = 'master.hostname' 51 | cluster.master_private_host = 'master.privatehostname' 52 | cluster.slave_ips = ['10.0.0.2'] 53 | cluster.slave_hosts = ['slave1.hostname'] 54 | cluster.slave_private_hosts = ['slave1.privatehostname'] 55 | 56 | return cluster 57 | 58 | 59 | def random_string(): 60 | return str(uuid.uuid4())[:8] 61 | 62 | 63 | def launch_cluster( 64 | *, 65 | cluster_name, 66 | instance_type, 67 | spark_version, 68 | spark_git_commit): 69 | p = subprocess.run([ 70 | 'flintrock', 'launch', cluster_name, 71 | '--num-slaves', '1', 72 | '--install-hdfs', 73 | '--hdfs-version', HADOOP_VERSION, 74 | '--install-spark', 75 | '--spark-version', spark_version, 76 | '--spark-git-commit', spark_git_commit, 77 | '--java-version', JAVA_VERSION, 78 | '--assume-yes', 79 | '--ec2-instance-type', instance_type]) 80 | assert p.returncode == 0 81 | 82 | 83 | def stop_cluster(cluster_name): 84 | p = subprocess.run([ 85 | 'flintrock', 'stop', cluster_name, '--assume-yes']) 86 | assert p.returncode == 0 87 | 88 | 89 | def start_cluster(cluster_name): 90 | p = subprocess.run([ 91 | 'flintrock', 'start', cluster_name]) 92 | assert p.returncode == 0 93 | 94 | 95 | # TODO: This should reuse FlintrockCluster. 96 | class ClusterConfig: 97 | def __init__( 98 | self, 99 | *, 100 | restarted, 101 | instance_type, 102 | spark_version=SPARK_VERSION, 103 | spark_git_commit=''): 104 | self.restarted = restarted 105 | self.instance_type = instance_type 106 | self.spark_version = spark_version 107 | self.spark_git_commit = spark_git_commit 108 | 109 | def __str__(self): 110 | return str(OrderedDict(sorted(vars(self).items()))) 111 | 112 | 113 | cluster_configs = [ 114 | ClusterConfig(restarted=False, instance_type='t3.small'), 115 | ClusterConfig(restarted=True, instance_type='t3.small'), 116 | ClusterConfig(restarted=False, instance_type='m5.large'), 117 | ClusterConfig(restarted=True, instance_type='m5.large'), 118 | # We don't test all cluster states when building Spark because 119 | # it takes a very long time. 
120 | ClusterConfig( 121 | restarted=True, 122 | instance_type='m5.xlarge', 123 | spark_version='', 124 | spark_git_commit=SPARK_GIT_COMMIT, 125 | ), 126 | ] 127 | 128 | 129 | @pytest.fixture( 130 | scope='module', 131 | params=cluster_configs, 132 | ids=[str(cc) for cc in cluster_configs]) 133 | def running_cluster(request): 134 | """ 135 | Return the name of a running Flintrock cluster. 136 | """ 137 | cluster_name = 'running-cluster-' + random_string() 138 | 139 | try: 140 | launch_cluster( 141 | cluster_name=cluster_name, 142 | instance_type=request.param.instance_type, 143 | spark_version=request.param.spark_version, 144 | spark_git_commit=request.param.spark_git_commit) 145 | 146 | if request.param.restarted: 147 | stop_cluster(cluster_name) 148 | start_cluster(cluster_name) 149 | 150 | yield cluster_name 151 | finally: 152 | p = subprocess.run([ 153 | 'flintrock', 'destroy', cluster_name, '--assume-yes', 154 | ]) 155 | assert p.returncode == 0 156 | 157 | 158 | @pytest.fixture(scope='module') 159 | def stopped_cluster(request): 160 | cluster_name = 'running-cluster-' + random_string() 161 | 162 | try: 163 | p = subprocess.run([ 164 | 'flintrock', 'launch', cluster_name, 165 | '--num-slaves', '1', 166 | '--no-install-hdfs', 167 | '--no-install-spark', 168 | '--assume-yes', 169 | '--ec2-instance-type', 't3.small']) 170 | assert p.returncode == 0 171 | 172 | p = subprocess.run([ 173 | 'flintrock', 'stop', cluster_name, '--assume-yes']) 174 | assert p.returncode == 0 175 | 176 | yield cluster_name 177 | finally: 178 | p = subprocess.run([ 179 | 'flintrock', 'destroy', cluster_name, '--assume-yes', 180 | ]) 181 | assert p.returncode == 0 182 | 183 | 184 | @pytest.fixture(scope='module') 185 | def remote_file(request, running_cluster): 186 | """ 187 | Return the path to a remote dummy file on a running Flintrock cluster. 188 | """ 189 | file_path = '/tmp/remote_dummy_file_for_testing' 190 | 191 | try: 192 | p = subprocess.run([ 193 | 'flintrock', 'run-command', running_cluster, '--', 194 | 'echo -e "{data}" > {path}'.format( 195 | data='test\n' * 3, 196 | path=file_path)]) 197 | assert p.returncode == 0 198 | 199 | yield file_path 200 | finally: 201 | p = subprocess.run([ 202 | 'flintrock', 'run-command', running_cluster, '--', 203 | 'rm', '-f', file_path, 204 | ]) 205 | assert p.returncode == 0 206 | 207 | 208 | @pytest.fixture(scope='module') 209 | def local_file(request): 210 | """ 211 | Return the path to a local dummy file. 
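The file is a 1 KiB zero-filled temporary file, which is enough to exercise commands like `flintrock copy-file`.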
212 | """ 213 | file = tempfile.NamedTemporaryFile(delete=False) 214 | try: 215 | with open(file.name, 'wb') as f: 216 | f.truncate(1024) 217 | 218 | yield file.name 219 | finally: 220 | os.remove(file.name) 221 | -------------------------------------------------------------------------------- /tests/test_acceptance.py: -------------------------------------------------------------------------------- 1 | import json 2 | import subprocess 3 | import urllib.request 4 | 5 | # Flintrock modules 6 | from flintrock.exceptions import ClusterInvalidState 7 | from conftest import aws_credentials_required 8 | 9 | pytestmark = aws_credentials_required 10 | 11 | 12 | def test_describe_stopped_cluster(stopped_cluster): 13 | p = subprocess.run([ 14 | 'flintrock', 'describe', stopped_cluster], 15 | stdout=subprocess.PIPE) 16 | assert p.returncode == 0 17 | assert p.stdout.startswith(stopped_cluster.encode()) 18 | 19 | 20 | def test_stop_stopped_cluster(stopped_cluster): 21 | p = subprocess.run([ 22 | 'flintrock', 'stop', stopped_cluster], 23 | stdout=subprocess.PIPE) 24 | assert p.returncode == 0 25 | assert p.stdout == b"Cluster is already stopped.\n" 26 | 27 | 28 | def test_try_launching_duplicate_stopped_cluster(stopped_cluster): 29 | p = subprocess.run([ 30 | 'flintrock', 'launch', stopped_cluster], 31 | stderr=subprocess.PIPE) 32 | assert p.returncode == 1 33 | assert p.stderr.decode('utf-8').startswith( 34 | "Cluster {c} already exists".format(c=stopped_cluster)) 35 | 36 | 37 | def test_start_running_cluster(running_cluster): 38 | p = subprocess.run([ 39 | 'flintrock', 'start', running_cluster], 40 | stdout=subprocess.PIPE) 41 | assert p.returncode == 0 42 | assert p.stdout == b"Cluster is already running.\n" 43 | 44 | 45 | def test_try_launching_duplicate_cluster(running_cluster): 46 | p = subprocess.run([ 47 | 'flintrock', 'launch', running_cluster], 48 | stderr=subprocess.PIPE) 49 | assert p.returncode == 1 50 | assert p.stderr.decode('utf-8').startswith( 51 | "Cluster {c} already exists".format(c=running_cluster)) 52 | 53 | 54 | def test_describe_running_cluster(running_cluster): 55 | p = subprocess.run([ 56 | 'flintrock', 'describe', running_cluster], 57 | stdout=subprocess.PIPE) 58 | assert p.returncode == 0 59 | assert p.stdout.startswith(running_cluster.encode()) 60 | 61 | 62 | def test_run_command_on_running_cluster(running_cluster): 63 | p = subprocess.run([ 64 | 'flintrock', 'run-command', running_cluster, '--', 'ls', '-l']) 65 | assert p.returncode == 0 66 | 67 | 68 | def test_copy_file_on_running_cluster(running_cluster, local_file): 69 | p = subprocess.run([ 70 | 'flintrock', 'copy-file', running_cluster, local_file, '/tmp/copied_from_local']) 71 | assert p.returncode == 0 72 | 73 | 74 | def test_hdfs_on_running_cluster(running_cluster, remote_file): 75 | hdfs_path = '/hdfs_file' 76 | 77 | p = subprocess.run([ 78 | 'flintrock', 'run-command', running_cluster, '--master-only', '--', 79 | './hadoop/bin/hdfs', 'dfs', '-put', remote_file, hdfs_path]) 80 | assert p.returncode == 0 81 | 82 | p = subprocess.run([ 83 | 'flintrock', 'run-command', running_cluster, '--', 84 | './hadoop/bin/hdfs', 'dfs', '-cat', hdfs_path]) 85 | assert p.returncode == 0 86 | 87 | 88 | def test_spark_on_running_cluster(running_cluster, remote_file): 89 | # TODO: Run a real query; e.g. 
sc.parallelize(range(10)).count() 90 | p = subprocess.run([ 91 | 'flintrock', 'run-command', running_cluster, '--', 92 | './spark/bin/pyspark', '--help']) 93 | assert p.returncode == 0 94 | 95 | p = subprocess.run([ 96 | 'flintrock', 'describe', running_cluster, '--master-hostname-only'], 97 | stdout=subprocess.PIPE) 98 | master_address = p.stdout.strip().decode('utf-8') 99 | assert p.returncode == 0 100 | 101 | spark_master_ui = 'http://{m}:8080/json/'.format(m=master_address) 102 | spark_ui_info = json.loads( 103 | urllib.request.urlopen(spark_master_ui).read().decode('utf-8')) 104 | assert spark_ui_info['status'] == 'ALIVE' 105 | 106 | 107 | def test_operations_against_non_existent_cluster(): 108 | cluster_name = 'this_cluster_doesnt_exist_yo' 109 | expected_error_message = ( 110 | b"No cluster " + cluster_name.encode('utf-8') + b" in region ") 111 | 112 | for command in ['describe', 'stop', 'start', 'login', 'destroy']: 113 | p = subprocess.run( 114 | ['flintrock', command, cluster_name], 115 | stderr=subprocess.PIPE) 116 | assert p.returncode == 1 117 | assert p.stderr.startswith(expected_error_message) 118 | 119 | for command in ['run-command']: 120 | p = subprocess.run( 121 | ['flintrock', command, cluster_name, 'ls'], 122 | stderr=subprocess.PIPE) 123 | assert p.returncode == 1 124 | assert p.stderr.startswith(expected_error_message) 125 | 126 | for command in ['copy-file']: 127 | p = subprocess.run( 128 | ['flintrock', command, cluster_name, __file__, '/remote/path'], 129 | stderr=subprocess.PIPE) 130 | assert p.returncode == 1 131 | assert p.stderr.startswith(expected_error_message) 132 | 133 | 134 | def test_operations_against_stopped_cluster(stopped_cluster): 135 | p = subprocess.run( 136 | ['flintrock', 'run-command', stopped_cluster, 'ls'], 137 | stderr=subprocess.PIPE) 138 | expected_error_message = str( 139 | ClusterInvalidState( 140 | attempted_command='run-command', 141 | state='stopped')) 142 | assert p.returncode == 1 143 | assert p.stderr.decode('utf-8').strip() == expected_error_message 144 | 145 | p = subprocess.run( 146 | ['flintrock', 'copy-file', stopped_cluster, __file__, '/remote/path'], 147 | stderr=subprocess.PIPE) 148 | expected_error_message = str( 149 | ClusterInvalidState( 150 | attempted_command='copy-file', 151 | state='stopped')) 152 | assert p.returncode == 1 153 | assert p.stderr.decode('utf-8').strip() == expected_error_message 154 | 155 | 156 | def test_launch_with_bad_ami(): 157 | p = subprocess.run([ 158 | 'flintrock', 'launch', 'whatever-cluster', 159 | '--ec2-ami', 'ami-badbad00'], 160 | stderr=subprocess.PIPE) 161 | assert p.returncode == 1 162 | assert p.stderr.startswith(b"Error: Could not find") 163 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | # Flintrock 5 | from flintrock.core import ( 6 | generate_template_mapping, 7 | get_formatted_template, 8 | ) 9 | 10 | FLINTROCK_ROOT_DIR = ( 11 | os.path.dirname( 12 | os.path.dirname( 13 | os.path.realpath(__file__)))) 14 | 15 | 16 | @pytest.mark.parametrize( 17 | 'spark_version', [ 18 | (''), 19 | ('3.5.0'), 20 | ('a28880f3b9c63d86368bcd6cbbaa6a9af7075409'), 21 | ]) 22 | def test_templates(dummy_cluster, spark_version): 23 | template_dir = os.path.join(FLINTROCK_ROOT_DIR, 'flintrock', 'templates') 24 | for (dirpath, dirnames, filenames) in os.walk(template_dir): 25 | if filenames: 26 | for filename in filenames: 27 | 
template_path = os.path.join(dirpath, filename) 28 | mapping = generate_template_mapping( 29 | cluster=dummy_cluster, 30 | hadoop_version='', 31 | spark_version=spark_version, 32 | spark_executor_instances=0, 33 | ) 34 | get_formatted_template( 35 | path=template_path, 36 | mapping=mapping, 37 | ) 38 | -------------------------------------------------------------------------------- /tests/test_ec2.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import click 3 | from flintrock.ec2 import validate_tags 4 | 5 | 6 | def test_validate_tags(): 7 | # List of test cases; each test case is a tuple, with first element 8 | # the input and the second element the expected output 9 | positive_test_cases = [ 10 | # basic case 11 | (['k1,v1'], [{'Key': 'k1', 'Value': 'v1'}]), 12 | # strips whitespace? 13 | (['k2, v2 '], [{'Key': 'k2', 'Value': 'v2'}]), 14 | # empty Value 15 | (['k3,'], [{'Key': 'k3', 'Value': ''}]), 16 | # multiple tags 17 | (['k4,v4', 'k5,v5'], 18 | [{'Key': 'k4', 'Value': 'v4'}, {'Key': 'k5', 'Value': 'v5'}])] 19 | 20 | for test_case in positive_test_cases: 21 | ec2_tags = validate_tags(test_case[0]) 22 | assert isinstance(ec2_tags, list) 23 | for i, ec2_tag in enumerate(ec2_tags): 24 | expected_dict = test_case[1][i] 25 | for k in expected_dict: 26 | assert k in ec2_tag 27 | assert ec2_tag[k] == expected_dict[k] 28 | 29 | negative_test_cases = [["k1"], ["k2,v2,"], ["k3,,v3"], [",v4"]] 30 | for test_case in negative_test_cases: 31 | with pytest.raises(click.BadParameter): 32 | validate_tags(test_case) 33 | -------------------------------------------------------------------------------- /tests/test_flintrock.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # External modules 4 | import pytest 5 | 6 | # Flintrock modules 7 | from flintrock.exceptions import ( 8 | Error, 9 | UsageError, 10 | ) 11 | from flintrock.flintrock import ( 12 | option_name_to_variable_name, 13 | variable_name_to_option_name, 14 | option_requires, 15 | mutually_exclusive, 16 | get_latest_commit, 17 | validate_download_source, 18 | normalize_keys, 19 | ) 20 | 21 | 22 | def test_option_name_to_variable_name_conversions(): 23 | test_cases = [ 24 | ('--ec2-user', 'ec2_user'), 25 | ('--provider', 'provider'), 26 | ('--spark-git-commit', 'spark_git_commit') 27 | ] 28 | 29 | for option_name, variable_name in test_cases: 30 | assert option_name_to_variable_name(option_name) == variable_name 31 | assert variable_name_to_option_name(variable_name) == option_name 32 | assert option_name == variable_name_to_option_name( 33 | option_name_to_variable_name(option_name)) 34 | assert variable_name == option_name_to_variable_name( 35 | variable_name_to_option_name(variable_name)) 36 | 37 | 38 | def test_option_requires(): 39 | some_option = 'something' 40 | unset_option = None 41 | set_option = '와 짠이다' 42 | 43 | option_requires( 44 | option='--some-option', 45 | requires_all=['--set_option'], 46 | scope=locals() 47 | ) 48 | 49 | option_requires( 50 | option='--some-option', 51 | requires_any=[ 52 | '--set_option', 53 | '--unset-option'], 54 | scope=locals() 55 | ) 56 | 57 | with pytest.raises(UsageError): 58 | option_requires( 59 | option='--some-option', 60 | requires_all=[ 61 | '--set-option', 62 | '--unset-option'], 63 | scope=locals() 64 | ) 65 | 66 | with pytest.raises(UsageError): 67 | option_requires( 68 | option='--some-option', 69 | requires_any=[ 70 | '--unset-option'], 71 | scope=locals() 72 | ) 73 | 74 | 
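# Illustrative sketch (hypothetical; not part of the test suite). As the test
# above demonstrates, option_requires() is handed scope=locals() and resolves
# each required option name to a variable in that scope (compare
# option_name_to_variable_name(), e.g. '--set-option' -> 'set_option'),
# raising UsageError when the triggering option is set but a required option
# is unset. A minimal sketch of guarding one CLI option on another; the
# option names and values below are illustrative only:
#
#     ec2_ami = 'ami-12345678'
#     ec2_user = None
#     option_requires(
#         option='--ec2-ami',
#         requires_all=['--ec2-user'],
#         scope=locals(),
#     )
#     # raises UsageError because the required option is not set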
75 | def test_option_requires_conditional_value(): 76 | unset_option = None 77 | set_option = '대박' 78 | 79 | some_option = 'magic' 80 | option_requires( 81 | option='--some-option', 82 | conditional_value='magic', 83 | requires_any=[ 84 | '--set-option', 85 | '--unset-option'], 86 | scope=locals() 87 | ) 88 | 89 | some_option = 'not magic' 90 | option_requires( 91 | option='--some-option', 92 | conditional_value='magic', 93 | requires_any=[ 94 | '--unset-option'], 95 | scope=locals() 96 | ) 97 | 98 | some_option = '' 99 | option_requires( 100 | option='--some-option', 101 | conditional_value='', 102 | requires_any=[ 103 | '--unset-option'], 104 | scope=locals() 105 | ) 106 | 107 | with pytest.raises(UsageError): 108 | some_option = 'magic' 109 | option_requires( 110 | option='--some-option', 111 | conditional_value='magic', 112 | requires_any=[ 113 | '--unset-option'], 114 | scope=locals() 115 | ) 116 | 117 | 118 | def test_mutually_exclusive(): 119 | option1 = 'yes' 120 | option2 = None 121 | 122 | mutually_exclusive( 123 | options=[ 124 | '--option1', 125 | '--option2'], 126 | scope=locals()) 127 | 128 | option2 = 'no' 129 | with pytest.raises(UsageError): 130 | mutually_exclusive( 131 | options=[ 132 | '--option1', 133 | '--option2'], 134 | scope=locals()) 135 | 136 | 137 | @pytest.mark.xfail( 138 | reason="This may fail on CI with HTTP Error 403: rate limit exceeded.", 139 | raises=Exception, 140 | condition=(os.environ.get('CI') == 'true'), 141 | ) 142 | def test_get_latest_commit(): 143 | sha = get_latest_commit("https://github.com/apache/spark") 144 | assert len(sha) == 40 145 | 146 | with pytest.raises(UsageError): 147 | get_latest_commit("https://google.com") 148 | 149 | with pytest.raises(Exception): 150 | get_latest_commit("https://github.com/apache/nonexistent-repo") 151 | 152 | 153 | @pytest.mark.xfail( 154 | reason=( 155 | "This test will fail whenever a new Hadoop or Spark " 156 | "release is made, which is out of our control." 
157 | ), 158 | raises=Error, 159 | ) 160 | def test_validate_valid_download_source(): 161 | validate_download_source("https://www.apache.org/dyn/closer.lua?action=download&filename=hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz") 162 | validate_download_source("https://www.apache.org/dyn/closer.lua?action=download&filename=spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz") 163 | 164 | 165 | def test_validate_invalid_download_source(): 166 | with pytest.raises(Error): 167 | validate_download_source("https://www.apache.org/dyn/closer.lua?action=download&filename=hadoop/common/hadoop-invalid-version/hadoop-invalid-version.tar.gz") 168 | 169 | 170 | def test_normalize_keys(): 171 | config_file_settings = { 172 | "java-version": 11, 173 | "ec2": { 174 | "spot-price": 0.05, 175 | "key-name": "key.pem", 176 | }, 177 | "tags": ["name, test-cluster"], 178 | } 179 | cli_settings = { 180 | "java_version": 11, 181 | "ec2": { 182 | "spot_price": 0.05, 183 | "key_name": "key.pem", 184 | }, 185 | "tags": ["name, test-cluster"], 186 | } 187 | assert normalize_keys(config_file_settings) == cli_settings 188 | -------------------------------------------------------------------------------- /tests/test_pyinstaller_packaging.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | import subprocess 5 | 6 | from conftest import aws_credentials_required 7 | 8 | # External modules 9 | import pytest 10 | 11 | 12 | def pyinstaller_exists(): 13 | return shutil.which('pyinstaller') is not None 14 | 15 | 16 | # PyTest doesn't let you place skipif markers on fixtures. Otherwise, 17 | # we'd ideally be able to do that and all the dependent tests would be 18 | # skipped automatically. 19 | @pytest.fixture(scope='session') 20 | def pyinstaller_flintrock(): 21 | flintrock_executable_path = './dist/flintrock/flintrock' 22 | p = subprocess.run([ 23 | 'python', 'generate-standalone-package.py' 24 | ]) 25 | assert p.returncode == 0 26 | assert glob.glob('./dist/*.zip') 27 | assert os.path.isfile(flintrock_executable_path) 28 | return flintrock_executable_path 29 | 30 | 31 | @pytest.mark.skipif(not pyinstaller_exists(), reason="PyInstaller is required") 32 | def test_pyinstaller_flintrock_help(pyinstaller_flintrock): 33 | p = subprocess.run( 34 | # Without explicitly setting the locale here, Click will complain 35 | # when this test is run via GitHub Desktop that the locale is 36 | # misconfigured. 37 | """ 38 | export LANG=en_US.UTF-8 39 | {flintrock_executable} 40 | """.format( 41 | flintrock_executable=pyinstaller_flintrock 42 | ), 43 | shell=True) 44 | assert p.returncode == 0 45 | 46 | 47 | @pytest.mark.skipif(not pyinstaller_exists(), reason="PyInstaller is required") 48 | @aws_credentials_required 49 | def test_pyinstaller_flintrock_describe(pyinstaller_flintrock): 50 | # This test picks up some PyInstaller packaging issues that are not 51 | # exposed by the help test. 52 | p = subprocess.run( 53 | # Without explicitly setting the locale here, Click will complain 54 | # when this test is run via GitHub Desktop that the locale is 55 | # misconfigured.
56 | """ 57 | export LANG=en_US.UTF-8 58 | {flintrock_executable} describe 59 | """.format( 60 | flintrock_executable=pyinstaller_flintrock, 61 | ), 62 | shell=True) 63 | assert p.returncode == 0 64 | -------------------------------------------------------------------------------- /tests/test_scripts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | 5 | import pytest 6 | 7 | 8 | @pytest.fixture(scope='module') 9 | def tgz_file(request): 10 | with tempfile.NamedTemporaryFile() as source_file: 11 | source_file.file.write('Hi!'.encode('utf-8')) 12 | tgz_file_name = source_file.name + '.tgz' 13 | subprocess.run( 14 | ['tar', 'czf', tgz_file_name, source_file.name], 15 | check=True, 16 | ) 17 | 18 | def destroy(): 19 | subprocess.run( 20 | ['rm', tgz_file_name], 21 | check=True, 22 | ) 23 | request.addfinalizer(destroy) 24 | 25 | return tgz_file_name 26 | 27 | 28 | def test_download_package(project_root_dir, tgz_file): 29 | with tempfile.TemporaryDirectory() as temp_dir: 30 | subprocess.run( 31 | [ 32 | 'python', 33 | os.path.join(project_root_dir, 'flintrock/scripts/download-package.py'), 34 | 'file://' + tgz_file, 35 | temp_dir, 36 | ], 37 | check=True, 38 | ) 39 | -------------------------------------------------------------------------------- /tests/test_static.py: -------------------------------------------------------------------------------- 1 | import compileall 2 | import os 3 | import subprocess 4 | 5 | # External modules 6 | import yaml 7 | 8 | FLINTROCK_ROOT_DIR = ( 9 | os.path.dirname( 10 | os.path.dirname( 11 | os.path.realpath(__file__)))) 12 | 13 | TEST_TARGETS = [ 14 | 'setup.py', 15 | 'flintrock/', 16 | 'tests/'] 17 | 18 | TEST_PATHS = [ 19 | os.path.join(FLINTROCK_ROOT_DIR, path) for path in TEST_TARGETS] 20 | 21 | 22 | def test_code_compiles(): 23 | for path in TEST_PATHS: 24 | if os.path.isdir(path): 25 | result = compileall.compile_dir(path) 26 | else: 27 | result = compileall.compile_file(path) 28 | # NOTE: This is not publicly documented, but a return of 1 means 29 | # the compilation succeeded. 30 | # See: http://bugs.python.org/issue25768 31 | assert result == 1 32 | 33 | 34 | def test_flake8(): 35 | ret = subprocess.call(['flake8'], cwd=FLINTROCK_ROOT_DIR) 36 | assert ret == 0 37 | 38 | 39 | def test_config_template_is_valid(): 40 | config_template = os.path.join(FLINTROCK_ROOT_DIR, 'flintrock', 'config.yaml.template') 41 | with open(config_template) as f: 42 | yaml.safe_load(f) 43 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | from flintrock.util import spark_hadoop_build_version 2 | 3 | 4 | def test_spark_hadoop_build_version(): 5 | assert spark_hadoop_build_version('3.1.3') == 'hadoop3.2' 6 | --------------------------------------------------------------------------------
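The fixtures and acceptance tests above share one pattern: module-scoped fixtures in conftest.py provision real AWS resources (and always destroy them in a finally block), while each test shells out to the flintrock CLI and asserts on the process return code. A minimal sketch of how an additional acceptance test could follow that pattern in tests/test_acceptance.py; the test name and destination path are hypothetical, not taken from the repository:

import subprocess


def test_copy_file_to_tmp(running_cluster, local_file):
    # Reuses the running_cluster and local_file fixtures from conftest.py,
    # mirroring test_copy_file_on_running_cluster above. The destination
    # path '/tmp/example_copy' is arbitrary and purely illustrative.
    p = subprocess.run([
        'flintrock', 'copy-file', running_cluster, local_file, '/tmp/example_copy',
    ])
    assert p.returncode == 0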