├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── flintrock.yaml │ └── infra.yaml ├── .gitignore ├── CHANGES.md ├── CONTRIBUTING.md ├── COPYRIGHT ├── LICENSE ├── MANIFEST.in ├── README.md ├── flintrock-logo.png ├── flintrock ├── __init__.py ├── __main__.py ├── config.yaml.template ├── core.py ├── ec2.py ├── exceptions.py ├── flintrock.py ├── scripts │ ├── adoptium.repo │ ├── download-package.py │ └── setup-ephemeral-storage.py ├── services.py ├── ssh.py ├── templates │ ├── hadoop │ │ └── conf │ │ │ ├── core-site.xml │ │ │ ├── hadoop-env.sh │ │ │ ├── hdfs-site.xml │ │ │ ├── masters │ │ │ └── slaves │ └── spark │ │ └── conf │ │ ├── slaves │ │ └── spark-env.sh └── util.py ├── generate-standalone-package.py ├── hook-flintrock.py ├── make-release.sh ├── pyproject.toml ├── requirements ├── developer.in ├── developer.pip ├── maintainer.in ├── maintainer.pip ├── user.in └── user.pip ├── setup.cfg ├── setup.py ├── standalone.py ├── test-infra ├── .gitignore ├── README.md ├── bastion.tf ├── delete-test-infra.sh ├── network.tf ├── provider.tf └── variables.tf └── tests ├── README.md ├── conftest.py ├── test_acceptance.py ├── test_core.py ├── test_ec2.py ├── test_flintrock.py ├── test_pyinstaller_packaging.py ├── test_scripts.py ├── test_static.py └── test_util.py /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | * Flintrock version: 6 | * Python version: 7 | * OS: 8 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | This PR makes the following changes: 2 | * 3 | * 4 | 5 | I tested this PR by... 6 | 7 | Fixes #NN. 8 | Fixes #MM. 9 | -------------------------------------------------------------------------------- /.github/workflows/flintrock.yaml: -------------------------------------------------------------------------------- 1 | name: flintrock 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: 17 | - ubuntu-20.04 18 | - macos-14 19 | python-version: 20 | # Update the artifact upload steps below if modifying 21 | # this list of Python versions. 22 | - "3.9" 23 | - "3.10" 24 | - "3.11" 25 | - "3.12" 26 | - "3.13" 27 | name: ${{ matrix.os }} / Python ${{ matrix.python-version }} 28 | steps: 29 | - uses: actions/checkout@v3 30 | - name: Set up Python 31 | uses: actions/setup-python@v3 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | - run: "pip install -r requirements/maintainer.pip" 35 | - run: "pytest" 36 | - run: python -m build 37 | - uses: actions/upload-artifact@v3 38 | # Use the latest supported Python to build a standalone package. 39 | if: ${{ matrix.python-version == '3.13' }} 40 | with: 41 | name: Flintrock Standalone - ${{ matrix.os }} 42 | path: dist/Flintrock-*-standalone-*.zip 43 | - uses: actions/upload-artifact@v3 44 | # Use the oldest supported Python to build a wheel. 
45 | if: ${{ matrix.os == 'ubuntu-20.04' && matrix.python-version == '3.9' }} 46 | with: 47 | name: Flintrock Wheel 48 | path: dist/Flintrock-*.whl 49 | -------------------------------------------------------------------------------- /.github/workflows/infra.yaml: -------------------------------------------------------------------------------- 1 | name: test-infra 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | terraform-lint: 13 | runs-on: ubuntu-20.04 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Check Terraform Formatting 17 | run: | 18 | cd test-infra 19 | terraform fmt -check -diff 20 | - name: Validate Terraform Templates 21 | run: | 22 | cd test-infra 23 | terraform init 24 | terraform validate 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Started from: https://github.com/github/gitignore/blob/master/Python.gitignore 2 | 3 | .vscode/ 4 | .metals/ 5 | config.yaml 6 | venv/ 7 | *example.py 8 | flintrock-logo/ 9 | .hypothesis/ 10 | *.prf 11 | .DS_Store 12 | .pytest_cache/ 13 | /resources/ 14 | .python-version 15 | 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | env/ 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [Unreleased] 4 | 5 | [Unreleased]: https://github.com/nchammas/flintrock/compare/v2.1.0...master 6 | 7 | ### Changed 8 | 9 | * [#383]: Dropped support for Python 3.8 and added CI build for Python 3.13. 10 | 11 | [#383]: https://github.com/nchammas/flintrock/pull/383 12 | 13 | ## [2.1.0] - 2023-11-26 14 | 15 | [2.1.0]: https://github.com/nchammas/flintrock/compare/v2.0.0...2.1.0 16 | 17 | ### Changed 18 | 19 | * [#348], [#367]: Bumped default Spark to 3.5.0 and default Hadoop to 3.3.6; dropped support for Python 3.6 and 3.7; added CI builds for Python 3.10, 3.11, and 3.12. 20 | * [#361]: Migrated from AdoptOpenJDK, which is deprecated, to Adoptium OpenJDK. 21 | * [#362], [#366]: Improved Flintrock's ability to cleanup after launch failures. 22 | * [#366]: Deprecated `--ec2-spot-request-duration`, which is not needed for one-time spot instances launched using the RunInstances API. 23 | * [#369]: Adopted `pyproject.toml` and tweaked Flintrock's Python packaging accordingly. This keeps Flintrock in line with modern Python packaging standards and should be transparent to end-users. 
24 | 25 | [#348]: https://github.com/nchammas/flintrock/pull/348 26 | [#361]: https://github.com/nchammas/flintrock/pull/361 27 | [#362]: https://github.com/nchammas/flintrock/pull/362 28 | [#366]: https://github.com/nchammas/flintrock/pull/366 29 | [#367]: https://github.com/nchammas/flintrock/pull/367 30 | [#369]: https://github.com/nchammas/flintrock/pull/369 31 | 32 | ## [2.0.0] - 2021-06-10 33 | 34 | [2.0.0]: https://github.com/nchammas/flintrock/compare/v1.0.0...v2.0.0 35 | 36 | ### Added 37 | 38 | * [#296]: Added support for launching clusters into private VPCs. This includes new infrastructure added in [#302] to support testing against private VPCs. 39 | * [#307]: Added support for Hadoop/HDFS 3.x. 40 | * [#315]: Added a new `--ec2-spot-request-duration` option to support setting the EC2 spot request duration. 41 | * [#316]: Added a new `--java-version` option and support for Java 11. 42 | * [#323]: Flintrock now automatically selects the correct build of Spark to use, based on the version of Hadoop/HDFS that you specify. 43 | * [#324]: Flintrock now supports S3 URLs as a download source for Hadoop or Spark. This makes it easy to host your own copies of the Hadoop and Spark release builds in a private bucket. 44 | 45 | [#296]: https://github.com/nchammas/flintrock/pull/296 46 | [#302]: https://github.com/nchammas/flintrock/pull/302 47 | [#307]: https://github.com/nchammas/flintrock/pull/307 48 | [#315]: https://github.com/nchammas/flintrock/pull/315 49 | [#316]: https://github.com/nchammas/flintrock/pull/316 50 | [#323]: https://github.com/nchammas/flintrock/pull/323 51 | [#324]: https://github.com/nchammas/flintrock/pull/324 52 | 53 | ### Changed 54 | 55 | * [#285]: Flintrock now configures cluster nodes to use private IP addresses for internal communication. This should improve the reliability of cluster launches and restarts. 56 | * [#304]: Fixed a bug in how `UserData` scripts are submitted to new cluster slaves. 57 | * [#311]: Changed how Flintrock manages its own security groups to reduce the likelihood of hitting any limits on the number of rules per security group. 58 | * [#326]: Switched some internals from using host names to IP addresses, which should improve Flintrock's behavior when running from an EC2 host. 59 | * [#329]: Dropped support for Python 3.5 and added automated testing for Python 3.8 and 3.9. 60 | * [#334]: Flintrock now ensures that `python3` is available on launched clusters and sets that as the default Python that PySpark will use. 61 | 62 | [#285]: https://github.com/nchammas/flintrock/pull/285 63 | [#304]: https://github.com/nchammas/flintrock/pull/304 64 | [#311]: https://github.com/nchammas/flintrock/pull/311 65 | [#326]: https://github.com/nchammas/flintrock/pull/326 66 | [#329]: https://github.com/nchammas/flintrock/pull/329 67 | [#334]: https://github.com/nchammas/flintrock/pull/334 68 | 69 | ## [1.0.0] - 2020-01-11 70 | 71 | [1.0.0]: https://github.com/nchammas/flintrock/compare/v0.11.0...v1.0.0 72 | 73 | ### Changed 74 | 75 | * [#297]: Dropped support for Python 3.4. 76 | * [#252]: Flintrock now pins all its transitive dependencies via the files under `requirements/`. This is useful for users who want to build Flintrock themselves. 
77 | 78 | [#297]: https://github.com/nchammas/flintrock/pull/297 79 | [#252]: https://github.com/nchammas/flintrock/pull/252 80 | 81 | ## [0.11.0] - 2018-12-02 82 | 83 | [0.11.0]: https://github.com/nchammas/flintrock/compare/v0.10.0...v0.11.0 84 | 85 | ### Changed 86 | 87 | * [#258], [#268]: Fixed up support for Python 3.7. 88 | * [#264]: Fixed a logging error in `flintrock describe --master-hostname-only`. 89 | * [#277]: Fixed a bug in resolving client IP addresses from behind proxy. 90 | 91 | [#258]: https://github.com/nchammas/flintrock/pull/258 92 | [#264]: https://github.com/nchammas/flintrock/pull/264 93 | [#268]: https://github.com/nchammas/flintrock/pull/268 94 | [#277]: https://github.com/nchammas/flintrock/pull/277 95 | 96 | ## [0.10.0] - 2018-07-15 97 | 98 | [0.10.0]: https://github.com/nchammas/flintrock/compare/v0.9.0...v0.10.0 99 | 100 | ### Added 101 | 102 | * [#242]: Flintrock is now available on Homebrew: 103 | ``` 104 | brew install flintrock 105 | ``` 106 | This is a community-supported distribution. 107 | 108 | [#242]: https://github.com/nchammas/flintrock/pull/242 109 | 110 | ### Changed 111 | 112 | * [#224]: Fixed a problem with some Flintrock config combinations 113 | related to Hadoop. 114 | * [#232]: When you destroy a cluster, Flintrock now waits until the 115 | instances are completely terminated before returning. 116 | * [#234]: Flintrock now tries more times by default to connect via 117 | SSH, which should provide more launch stability in certain 118 | environments. 119 | * [#246]: Fixed some bugs with `flintrock describe` that are exposed 120 | when a cluster is transitioning states (e.g. from running to 121 | terminated). 122 | * [#249]: **Flintrock now downloads both Spark and Hadoop from Apache 123 | mirrors by default.** This is a significant change. You can read the 124 | background on what prompted this change in [#238]. 125 | * [#254]: Flintrock no longer configures hadoop-aws automatically due 126 | to version incompatibilities that are difficult to resolve 127 | automatically. Instead, the README now provides additional guidance 128 | on using `s3a://`. 129 | * [#259]: Flintrock now correctly ignores tiny devices that show up 130 | on some instance types, like the M5 series on EC2. This fixes the 131 | problems Flintrock had getting HDFS to work on those instance 132 | types. 133 | 134 | [#224]: https://github.com/nchammas/flintrock/pull/224 135 | [#232]: https://github.com/nchammas/flintrock/pull/232 136 | [#234]: https://github.com/nchammas/flintrock/pull/234 137 | [#238]: https://github.com/nchammas/flintrock/pull/238 138 | [#246]: https://github.com/nchammas/flintrock/pull/246 139 | [#249]: https://github.com/nchammas/flintrock/pull/249 140 | [#254]: https://github.com/nchammas/flintrock/pull/254 141 | [#259]: https://github.com/nchammas/flintrock/pull/259 142 | 143 | ## [0.9.0] - 2017-08-06 144 | 145 | [0.9.0]: https://github.com/nchammas/flintrock/compare/v0.8.0...v0.9.0 146 | 147 | ### Added 148 | 149 | * [#178]: You can now see additional output during launch and other 150 | operations with the new `--debug` option. 151 | * [#185]: Added a new mount point under `/media/tmp` that can be used 152 | when `/tmp` is not big enough. 153 | * [#186]: You can now tag your clusters with arbitrary tags on launch 154 | using the new `--ec2-tag` option. (Remember: As with all options, 155 | you can also set this via `flintrock configure`.) 
156 | * [#191]: You can now specify the size of the root EBS volume with the 157 | new `--ec2-min-root-ebs-size-gb` option. 158 | * [#181]: You can now set the number of executors per worker with 159 | `--spark-executor-instances`. 160 | 161 | [#178]: https://github.com/nchammas/flintrock/pull/178 162 | [#185]: https://github.com/nchammas/flintrock/pull/185 163 | [#186]: https://github.com/nchammas/flintrock/pull/186 164 | [#191]: https://github.com/nchammas/flintrock/pull/191 165 | [#181]: https://github.com/nchammas/flintrock/pull/181 166 | 167 | ### Changed 168 | 169 | * [#195]: After launching a new cluster, Flintrock now shows the 170 | master address and login command. 171 | * [#196], [#197]: Fixed some bugs that were preventing Flintrock from 172 | launching Spark clusters at a specific commit. 173 | * [#204]: Flintrock now automatically retries starting the Spark and 174 | HDFS masters if it encounters common issues with bringing the 175 | cluster up. This greatly improves launch and restart reliability. 176 | * [#208]: Flintrock now provides a hint with possible causes for 177 | certain SSH errors. 178 | 179 | [#195]: https://github.com/nchammas/flintrock/pull/195 180 | [#196]: https://github.com/nchammas/flintrock/pull/196 181 | [#197]: https://github.com/nchammas/flintrock/pull/197 182 | [#204]: https://github.com/nchammas/flintrock/pull/204 183 | [#208]: https://github.com/nchammas/flintrock/pull/208 184 | 185 | ## [0.8.0] - 2017-02-11 186 | 187 | [0.8.0]: https://github.com/nchammas/flintrock/compare/v0.7.0...v0.8.0 188 | 189 | ### Added 190 | 191 | * [#180]: Accessing data on S3 from your Flintrock cluster is now much 192 | easier! Just configure Flintrock to use Hadoop 2.7+ (which is the 193 | default) and an appropriate IAM role, and you'll be able to access 194 | paths on S3 using the new `s3a://` prefix. [Check the README] for 195 | more information. 196 | * [#176], [#187]: Flintrock now supports users with non-standard home 197 | directories. 198 | 199 | [#180]: https://github.com/nchammas/flintrock/pull/180 200 | [#176]: https://github.com/nchammas/flintrock/pull/176 201 | [#187]: https://github.com/nchammas/flintrock/pull/187 202 | [Check the README]: https://github.com/nchammas/flintrock/tree/v0.8.0#accessing-data-on-s3 203 | 204 | ### Changed 205 | 206 | * [#168]: Flintrock now does a better job of cleaning up after 207 | interrupted operations. 208 | * [#179], [#184]: Flintrock can now clean up malformed Flintrock 209 | clusters. 210 | * [`6b426ae`]: We fixed an issue affecting some users of Flintrock's 211 | standalone package that caused Flintrock to intermittently throw 212 | `ImportError`s. 213 | 214 | [#168]: https://github.com/nchammas/flintrock/pull/168 215 | [#179]: https://github.com/nchammas/flintrock/pull/179 216 | [#184]: https://github.com/nchammas/flintrock/pull/184 217 | [`6b426ae`]: https://github.com/nchammas/flintrock/commit/6b426aedc7e92b434021cc09c6e7eb181fca7eef 218 | 219 | ## [0.7.0] - 2016-11-15 220 | 221 | [0.7.0]: https://github.com/nchammas/flintrock/compare/v0.6.0...v0.7.0 222 | 223 | ### Added 224 | 225 | * [#146]: Flintrock now ensures that launched clusters have Java 8 or 226 | higher installed. 227 | * [#149]: You can now specify an [EC2 user data] script to use on launch 228 | with the new `--ec2-user-data` option. 
229 | 230 | [#146]: https://github.com/nchammas/flintrock/pull/146 231 | [#149]: https://github.com/nchammas/flintrock/pull/149 232 | [EC2 user data]: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html 233 | 234 | ### Changed 235 | 236 | * [#154], [#155], [#156]: Flintrock now provides friendly error messages 237 | when it encounters common configuration or setup problems. 238 | 239 | [#154]: https://github.com/nchammas/flintrock/pull/154 240 | [#155]: https://github.com/nchammas/flintrock/pull/155 241 | [#156]: https://github.com/nchammas/flintrock/pull/156 242 | 243 | ## [0.6.0] - 2016-08-28 244 | 245 | [0.6.0]: https://github.com/nchammas/flintrock/compare/v0.5.0...v0.6.0 246 | 247 | ### Added 248 | 249 | * [#115]: Flintrock can now resize existing clusters with the new 250 | `add-slaves` and `remove-slaves` commands. 251 | 252 | [#115]: https://github.com/nchammas/flintrock/pull/115 253 | 254 | ### Changed 255 | 256 | * [#115]: If you lost your master somehow, Flintrock can now still 257 | destroy the cluster. 258 | * [#115]: You can no longer launch clusters with 0 slaves. The 259 | implementation was broken. We may fix and add this capability back 260 | in the future. 261 | 262 | ## [0.5.0] - 2016-07-20 263 | 264 | [0.5.0]: https://github.com/nchammas/flintrock/compare/v0.4.0...v0.5.0 265 | 266 | ### Added 267 | 268 | * [#118]: You can now specify `--hdfs-download-source` (or the 269 | equivalent in your config file) to tell Flintrock to download Hadoop 270 | from a specific URL when launching your cluster. 271 | * [#125]: You can now specify `--spark-download-source` (or the 272 | equivalent in your config file) to tell Flintrock to download Spark 273 | from a specific URL when launching your cluster. 274 | * [#112]: You can now specify `--ec2-security-group` to associate 275 | additional security groups with your cluster on launch. 276 | 277 | [#118]: https://github.com/nchammas/flintrock/pull/118 278 | [#125]: https://github.com/nchammas/flintrock/pull/125 279 | [#112]: https://github.com/nchammas/flintrock/pull/112 280 | 281 | ### Changed 282 | 283 | * [#103], [#114]: Flintrock now opens port 6066 and 7077 so local 284 | clients like Apache Zeppelin can connect directly to the Spark 285 | master on the cluster. 286 | * [#122]: Flintrock now automatically adds executables like 287 | `spark-submit`, `pyspark`, and `hdfs` to the default `PATH`, so 288 | they're available to call right when you login to the cluster. 289 | 290 | [#103]: https://github.com/nchammas/flintrock/pull/103 291 | [#114]: https://github.com/nchammas/flintrock/pull/114 292 | [#122]: https://github.com/nchammas/flintrock/pull/122 293 | 294 | ## [0.4.0] - 2016-03-27 295 | 296 | [0.4.0]: https://github.com/nchammas/flintrock/compare/v0.3.0...v0.4.0 297 | 298 | ### Added 299 | 300 | * [#98], [#99]: You can now specify `latest` for `--spark-git-commit` 301 | and Flintrock will automatically build Spark on your cluster at the 302 | latest commit. This feature is only available for Spark repos 303 | hosted on GitHub. 304 | * [#94]: Flintrock now supports launching clusters into non-default 305 | VPCs. 306 | 307 | [#94]: https://github.com/nchammas/flintrock/pull/94 308 | [#98]: https://github.com/nchammas/flintrock/pull/98 309 | [#99]: https://github.com/nchammas/flintrock/pull/99 310 | 311 | ### Changed 312 | 313 | * [#86]: Flintrock now correctly catches when spot requests fail and 314 | bubbles up an appropriate error message. 315 | * [#93], [#97]: Fixed the ability to build Spark from git. 
(It was 316 | broken for recent commits.) 317 | * [#96], [#100]: Flintrock launches should now work correctly whether 318 | the default Python on the cluster is Python 2.7 or Python 3.4+. 319 | 320 | [#86]: https://github.com/nchammas/flintrock/pull/86 321 | [#93]: https://github.com/nchammas/flintrock/pull/93 322 | [#96]: https://github.com/nchammas/flintrock/pull/96 323 | [#97]: https://github.com/nchammas/flintrock/pull/97 324 | [#100]: https://github.com/nchammas/flintrock/pull/100 325 | 326 | ## [0.3.0] - 2016-02-14 327 | 328 | [0.3.0]: https://github.com/nchammas/flintrock/compare/v0.2.0...v0.3.0 329 | 330 | ### Changed 331 | 332 | * [`eca59fc`], [`3cf6ee6`]: Tweaked a few things so that Flintrock 333 | can launch 200+ node clusters without hitting certain limits. 334 | 335 | [`eca59fc`]: https://github.com/nchammas/flintrock/commit/eca59fc0052874d9aa48b7d4d7d79192b5e609d1 336 | [`3cf6ee6`]: https://github.com/nchammas/flintrock/commit/3cf6ee64162ceaac6429d79c3bc6ef25988eaa8e 337 | 338 | ## [0.2.0] - 2016-02-07 339 | 340 | [0.2.0]: https://github.com/nchammas/flintrock/compare/v0.1.0...v0.2.0 341 | 342 | ### Added 343 | 344 | * [`b00fd12`]: Added `--assume-yes` option to the `launch` command. 345 | Use `--assume-yes` to tell Flintrock to automatically destroy the 346 | cluster if there are problems during launch. 347 | 348 | [`b00fd12`]: https://github.com/nchammas/flintrock/commit/b00fd128f36e0a05dafca69b26c4d1b190fa42c9 349 | 350 | ### Changed 351 | 352 | * [#69]: Automatically retry Hadoop download from flaky Apache 353 | mirrors. 354 | * [`0df7004`]: Delete unneeded security group after a cluster is 355 | destroyed. 356 | * [`244f734`]: Default HDFS not to install. Going forward, Spark will 357 | be the only service that Flintrock installs by default. Defaults can 358 | easily be changed via Flintrock's config file. 359 | * [`de33412`]: Flintrock installs services, not modules. The 360 | terminology has been updated accordingly throughout the code and 361 | docs. Update your config file to use `services` instead of 362 | `modules`. **Warning**: Flintrock will have problems managing 363 | existing clusters that were launched with versions of Flintrock from 364 | before this change. 365 | * [#73]: Major refactoring of Flintrock internals. 366 | * [#74]: Flintrock now catches common configuration problems upfront 367 | and provides simple error messages, instead of barfing out errors 368 | from EC2 or launching broken clusters. 369 | * [`bf766ba`]: Fixed a bug in how Flintrock polls SSH availability 370 | from Linux. Cluster launches now work from Linux as intended. 371 | 372 | [#69]: https://github.com/nchammas/flintrock/pull/69 373 | [`0df7004`]: https://github.com/nchammas/flintrock/commit/0df70043f3da215fe699165bc961bd0c4ba4ea88 374 | [`244f734`]: https://github.com/nchammas/flintrock/commit/244f7345696d1b8cec1d1b575a304b9bd9a77840 375 | [`de33412`]: https://github.com/nchammas/flintrock/commit/de3341221ca8d57f5a465b13f07c8e266ae11a59 376 | [#73]: https://github.com/nchammas/flintrock/pull/73 377 | [#74]: https://github.com/nchammas/flintrock/pull/74 378 | [`bf766ba`]: https://github.com/nchammas/flintrock/commit/bf766ba48f12a8752c2e32f9b3daf29501c30866 379 | 380 | ## [0.1.0] - 2015-12-11 381 | 382 | [0.1.0]: https://github.com/nchammas/flintrock/releases/tag/v0.1.0 383 | 384 | * Initial release. 
385 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | There are many ways to contribute to Flintrock. 4 | 5 | ## Contributing Thanks 6 | 7 | When we put our time and enthusiasm into an open source project like this, we hope that somewhere out there we are putting a smile on someone's face. 8 | 9 | Most of the time we'll never know, though. When people reach out within an open source community, it's typically to report a problem, ask for help, or share an idea. 10 | 11 | That's a bummer, because hearing first-hand that we made a positive impact on someone else's day, even if it's minor, can be a huge boost of joy and motivation. 12 | 13 | Don't underestimate the power of a thank you. If Flintrock helped you in some way, share your story, even if it's "trivial", and know that at times this can be the most valuable way to contribute to the project. 14 | 15 | 16 | ## Contributing Money 17 | 18 | Most projects have various kinds of tests to make sure things are working correctly. The most valuable test for an orchestration tool like Flintrock is a full acceptance test, since the fundamental thing Flintrock does is manage remote resources. 19 | 20 | This means that as Flintrock developers we are always launching and destroying instances on some cloud provider, which costs money. Any money you contribute will go towards paying those bills. 21 | 22 | We're still figuring out how best to accept donations for these purposes, but [Amazon Allowance](http://www.amazon.com/b?ie=UTF8&node=11453461011) looks promising. 23 | 24 | 25 | ## Contributing Bug Reports 26 | 27 | When reporting a bug, do your best to provide a [short, self contained, and correct example](http://sscce.org/) of the problem you are seeing. Bug reports will otherwise likely be ignored, unless they are really easy to reproduce. 28 | 29 | In addition to reporting bugs, you can also confirm or deny existing bug reports. This helps us prioritize bug fixes and understand if certain bugs are limited to certain configurations. 30 | 31 | 32 | ## Contributing Feature Requests 33 | 34 | ### Describe your problem first, not just your solution 35 | 36 | What are you trying to do? Explain the root problem clearly. **This is more important than describing your proposed solution.** 37 | 38 | When we understand your feature request in the context of what you are really trying to do, we can better evaluate any proposed solutions and perhaps even come up with a better solution that you might not see. 39 | 40 | Describing your original problem or use case will also help us avoid the [X-Y Problem](http://mywiki.wooledge.org/XyProblem), which can waste a lot of everyone's time. 41 | 42 | If you see an existing feature request that you are interested in, chime in. Your input will help us flesh out the request and understand how much demand there is for it. 43 | 44 | 45 | ## Contributing Code 46 | 47 | Sometimes, you just wanna write some code. Just keep these guidelines in mind before you do that if you want your code contribution accepted. 48 | 49 | ### License 50 | 51 | Unless you explicitly tell us otherwise, when you contribute code you affirm that the contribution is your original work and that you license it to the project under the project's [license](LICENSE). 52 | 53 | Please make sure that you are OK with our license's terms before contributing code. 
54 | 55 | ### Setup 56 | 57 | If you agree to our license, the next thing you'll want to do is get Flintrock's source code and install its development dependencies. 58 | 59 | ```sh 60 | git clone https://github.com/nchammas/flintrock 61 | cd flintrock 62 | 63 | python3 -m venv venv 64 | source venv/bin/activate 65 | 66 | pip3 install -r requirements/developer.pip 67 | ``` 68 | 69 | When you `git pull` the latest changes, don't forget to also rerun the `pip install` step so that Flintrock's dependencies stay up-to-date. 70 | 71 | ### Trivial bug fixes or changes 72 | 73 | If you're making a small change, go right ahead and open that pull request. There's no need to coordinate beforehand. 74 | 75 | ### New features, non-trivial changes 76 | 77 | There are a few things you should do before diving in to write a new feature or implement some non-trivial change. 78 | 79 | ### Changing dependencies 80 | 81 | If you are changing anything about Flintrock's dependencies, be sure to update the compiled requirements using [pip-tools] and the lowest version of Python that Flintrock supports (Python 3.9): 82 | 83 | [pip-tools]: https://github.com/jazzband/pip-tools 84 | 85 | ```shell 86 | function update-deps() { 87 | pip install -U "pip-tools==7.3.0" 88 | 89 | pip-compile -U requirements/user.in -o requirements/user.pip 90 | pip-compile -U requirements/developer.in -o requirements/developer.pip 91 | pip-compile -U requirements/maintainer.in -o requirements/maintainer.pip 92 | 93 | # Uncomment whichever set of requirements makes sense for you. 94 | # pip-sync requirements/user.pip 95 | # pip-sync requirements/developer.pip 96 | # pip-sync requirements/maintainer.pip 97 | } 98 | 99 | update-deps 100 | ``` 101 | 102 | `pip-compile` takes the provided set of input requirements, like `user.in`, and compiles them into a full list of pinned transitive dependencies, like `user.pip`. This is similar to a lock file. `pip-sync` ensures that the current active virtual environment has exactly the dependencies listed in the provided pip file, no more and no less. 103 | 104 | #### Coordinate first 105 | 106 | Coordinating first means starting a discussion with the core developers to get a sense of how to approach the problem you want to work on. 107 | 108 | If you don't do this and just submit a pull request out of the blue, there is a good chance you will write something that is unwanted, either because it doesn't fit the project, or because it was implemented in an undesirable way. 109 | 110 | This doesn't mean that you need to wait for some official blessing before doing any interesting work. It just means that your chances of getting your work merged rise considerably when that work has had some input from those closest to the project. 111 | 112 | #### Weigh the maintenance burden 113 | 114 | Programming can be like intercourse. A neat new feature can be cranked out after a passionate night of coding, but -- if accepted into the project -- it has to be maintained for years, often at much greater cumulative cost than what the initial implementation took. 115 | 116 | When building something new, don't just consider the value it will provide. Consider also how much work it will take to keep it working over the years. Is it worth it in the long run? This is doubly important if you don't see yourself sticking around to take care of your baby. How easy will it be for others to take responsibility for your work? 
117 | 118 | #### Capture one idea in one pull request 119 | 120 | *Note: This section is largely a summary of the [guidance given here](https://secure.phabricator.com/book/phabflavor/article/recommendations_on_revision_control/) by Evan Priestley of the Phabricator project.* 121 | 122 | Make sure each pull request you submit captures a single coherent idea. This limits the scope of any given pull request and makes it much easier for a reviewer to understand what you are doing and give precise feedback. Don't mix logically independent changes in the same request if they can be submitted separately. 123 | 124 | #### Expect many revisions 125 | 126 | If you are adding or touching lots of code, then be prepared to go through many rounds of revisions before your pull request is accepted. This is normal, especially as you are still getting acquainted with the project's standards and style. 127 | 128 | ### Test your changes 129 | 130 | Whether your changes are big or small, you'll want to test them. Flintrock includes [tests](./tests/) which you should use. 131 | 132 | ### Don't expand the support matrix 133 | 134 | We will generally reject contributions that expand the number of operating systems, configurations, or languages that Flintrock supports, because they impose a large maintenance burden on the project over its lifespan. In some cases this might mean rejecting contributions that could significantly expand the project's potential user base. 135 | 136 | We accept this tradeoff because we have seen popular open source projects go to decay because their maintenance burden grew large enough to kill the fun of the project for the core developers. 137 | 138 | Small open source projects like Flintrock, which do not have the backing of a company, run on the free time and interest of contributors. Keeping the project's maintenance burden as small as possible, sometimes at the cost of reach, makes it more likely that contributors will continue to take interest in the project for a long time. This better serves our user base over the long run. 139 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright 2015 Nicholas Chammas 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use the files in this repository except in compliance with 5 | the License. 6 | 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024 Nicholas Chammas 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # See: https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html 2 | graft flintrock 3 | 4 | include README.md 5 | include CHANGES.md 6 | include COPYRIGHT 7 | include LICENSE 8 | 9 | global-exclude *.py[cod] __pycache__ .DS_Store 10 | global-exclude config.yaml 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Flintrock logo](https://raw.githubusercontent.com/nchammas/flintrock/master/flintrock-logo.png) 2 | 3 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/nchammas/flintrock/blob/master/LICENSE) 4 | [![Build Status](https://github.com/nchammas/flintrock/actions/workflows/flintrock.yaml/badge.svg)](https://github.com/nchammas/flintrock/actions) 5 | 6 | Flintrock is a command-line tool for launching [Apache Spark](http://spark.apache.org/) clusters. 7 | 8 | 9 | ## Flintrock around the web 10 | 11 | Flintrock has been featured in a few talks, guides, and papers around the web. 12 | 13 | * Talks: 14 | * [Flintrock: A faster, better spark-ec2](https://www.youtube.com/watch?v=3aeIpOGrJOA) ([slides](http://www.slideshare.net/SparkSummit/flintrock-a-faster-better-sparkec2-by-nicholas-chammas)) 15 | * Guides: 16 | * Running Spark on a Cluster: The Basics (using Flintrock) 17 | * [Part 1: Start a Spark Cluster and Use the spark-shell](http://heather.miller.am/blog/launching-a-spark-cluster-part-1.html) 18 | * [Part 2: Dependencies, S3, and Deploying via spark-submit](http://heather.miller.am/blog/launching-a-spark-cluster-part-2.html) 19 | * [Spark with Jupyter on AWS](https://github.com/PiercingDan/spark-Jupyter-AWS) 20 | * [Building a data science platform for R&D, part 2 – Deploying Spark on AWS using Flintrock](https://alexioannides.com/2016/08/18/building-a-data-science-platform-for-rd-part-2-deploying-spark-on-aws-using-flintrock/) 21 | * [AWS EC2를 활용 스파크 클러스터 생성](http://statkclee.github.io/ml/ml-aws-ec2-flintrock.html) 22 | * Papers: 23 | * ["Birds in the Clouds": Adventures in Data Engineering](https://arxiv.org/pdf/1710.08521.pdf) 24 | 25 | 26 | ## Usage 27 | 28 | Here's a quick way to launch a cluster on EC2, assuming you already have an [AWS account set up](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/get-set-up-for-amazon-ec2.html). Flintrock works best with Amazon Linux. You can get the latest AMI IDs [from here](https://aws.amazon.com/amazon-linux-2/release-notes/). 
29 | 30 | ```sh 31 | flintrock launch test-cluster \ 32 | --num-slaves 1 \ 33 | --spark-version 3.5.0 \ 34 | --ec2-key-name key_name \ 35 | --ec2-identity-file /path/to/key.pem \ 36 | --ec2-ami ami-0588935a949f9ff17 \ 37 | --ec2-user ec2-user 38 | ``` 39 | 40 | If you [persist these options to a file](#configurable-cli-defaults), you'll be able to do the same thing much more concisely: 41 | 42 | ```sh 43 | flintrock configure 44 | # Save your preferences via the opened editor, then... 45 | flintrock launch test-cluster 46 | ``` 47 | 48 | Once you're done using a cluster, don't forget to destroy it with: 49 | 50 | ```sh 51 | flintrock destroy test-cluster 52 | ``` 53 | 54 | Other things you can do with Flintrock include: 55 | 56 | ```sh 57 | flintrock login test-cluster 58 | flintrock describe test-cluster 59 | flintrock add-slaves test-cluster --num-slaves 2 60 | flintrock remove-slaves test-cluster --num-slaves 1 61 | flintrock run-command test-cluster 'sudo yum install -y package' 62 | flintrock copy-file test-cluster /local/path /remote/path 63 | ``` 64 | 65 | To see what else Flintrock can do, or to see detailed help for a specific command, try: 66 | 67 | ```sh 68 | flintrock --help 69 | flintrock <command> --help 70 | ``` 71 | 72 | That's not all. Flintrock has a few more [features](#features) that you may find interesting. 73 | 74 | ### Accessing data on S3 75 | 76 | We recommend you access data on S3 from your Flintrock cluster by following 77 | these steps: 78 | 79 | 1. Set up an [IAM Role](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html) 80 | that grants access to S3 as desired. Reference this role when you launch 81 | your cluster using the `--ec2-instance-profile-name` option (or its 82 | equivalent in your `config.yaml` file). 83 | 2. Reference S3 paths in your Spark code using the `s3a://` prefix. `s3a://` is 84 | backwards compatible with `s3n://` and replaces both `s3n://` and `s3://`. 85 | The Hadoop project [recommends using `s3a://`](https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html#S3A) 86 | since it is actively developed, supports larger files, and offers 87 | better performance. 88 | 3. Make sure Flintrock is configured to use Hadoop/HDFS 2.7+. Earlier 89 | versions of Hadoop do not have solid implementations of `s3a://`. 90 | Flintrock's default is Hadoop 3.3.6, so you don't need to do anything 91 | here if you're using a vanilla configuration. 92 | 4. Call Spark with the hadoop-aws package to enable `s3a://`. For example: 93 | ```sh 94 | spark-submit --packages org.apache.hadoop:hadoop-aws:3.3.6 my-app.py 95 | pyspark --packages org.apache.hadoop:hadoop-aws:3.3.6 96 | ``` 97 | If you have issues using the package, consult the [hadoop-aws troubleshooting 98 | guide](http://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html) 99 | and try adjusting the version. As a rule of thumb, you should match the version 100 | of hadoop-aws to the version of Hadoop that Spark was built against (which is 101 | typically Hadoop 3.2 or 2.7), even if the version of Hadoop that you're deploying to 102 | your Flintrock cluster is different. 103 | 104 | With this approach you don't need to copy around your AWS credentials 105 | or pass them into your Spark programs. As long as the assigned IAM role 106 | allows it, Spark will be able to read and write data to S3 simply by 107 | referencing the appropriate path (e.g. `s3a://bucket/path/to/file`). 
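As a rough end-to-end sketch of the steps above (the instance profile name `my-s3-access-role` and the bucket `my-bucket` are placeholders; substitute your own):

```sh
# Launch a cluster whose instances carry an IAM role that grants the
# S3 access your jobs need. The profile name here is a placeholder.
flintrock launch test-cluster \
    --ec2-instance-profile-name my-s3-access-role

# On the cluster, start Spark with the hadoop-aws package and reference
# s3a:// paths directly; no credentials need to be copied or passed in.
pyspark --packages org.apache.hadoop:hadoop-aws:3.3.6
# >>> spark.read.text("s3a://my-bucket/path/to/file").count()
```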
108 | 109 | 110 | ## Installation 111 | 112 | Before using Flintrock, take a quick look at the 113 | [copyright](https://github.com/nchammas/flintrock/blob/master/COPYRIGHT) 114 | notice and [license](https://github.com/nchammas/flintrock/blob/master/LICENSE) 115 | and make sure you're OK with their terms. 116 | 117 | **Flintrock requires Python 3.9 or newer**, unless you are using one 118 | of our **standalone packages**. Flintrock has been thoroughly tested 119 | only on OS X, but it should run on all POSIX systems. 120 | A motivated contributor should be able to add 121 | [Windows support](https://github.com/nchammas/flintrock/issues/46) 122 | without too much trouble, too. 123 | 124 | ### Release version 125 | 126 | To get the latest release of Flintrock, simply install it with [pip][pip]. 127 | 128 | Since Flintrock is a command-line application rather than a library, you may prefer to 129 | install it using [pipx][pipx], which automatically takes care of installing Flintrock to 130 | an isolated virtual environment for you. 131 | 132 | [pip]: https://pip.pypa.io/en/stable/ 133 | [pipx]: https://pypa.github.io/pipx/ 134 | 135 | ``` 136 | pipx install flintrock 137 | ``` 138 | 139 | This will install Flintrock and place it on your path. You should be good to go now! 140 | 141 | You'll probably want to get started with the following two commands: 142 | 143 | ```sh 144 | flintrock --help 145 | flintrock configure 146 | ``` 147 | 148 | ### Standalone version (Python not required!) 149 | 150 | We used to publish standalone versions of Flintrock that don't require you to have Python 151 | installed on your machine. Since Flintrock 2.1.0, we have stopped publishing these 152 | standalone builds. 153 | 154 | If you used these standalone packages, please [chime in on this issue][standalone] and 155 | share a bit about your environment and use case. 156 | 157 | [standalone]: https://github.com/nchammas/flintrock/issues/370 158 | 159 | ### Community-supported distributions 160 | 161 | Flintrock is also available via the following package managers: 162 | 163 | * [Homebrew](https://brew.sh): `brew install flintrock` 164 | 165 | These packages are not supported by the core contributors and **may be out of date**. Please reach out to the relevant communities directly if you have trouble using these distributions to install Flintrock. You can always find the latest release of Flintrock [on GitHub](https://github.com/nchammas/flintrock/releases/latest) and [on PyPI](https://pypi.org/project/Flintrock/). 166 | 167 | ### Development version 168 | 169 | If you like living on the edge, install the development version of Flintrock: 170 | 171 | ```sh 172 | pipx install git+https://github.com/nchammas/flintrock 173 | ``` 174 | 175 | If you want to [contribute](https://github.com/nchammas/flintrock/blob/master/CONTRIBUTING.md), follow the instructions in our contributing guide on [how to install Flintrock](https://github.com/nchammas/flintrock/blob/master/CONTRIBUTING.md#contributing-code). 176 | 177 | ## Use Cases 178 | 179 | ### Experimentation 180 | 181 | If you want to play around with Spark, develop a prototype application, run a one-off job, or otherwise just experiment, Flintrock is the fastest way to get you a working Spark cluster. 182 | 183 | ### Performance testing 184 | 185 | Flintrock exposes many options of its underlying providers (e.g. 
EBS-optimized volumes on EC2) which makes it easy to create a cluster with predictable performance for [Spark performance testing](https://github.com/databricks/spark-perf). 186 | 187 | ### Automated pipelines 188 | 189 | Most people will use Flintrock interactively from the command line, but Flintrock is also designed to be used as part of an automated pipeline. Flintrock's exit codes are carefully chosen; it offers options to disable interactive prompts; and when appropriate it prints output in YAML, which is both human- and machine-friendly. 190 | 191 | 192 | ## Anti-Use Cases 193 | 194 | There are some things that Flintrock specifically *does not* support. 195 | 196 | ### Managing permanent infrastructure 197 | 198 | Flintrock is not for managing long-lived clusters, or any infrastructure that serves as a permanent part of some environment. 199 | 200 | For starters, Flintrock provides no guarantee that clusters launched with one version of Flintrock can be managed by another version of Flintrock, and no considerations are made for any long-term use cases. 201 | 202 | If you are looking for ways to manage permanent infrastructure, look at tools like [Terraform](https://www.terraform.io/), [Ansible](http://www.ansible.com/), or [Ubuntu Juju](http://www.ubuntu.com/cloud/tools/juju). You might also find a service like [Databricks](https://databricks.com/product/databricks) useful if you're looking for someone else to host and manage Spark for you. Amazon also offers [Spark on EMR](https://aws.amazon.com/elasticmapreduce/details/spark/). 203 | 204 | ### Launching non-Spark-related services 205 | 206 | Flintrock is meant for launching Spark clusters that include closely related services like HDFS. 207 | 208 | Flintrock is not for launching external datasources (e.g. Cassandra), or other services that are not closely integrated with Spark (e.g. Tez). 209 | 210 | If you are looking for an easy way to launch other services from the Hadoop ecosystem, look at the [Apache Bigtop](http://bigtop.apache.org/) project. 211 | 212 | ### Launching out-of-date services 213 | 214 | Flintrock will always take advantage of new features of Spark and related services to make the process of launching a cluster faster, simpler, and easier to maintain. If that means dropping support for launching older versions of a service, then we will generally make that tradeoff. 215 | 216 | 217 | ## Features 218 | 219 | ### Polished CLI 220 | 221 | Flintrock has a clean command-line interface. 222 | 223 | ```sh 224 | flintrock --help 225 | flintrock describe 226 | flintrock destroy --help 227 | flintrock launch test-cluster --num-slaves 10 228 | ``` 229 | 230 | ### Configurable CLI Defaults 231 | 232 | Flintrock lets you persist your desired configuration to a YAML file so that you don't have to keep typing out the same options over and over at the command line. 233 | 234 | To setup and edit the default config file, run this: 235 | 236 | ```sh 237 | flintrock configure 238 | ``` 239 | 240 | You can also point Flintrock to a non-default config file by using the `--config` option. 
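For example, here is a minimal sketch, assuming an alternate config file saved at a path of your choosing (`~/flintrock-staging.yaml` below is just an illustration):

```sh
# In this sketch, --config is passed to flintrock itself, before the subcommand.
flintrock --config ~/flintrock-staging.yaml launch test-cluster
```

This can be handy if you keep separate configs for, say, different regions or cluster profiles.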
241 | 242 | #### Sample `config.yaml` 243 | 244 | ```yaml 245 | provider: ec2 246 | 247 | services: 248 | spark: 249 | version: 3.5.0 250 | 251 | launch: 252 | num-slaves: 1 253 | 254 | providers: 255 | ec2: 256 | key-name: key_name 257 | identity-file: /path/to/.ssh/key.pem 258 | instance-type: m5.large 259 | region: us-east-1 260 | ami: ami-0588935a949f9ff17 261 | user: ec2-user 262 | ``` 263 | 264 | With a config file like that, you can now launch a cluster with just this: 265 | 266 | ```sh 267 | flintrock launch test-cluster 268 | ``` 269 | 270 | And if you want, you can even override individual options in your config file at the command line: 271 | 272 | ```sh 273 | flintrock launch test-cluster \ 274 | --num-slaves 10 \ 275 | --ec2-instance-type r5.xlarge 276 | ``` 277 | 278 | ### Fast Launches 279 | 280 | Flintrock is really fast. It can launch a 100-node cluster in about three minutes (give or take a few seconds due to AWS's normal performance variability). 281 | 282 | ### Advanced Storage Setup 283 | 284 | Flintrock automatically configures any available [ephemeral storage](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html) on the cluster and makes it available to installed services like HDFS and Spark. This storage is fast and is perfect for use as a temporary store by those services. 285 | 286 | ### Tests 287 | 288 | Flintrock comes with a set of automated, end-to-end [tests](https://github.com/nchammas/flintrock/tree/master/tests). These tests help us develop Flintrock with confidence and guarantee a certain level of quality. 289 | 290 | ### Low-level Provider Options 291 | 292 | Flintrock exposes low-level provider options (e.g. [instance-initiated shutdown behavior](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/terminating-instances.html#Using_ChangingInstanceInitiatedShutdownBehavior)) so you can control the details of how your cluster is setup if you want. 293 | 294 | ### No Custom Machine Image Dependencies 295 | 296 | Flintrock is built and tested against vanilla Amazon Linux and CentOS. You can easily launch Flintrock clusters using your own custom machine images built from either of those distributions. 297 | 298 | 299 | ## Anti-Features 300 | 301 | ### Support for out-of-date versions of Python, EC2 APIs, etc. 302 | 303 | Supporting multiple versions of anything is tough. There's more surface area to cover for testing, and over the long term the maintenance burden of supporting something non-current with bug fixes and workarounds really adds up. 304 | 305 | There are projects that support stuff across a wide cut of language or API versions. For example, Spark supports multiple versions of Java, Scala, R, and Python. The people behind these projects are gods. They take on an immense maintenance burden for the benefit and convenience of their users. 306 | 307 | We here at project Flintrock are much more modest in our abilities. We are best able to serve the project over the long term when we limit ourselves to supporting a small but widely applicable set of configurations. 308 | 309 | 310 | ## Motivation 311 | 312 | *Note: The explanation here is provided from the perspective of Flintrock's original author, Nicholas Chammas.* 313 | 314 | I got started with Spark by using [spark-ec2](https://github.com/amplab/spark-ec2). It's one of the biggest reasons I found Spark so accessible. I didn't need to spend time upfront working through some setup guide before I could work on a "real" problem. 
Instead, with a simple spark-ec2 command I was able to launch a large, working cluster and get straight to business. 315 | 316 | As I became a heavy user of spark-ec2, several limitations stood out and became an increasing pain. They provided me with the motivation for this project. 317 | 318 | Among those limitations, the most frustrating ones were: 319 | 320 | * **Slow launches**: spark-ec2 cluster launch times increase linearly with the number of slaves being created. For example, it takes spark-ec2 **[over an hour](https://issues.apache.org/jira/browse/SPARK-5189)** to launch a cluster with 100 slaves. ([SPARK-4325](https://issues.apache.org/jira/browse/SPARK-4325), [SPARK-5189](https://issues.apache.org/jira/browse/SPARK-5189)) 321 | * **No support for configuration files**: spark-ec2 does not support reading options from a config file, so users are always forced to type them in at the command line. ([SPARK-925](https://issues.apache.org/jira/browse/SPARK-925)) 322 | * **Un-resizable clusters**: Adding or removing slaves from an existing spark-ec2 cluster is not possible. ([SPARK-2008](https://issues.apache.org/jira/browse/SPARK-2008)) 323 | * **Custom machine images**: spark-ec2 uses custom machine images, making it difficult for users to bring their own image. And since the process of updating those machine images is not automated, they have not been updated in years. ([SPARK-3821](https://issues.apache.org/jira/browse/SPARK-3821)) 324 | 325 | I built Flintrock to address all of these shortcomings, which it does. 326 | 327 | ### Why build Flintrock when we have EMR? 328 | 329 | I started work on Flintrock months before [EMR added support for Spark](https://aws.amazon.com/blogs/aws/new-apache-spark-on-amazon-emr/). It's likely that, had I considered building Flintrock a year later than I did, I would have decided against it. 330 | 331 | Now that Flintrock exists, many users appreciate the lower cost of running Flintrock clusters as compared to EMR, as well as Flintrock's simpler interface. And for my part, I enjoy working on Flintrock in my free time. 332 | 333 | ### Why didn't you build Flintrock on top of an orchestration tool? 334 | 335 | People have asked me whether I considered building Flintrock on top of Ansible, Terraform, Docker, or something else. I looked into some of these things back when Flintrock was just an idea in my head and decided against using any of them for two basic reasons: 336 | 337 | 1. **Fun**: I didn't have any experience with these tools, and it looked both simple enough and more fun to build something "from scratch". 338 | 2. **Focus**: I wanted a single-purpose tool with a very limited focus, not a module or set of scripts that were part of a sprawling framework that did a lot of different things. 339 | 340 | These are not necessarily the right reasons to build "from scratch", but they were my reasons. If you are already comfortable with any of the popular orchestration tools out there, you may find it more attractive to use them rather than add a new standalone tool to your toolchain. 341 | 342 | 343 | ## About the Flintrock Logo 344 | 345 | The [Flintrock logo](https://github.com/nchammas/flintrock/blob/master/flintrock-logo.png) was created using [Highbrow Cafetorium JNL](http://www.myfonts.com/fonts/jnlevine/highbrow-cafetorium/) and [this icon](https://thenounproject.com/term/stars/40856/). Licenses to use both the font and icon were purchased from their respective owners. 
346 | -------------------------------------------------------------------------------- /flintrock-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nchammas/flintrock/7056d20a7f2dee78dd8c89c19d538458cc13288a/flintrock-logo.png -------------------------------------------------------------------------------- /flintrock/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.2.0.dev0' 2 | -------------------------------------------------------------------------------- /flintrock/__main__.py: -------------------------------------------------------------------------------- 1 | # See: https://docs.python.org/3/library/__main__.html 2 | import sys 3 | 4 | # Flintrock modules 5 | from .flintrock import main 6 | 7 | if __name__ == '__main__': 8 | sys.exit(main()) 9 | -------------------------------------------------------------------------------- /flintrock/config.yaml.template: -------------------------------------------------------------------------------- 1 | services: 2 | spark: 3 | version: 3.5.0 4 | # git-commit: latest # if not 'latest', provide a full commit SHA; e.g. d6dc12ef0146ae409834c78737c116050961f350 5 | # git-repository: # optional; defaults to https://github.com/apache/spark 6 | # optional; defaults to download from a dynamically selected Apache mirror 7 | # - can be http, https, or s3 URL 8 | # - must contain a {v} template corresponding to the version 9 | # - Spark must be pre-built 10 | # - files must be named according to the release pattern shown here: https://dist.apache.org/repos/dist/release/spark/ 11 | # download-source: "https://www.example.com/files/spark/{v}/" 12 | # download-source: "s3://some-bucket/spark/{v}/" 13 | # executor-instances: 1 14 | hdfs: 15 | version: 3.3.6 16 | # optional; defaults to download from a dynamically selected Apache mirror 17 | # - can be http, https, or s3 URL 18 | # - must contain a {v} template corresponding to the version 19 | # - files must be named according to the release pattern shown here: https://dist.apache.org/repos/dist/release/hadoop/common/ 20 | # download-source: "https://www.example.com/files/hadoop/{v}/" 21 | # download-source: "http://www-us.apache.org/dist/hadoop/common/hadoop-{v}/" 22 | # download-source: "s3://some-bucket/hadoop/{v}/" 23 | 24 | provider: ec2 25 | 26 | providers: 27 | ec2: 28 | key-name: key_name 29 | identity-file: /path/to/key.pem 30 | instance-type: m5.large 31 | region: us-east-1 32 | # availability-zone: 33 | ami: ami-0588935a949f9ff17 # Amazon Linux 2, us-east-1 34 | user: ec2-user 35 | # ami: ami-61bbf104 # CentOS 7, us-east-1 36 | # user: centos 37 | # spot-price: 38 | # vpc-id: 39 | # subnet-id: 40 | # placement-group: 41 | # security-groups: 42 | # - group-name1 43 | # - group-name2 44 | # instance-profile-name: 45 | # tags: 46 | # - key1,value1 47 | # - key2, value2 # leading/trailing spaces are trimmed 48 | # - key3, # value will be empty 49 | # min-root-ebs-size-gb: 50 | tenancy: default # default | dedicated 51 | ebs-optimized: no # yes | no 52 | instance-initiated-shutdown-behavior: terminate # terminate | stop 53 | # user-data: /path/to/userdata/script 54 | # authorize-access-from: 55 | # - 10.0.0.42/32 56 | # - sg-xyz4654564xyz 57 | 58 | launch: 59 | num-slaves: 1 60 | # install-hdfs: True 61 | # install-spark: False 62 | # java-version: 8 63 | 64 | debug: false 65 | -------------------------------------------------------------------------------- 
/flintrock/core.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import functools 3 | import json 4 | import os 5 | import posixpath 6 | import shlex 7 | import sys 8 | import logging 9 | from concurrent.futures import FIRST_EXCEPTION 10 | 11 | # External modules 12 | import paramiko 13 | 14 | # Flintrock modules 15 | from .ssh import get_ssh_client, ssh_check_output, ssh, SSHKeyPair 16 | from .exceptions import SSHError 17 | 18 | FROZEN = getattr(sys, 'frozen', False) 19 | 20 | if FROZEN: 21 | THIS_DIR = sys._MEIPASS 22 | else: 23 | THIS_DIR = os.path.dirname(os.path.realpath(__file__)) 24 | 25 | SCRIPTS_DIR = os.path.join(THIS_DIR, 'scripts') 26 | 27 | 28 | logger = logging.getLogger('flintrock.core') 29 | 30 | 31 | class StorageDirs: 32 | def __init__(self, *, root, ephemeral, persistent): 33 | self.root = root 34 | self.ephemeral = ephemeral 35 | self.persistent = persistent 36 | 37 | 38 | # TODO: Implement concept of ClusterNode. (?) That way we can 39 | # define a cluster as having several nodes, and implement 40 | # actions as `for node in nodes: node.action()`. 41 | # NOTE: We take both IP addresses and host names because we 42 | # don't understand why Spark doesn't accept IP addresses 43 | # in its config, yet we prefer IP addresses when 44 | # connecting to hosts to avoid single-threaded DNS lookups. 45 | # See: https://github.com/nchammas/flintrock/issues/43 46 | # See: http://www.dalkescientific.com/writings/diary/archive/2012/01/19/concurrent.futures.html 47 | class FlintrockCluster: 48 | def __init__( 49 | self, 50 | *, 51 | name, 52 | ssh_key_pair=None, 53 | storage_dirs=StorageDirs(root=None, ephemeral=None, persistent=None), 54 | ): 55 | self.name = name 56 | self.ssh_key_pair = ssh_key_pair 57 | self.storage_dirs = storage_dirs 58 | self.java_version = None 59 | self.services = [] 60 | 61 | @property 62 | def master_ip(self) -> str: 63 | """ 64 | The IP address of the master. 65 | 66 | Providers must override this property since it is typically derived from 67 | an underlying object, like an EC2 instance. 68 | """ 69 | raise NotImplementedError 70 | 71 | @property 72 | def master_host(self) -> str: 73 | """ 74 | The hostname of the master. 75 | 76 | Providers must override this property since it is typically derived from 77 | an underlying object, like an EC2 instance. 78 | """ 79 | raise NotImplementedError 80 | 81 | @property 82 | def private_network(self) -> bool: 83 | """ 84 | Indicate if this cluster runs on a private network. 85 | 86 | Providers must override this property since it is typically derived from 87 | an underlying object, like the VPC subnet of an EC2 Instance. 88 | """ 89 | raise NotImplementedError 90 | 91 | @property 92 | def slave_ips(self) -> 'List[str]': 93 | """ 94 | A list of the IP addresses of the slaves. 95 | 96 | Providers must override this property since it is typically derived from 97 | an underlying object, like an EC2 instance. 98 | """ 99 | raise NotImplementedError 100 | 101 | @property 102 | def slave_hosts(self) -> 'List[str]': 103 | """ 104 | A list of the hostnames of the slaves. 105 | 106 | Providers must override this property since it is typically derived from 107 | an underlying object, like an EC2 instance. 108 | """ 109 | raise NotImplementedError 110 | 111 | @property 112 | def num_masters(self) -> int: 113 | """ 114 | How many masters the cluster has. 
115 | 116 | This normally just equals 1, but in cases where the cluster master 117 | has been destroyed this should return 0. 118 | 119 | Providers must override this property. 120 | """ 121 | raise NotImplementedError 122 | 123 | @property 124 | def num_slaves(self) -> int: 125 | """ 126 | How many slaves the cluster has. 127 | 128 | This is typically just len(self.slave_ips), but we need a separate 129 | property because slave IPs are not available when the cluster is 130 | stopped, and sometimes in that situation we still want to know how 131 | many slaves there are. 132 | 133 | Providers must override this property. 134 | """ 135 | raise NotImplementedError 136 | 137 | def load_manifest(self, *, user: str, identity_file: str): 138 | """ 139 | Load a cluster's manifest from the master. This will populate information 140 | about installed services and configured storage. 141 | 142 | Providers shouldn't need to override this method. 143 | """ 144 | if not self.master_ip: 145 | return 146 | 147 | master_ssh_client = get_ssh_client( 148 | user=user, 149 | host=self.master_ip, 150 | identity_file=identity_file, 151 | wait=True, 152 | print_status=False) 153 | 154 | with master_ssh_client: 155 | manifest_raw = ssh_check_output( 156 | client=master_ssh_client, 157 | command=""" 158 | cat "$HOME/.flintrock-manifest.json" 159 | """) 160 | # TODO: Would it be better if storage (ephemeral and otherwise) was 161 | # implemented as a Flintrock service and tracked in the manifest? 162 | ephemeral_dirs_raw = ssh_check_output( 163 | client=master_ssh_client, 164 | # It's generally safer to avoid using ls: 165 | # http://mywiki.wooledge.org/ParsingLs 166 | command=""" 167 | shopt -s nullglob 168 | for f in /media/ephemeral*; do 169 | echo "$f" 170 | done 171 | """) 172 | 173 | manifest = json.loads(manifest_raw) 174 | 175 | self.ssh_key_pair = SSHKeyPair( 176 | public=manifest['ssh_key_pair']['public'], 177 | private=manifest['ssh_key_pair']['private'], 178 | ) 179 | 180 | self.java_version = manifest['java_version'] 181 | 182 | services = [] 183 | for [service_name, manifest] in manifest['services']: 184 | # TODO: Expose the classes being used here. 185 | service = globals()[service_name](**manifest) 186 | services.append(service) 187 | self.services = services 188 | 189 | storage_dirs = StorageDirs( 190 | root='/media/root', 191 | ephemeral=sorted(ephemeral_dirs_raw.splitlines()), 192 | persistent=None) 193 | self.storage_dirs = storage_dirs 194 | 195 | def destroy_check(self): 196 | """ 197 | Check that the cluster is in a state in which it can be destroyed. 198 | 199 | Providers should override this method since we have no way to perform 200 | this check in a provider-agnostic way. 201 | """ 202 | pass 203 | 204 | def destroy(self): 205 | """ 206 | Destroy the cluster and any resources created specifically to support 207 | it. 208 | 209 | Providers should override this method since we have no way to destroy a 210 | cluster in a provider-agnostic way. 211 | 212 | Nonetheless, this method should be called before the underlying provider 213 | destroys the nodes. That way, if we ever add cleanup logic here to destroy 214 | resources external to the cluster it will get executed correctly. 215 | """ 216 | pass 217 | 218 | def start_check(self): 219 | """ 220 | Check that the cluster is in a state in which it can be started. 221 | 222 | The interface can use this method to decide whether it needs to prompt 223 | the user for confirmation. If the cluster cannot be started (e.g. 
224 | because it's already running) then we don't want to show a prompt. 225 | 226 | Providers should override this method since we have no way to perform 227 | this check in a provider-agnostic way. 228 | """ 229 | pass 230 | 231 | def start(self, *, user: str, identity_file: str): 232 | """ 233 | Start up all the services installed on the cluster. 234 | 235 | This method assumes that the nodes constituting cluster were just 236 | started up by the provider (e.g. EC2, GCE, etc.) they're hosted on 237 | and are running. 238 | """ 239 | self.load_manifest(user=user, identity_file=identity_file) 240 | 241 | partial_func = functools.partial( 242 | start_node, 243 | services=self.services, 244 | user=user, 245 | identity_file=identity_file, 246 | cluster=self) 247 | hosts = [self.master_ip] + self.slave_ips 248 | 249 | run_against_hosts(partial_func=partial_func, hosts=hosts) 250 | 251 | master_ssh_client = get_ssh_client( 252 | user=user, 253 | host=self.master_ip, 254 | identity_file=identity_file) 255 | 256 | with master_ssh_client: 257 | for service in self.services: 258 | service.configure_master( 259 | ssh_client=master_ssh_client, 260 | cluster=self) 261 | 262 | for service in self.services: 263 | service.health_check(master_host=self.master_ip) 264 | 265 | def stop_check(self): 266 | """ 267 | Check that the cluster is in a state in which it can be stopped. 268 | 269 | Providers should override this method since we have no way to perform 270 | this check in a provider-agnostic way. 271 | """ 272 | pass 273 | 274 | def stop(self): 275 | """ 276 | Prepare the cluster to be stopped by the underlying provider. 277 | 278 | There's currently nothing to do here, but this method should be called 279 | before the underlying provider stops the nodes. 280 | """ 281 | pass 282 | 283 | def add_slaves_check(self): 284 | pass 285 | 286 | def add_slaves(self, *, user: str, identity_file: str, new_hosts: list): 287 | """ 288 | Add new slaves to the cluster. 289 | 290 | Providers should implement this with the following signature: 291 | 292 | add_slaves(self, *, user: str, identity_file: str, num_slaves: int, **provider_specific_options) 293 | 294 | This method should be called after the new hosts are online and have been 295 | added to the cluster's internal list. 296 | """ 297 | hosts = [self.master_ip] + self.slave_ips 298 | partial_func = functools.partial( 299 | add_slaves_node, 300 | java_version=self.java_version, 301 | services=self.services, 302 | user=user, 303 | identity_file=identity_file, 304 | cluster=self, 305 | new_hosts=new_hosts) 306 | run_against_hosts(partial_func=partial_func, hosts=hosts) 307 | 308 | master_ssh_client = get_ssh_client( 309 | user=user, 310 | host=self.master_ip, 311 | identity_file=identity_file) 312 | with master_ssh_client: 313 | for service in self.services: 314 | service.configure_master( 315 | ssh_client=master_ssh_client, 316 | cluster=self) 317 | 318 | def remove_slaves(self, *, user: str, identity_file: str): 319 | """ 320 | Remove some slaves from the cluster. 321 | 322 | Providers should implement this method with the following signature: 323 | 324 | remove_slaves(self, *, user: str, identity_file: str, num_slaves: int) 325 | 326 | This method should be called after the provider has removed the slaves 327 | from the cluster's internal list but before the instances themselves 328 | have been terminated. 329 | 330 | This method simply makes sure that the rest of the cluster knows that 331 | the relevant slaves are no longer part of the cluster. 
332 | """ 333 | self.load_manifest(user=user, identity_file=identity_file) 334 | 335 | partial_func = functools.partial( 336 | remove_slaves_node, 337 | user=user, 338 | identity_file=identity_file, 339 | services=self.services, 340 | cluster=self) 341 | hosts = [self.master_ip] + self.slave_ips 342 | 343 | run_against_hosts(partial_func=partial_func, hosts=hosts) 344 | 345 | def run_command_check(self): 346 | """ 347 | Check that the cluster is in a state that supports running commands. 348 | 349 | Providers should override this method since we have no way to perform 350 | this check in a provider-agnostic way. 351 | """ 352 | pass 353 | 354 | def run_command( 355 | self, 356 | *, 357 | master_only: bool, 358 | user: str, 359 | identity_file: str, 360 | command: tuple): 361 | """ 362 | Run a shell command on each node of an existing cluster. 363 | 364 | If master_only is True, then run the comand on the master only. 365 | """ 366 | if master_only: 367 | target_hosts = [self.master_ip] 368 | else: 369 | target_hosts = [self.master_ip] + self.slave_ips 370 | 371 | partial_func = functools.partial( 372 | run_command_node, 373 | user=user, 374 | identity_file=identity_file, 375 | command=command) 376 | hosts = target_hosts 377 | 378 | run_against_hosts(partial_func=partial_func, hosts=hosts) 379 | 380 | def copy_file_check(self): 381 | """ 382 | Check that the cluster is in a state in which files can be copied to 383 | it. 384 | 385 | Providers should override this method since we have no way to perform 386 | this check in a provider-agnostic way. 387 | """ 388 | pass 389 | 390 | def copy_file( 391 | self, 392 | *, 393 | master_only: bool, 394 | user: str, 395 | identity_file: str, 396 | local_path: str, 397 | remote_path: str): 398 | """ 399 | Copy a file to each node of an existing cluster. 400 | 401 | If master_only is True, then copy the file to the master only. 402 | """ 403 | if master_only: 404 | target_hosts = [self.master_ip] 405 | else: 406 | target_hosts = [self.master_ip] + self.slave_ips 407 | 408 | partial_func = functools.partial( 409 | copy_file_node, 410 | user=user, 411 | identity_file=identity_file, 412 | local_path=local_path, 413 | remote_path=remote_path) 414 | hosts = target_hosts 415 | 416 | run_against_hosts(partial_func=partial_func, hosts=hosts) 417 | 418 | def login( 419 | self, 420 | *, 421 | user: str, 422 | identity_file: str): 423 | """ 424 | Interactively SSH into the cluster master. 425 | """ 426 | ssh( 427 | host=self.master_ip, 428 | user=user, 429 | identity_file=identity_file) 430 | 431 | 432 | def generate_template_mapping( 433 | *, 434 | cluster: FlintrockCluster, 435 | # If we add additional services later on we may want to refactor 436 | # this to take a list of services and dynamically pull the service 437 | # name. 438 | spark_executor_instances: int, 439 | hadoop_version: str, 440 | spark_version: str 441 | ) -> dict: 442 | """ 443 | Generate a template mapping from a FlintrockCluster instance that we can use 444 | to fill in template parameters. 
445 | """ 446 | hadoop_root_dir = posixpath.join(cluster.storage_dirs.root, 'hadoop') 447 | hadoop_ephemeral_dirs = ','.join( 448 | posixpath.join(path, 'hadoop') 449 | for path in cluster.storage_dirs.ephemeral 450 | ) 451 | spark_root_dir = posixpath.join(cluster.storage_dirs.root, 'spark') 452 | spark_ephemeral_dirs = ','.join( 453 | posixpath.join(path, 'spark') 454 | for path in cluster.storage_dirs.ephemeral 455 | ) 456 | 457 | template_mapping = { 458 | 'master_ip': cluster.master_ip, 459 | 'master_host': cluster.master_host, 460 | 'master_private_host': cluster.master_private_host, 461 | 'slave_ips': '\n'.join(cluster.slave_ips), 462 | 'slave_hosts': '\n'.join(cluster.slave_hosts), 463 | 'slave_private_hosts': '\n'.join(cluster.slave_private_hosts), 464 | 465 | 'hadoop_version': hadoop_version, 466 | 'hadoop_short_version': '.'.join(hadoop_version.split('.')[:2]), 467 | 'spark_version': spark_version, 468 | 'spark_short_version': '.'.join(spark_version.split('.')[:2]) if '.' in spark_version else spark_version, 469 | 470 | 'spark_executor_instances': spark_executor_instances, 471 | 472 | 'hadoop_root_dir': hadoop_root_dir, 473 | 'hadoop_ephemeral_dirs': hadoop_ephemeral_dirs, 474 | 'spark_root_dir': spark_root_dir, 475 | 'spark_ephemeral_dirs': spark_ephemeral_dirs, 476 | 477 | # If ephemeral storage is available, it replaces the root volume, which is 478 | # typically persistent. We don't want to mix persistent and ephemeral 479 | # storage since that causes problems after cluster stop/start; some volumes 480 | # have leftover data, whereas others start fresh. 481 | 'hadoop_root_ephemeral_dirs': hadoop_ephemeral_dirs if hadoop_ephemeral_dirs else hadoop_root_dir, 482 | 'spark_root_ephemeral_dirs': spark_ephemeral_dirs if spark_ephemeral_dirs else spark_root_dir, 483 | } 484 | 485 | return template_mapping 486 | 487 | 488 | # TODO: Cache these files. (?) They are being read potentially tens or 489 | # hundreds of times. Maybe it doesn't matter because the files 490 | # are so small. 491 | def get_formatted_template(*, path: str, mapping: dict) -> str: 492 | with open(path) as f: 493 | formatted = f.read().format(**mapping) 494 | return formatted 495 | 496 | 497 | def run_against_hosts(*, partial_func: functools.partial, hosts: list): 498 | """ 499 | Run a function asynchronously against each of the provided hosts. 500 | 501 | This function assumes that partial_func accepts `host` as a keyword argument. 502 | """ 503 | with concurrent.futures.ThreadPoolExecutor(len(hosts)) as executor: 504 | futures = { 505 | executor.submit(functools.partial(partial_func, host=host)) 506 | for host in hosts 507 | } 508 | concurrent.futures.wait(futures, return_when=FIRST_EXCEPTION) 509 | for future in futures: 510 | future.result() 511 | 512 | 513 | def get_installed_java_version(client: paramiko.client.SSHClient): 514 | """ 515 | :return: the major version (5,6,7,8...) of the currently installed Java or None if not installed 516 | """ 517 | possible_cmds = [ 518 | "$JAVA_HOME/bin/java -version", 519 | "java -version" 520 | ] 521 | 522 | for command in possible_cmds: 523 | try: 524 | output = ssh_check_output( 525 | client=client, 526 | command=command) 527 | tokens = output.split() 528 | # First line of the output is like: 'openjdk version "1.8.0_252"' or 'openjdk version "11.0.7" 2020-04-14' 529 | # Get the version string and strip out the first two parts of the 530 | # version as an int: 7, 8, 9, 10... 
531 | if len(tokens) >= 3: 532 | version_parts = tokens[2].strip('"').split(".") 533 | if len(version_parts) >= 2: 534 | if version_parts[0] == "1": 535 | # Java 6, 7 or 8 536 | return int(version_parts[1]) 537 | else: 538 | # Java 9+ 539 | return int(version_parts[0]) 540 | except SSHError: 541 | pass 542 | 543 | return None 544 | 545 | 546 | def ensure_java(client: paramiko.client.SSHClient, java_version: int): 547 | """ 548 | Ensures that Java is available on the machine and that it has a 549 | version of at least java_version. 550 | 551 | The specified version of Java will be installed if it does not 552 | exist or the existing version has a major version lower than java_version. 553 | 554 | :param client: 555 | :param java_version: 556 | minimum version of Java required 557 | :return: 558 | """ 559 | host = client.get_transport().getpeername()[0] 560 | installed_java_version = get_installed_java_version(client) 561 | 562 | if installed_java_version == java_version: 563 | logger.info("Java {j} is already installed, skipping Java install".format(j=installed_java_version)) 564 | return 565 | 566 | if installed_java_version and installed_java_version > java_version: 567 | logger.warning(""" 568 | Existing Java {j} installation is newer than the configured version {java_version}. 569 | Your applications will be executed with Java {j}. 570 | Please choose a different AMI if this does not work for you. 571 | """.format(j=installed_java_version, java_version=java_version)) 572 | return 573 | 574 | if installed_java_version and installed_java_version < java_version: 575 | logger.info(""" 576 | Existing Java {j} will be upgraded to Adoptium OpenJDK {java_version} 577 | """.format(j=installed_java_version, java_version=java_version)) 578 | 579 | # We will install Adoptium OpenJDK because it gives us access to Java 8 through 15 580 | # Right now, Amazon Extras only provides Corretto Java 8, 11 and 15 581 | logger.info("[{h}] Installing Adoptium OpenJDK Java {j}...".format(h=host, j=java_version)) 582 | 583 | install_adoptium_repo(client) 584 | java_package = "temurin-{j}-jdk".format(j=java_version) 585 | ssh_check_output( 586 | client=client, 587 | command=""" 588 | set -e 589 | 590 | # Install Java first to protect packages that depend on Java from being removed. 591 | sudo yum install -q -y {jp} 592 | 593 | # Remove any older versions of Java to force the default Java to the requested version. 594 | # We don't use /etc/alternatives because it does not seem to update links in /usr/lib/jvm correctly, 595 | # and we don't just rely on JAVA_HOME because some programs use java directly in the PATH. 596 | sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk 597 | 598 | sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/{jp} >> /etc/environment" 599 | source /etc/environment 600 | """.format(jp=java_package)) 601 | 602 | 603 | def install_adoptium_repo(client): 604 | """ 605 | Installs the adoptium.repo file into /etc/yum.repos.d/ 606 | """ 607 | with client.open_sftp() as sftp: 608 | sftp.put( 609 | localpath=os.path.join(SCRIPTS_DIR, 'adoptium.repo'), 610 | remotepath='/tmp/adoptium.repo') 611 | ssh_check_output( 612 | client=client, 613 | command=""" 614 | # Use sudo to install the repo file 615 | sudo mv /tmp/adoptium.repo /etc/yum.repos.d/ 616 | """ 617 | ) 618 | 619 | 620 | def setup_node( 621 | *, 622 | # Change this to take host, user, and identity_file? 623 | # Add some kind of caching for SSH connections so that they 624 | # can be looked up by host and reused? 
625 | ssh_client: paramiko.client.SSHClient, 626 | services: list, 627 | java_version: int, 628 | cluster: FlintrockCluster): 629 | """ 630 | Setup a new node. 631 | 632 | Cluster methods like provision_node() and add_slaves_node() should 633 | delegate the main work of setting up new nodes to this function. 634 | """ 635 | host = ssh_client.get_transport().getpeername()[0] 636 | ssh_check_output( 637 | client=ssh_client, 638 | command=""" 639 | set -e 640 | 641 | echo {private_key} > "$HOME/.ssh/id_rsa" 642 | echo {public_key} >> "$HOME/.ssh/authorized_keys" 643 | 644 | chmod 400 "$HOME/.ssh/id_rsa" 645 | """.format( 646 | private_key=shlex.quote(cluster.ssh_key_pair.private), 647 | public_key=shlex.quote(cluster.ssh_key_pair.public))) 648 | 649 | with ssh_client.open_sftp() as sftp: 650 | sftp.put( 651 | localpath=os.path.join(SCRIPTS_DIR, 'setup-ephemeral-storage.py'), 652 | remotepath='/tmp/setup-ephemeral-storage.py') 653 | 654 | logger.info("[{h}] Configuring ephemeral storage...".format(h=host)) 655 | # TODO: Print some kind of warning if storage is large, since formatting 656 | # will take several minutes (~4 minutes for 2TB). 657 | storage_dirs_raw = ssh_check_output( 658 | client=ssh_client, 659 | command=""" 660 | set -e 661 | python /tmp/setup-ephemeral-storage.py 662 | rm -f /tmp/setup-ephemeral-storage.py 663 | """) 664 | storage_dirs = json.loads(storage_dirs_raw) 665 | 666 | cluster.storage_dirs.root = storage_dirs['root'] 667 | cluster.storage_dirs.ephemeral = storage_dirs['ephemeral'] 668 | 669 | # TODO: Move Python and Java setup to new service under services.py. 670 | # New service to cover Python/Scala/Java: LanguageRuntimes (name?) 671 | ssh_check_output( 672 | client=ssh_client, 673 | command=( 674 | """ 675 | set -e 676 | sudo yum install -y python3 677 | """ 678 | ) 679 | ) 680 | ensure_java(ssh_client, java_version) 681 | 682 | for service in services: 683 | try: 684 | service.install( 685 | ssh_client=ssh_client, 686 | cluster=cluster, 687 | ) 688 | except Exception as e: 689 | raise Exception( 690 | "Failed to install {}." 691 | .format(type(service).__name__) 692 | ) from e 693 | 694 | 695 | def provision_cluster( 696 | *, 697 | cluster: FlintrockCluster, 698 | java_version: int, 699 | services: list, 700 | user: str, 701 | identity_file: str): 702 | """ 703 | Connect to a freshly launched cluster and install the specified services. 704 | """ 705 | partial_func = functools.partial( 706 | provision_node, 707 | java_version=java_version, 708 | services=services, 709 | user=user, 710 | identity_file=identity_file, 711 | cluster=cluster) 712 | hosts = [cluster.master_ip] + cluster.slave_ips 713 | 714 | run_against_hosts(partial_func=partial_func, hosts=hosts) 715 | 716 | master_ssh_client = get_ssh_client( 717 | user=user, 718 | host=cluster.master_ip, 719 | identity_file=identity_file) 720 | 721 | with master_ssh_client: 722 | manifest = { 723 | 'java_version': java_version, 724 | 'services': [[type(m).__name__, m.manifest] for m in services], 725 | 'ssh_key_pair': cluster.ssh_key_pair._asdict(), 726 | } 727 | # The manifest tells us how the cluster is configured. We'll need this 728 | # when we resize the cluster or restart it. 
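        # Note: 'services' is stored as a list of [class name, manifest] pairs
        # (e.g. ["HDFS", {...}] or ["Spark", {...}]), which load_manifest() later
        # uses to reconstruct the service objects by name.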
729 | ssh_check_output( 730 | client=master_ssh_client, 731 | command=""" 732 | echo {m} > "$HOME/.flintrock-manifest.json" 733 | chmod go-rw "$HOME/.flintrock-manifest.json" 734 | """.format( 735 | m=shlex.quote(json.dumps(manifest, indent=4, sort_keys=True)) 736 | )) 737 | 738 | for service in services: 739 | service.configure_master( 740 | ssh_client=master_ssh_client, 741 | cluster=cluster) 742 | 743 | for service in services: 744 | service.health_check(master_host=cluster.master_ip) 745 | 746 | 747 | def provision_node( 748 | *, 749 | java_version: int, 750 | services: list, 751 | user: str, 752 | host: str, 753 | identity_file: str, 754 | cluster: FlintrockCluster): 755 | """ 756 | Connect to a freshly launched node, set it up for SSH access, configure ephemeral 757 | storage, and install the specified services. 758 | 759 | This method is role-agnostic; it runs on both the cluster master and slaves. 760 | This method is meant to be called asynchronously. 761 | """ 762 | client = get_ssh_client( 763 | user=user, 764 | host=host, 765 | identity_file=identity_file, 766 | wait=True) 767 | 768 | with client: 769 | setup_node( 770 | ssh_client=client, 771 | services=services, 772 | java_version=java_version, 773 | cluster=cluster) 774 | for service in services: 775 | service.configure( 776 | ssh_client=client, 777 | cluster=cluster) 778 | 779 | 780 | def start_node( 781 | *, 782 | services: list, 783 | user: str, 784 | host: str, 785 | identity_file: str, 786 | cluster: FlintrockCluster): 787 | """ 788 | Connect to an existing node that has just been started up again and prepare it for 789 | work. 790 | 791 | This method is role-agnostic; it runs on both the cluster master and slaves. 792 | This method is meant to be called asynchronously. 793 | """ 794 | ssh_client = get_ssh_client( 795 | user=user, 796 | host=host, 797 | identity_file=identity_file, 798 | wait=True) 799 | 800 | with ssh_client: 801 | # TODO: Consider consolidating ephemeral storage code under a dedicated 802 | # Flintrock service. 803 | if cluster.storage_dirs.ephemeral: 804 | ssh_check_output( 805 | client=ssh_client, 806 | command=""" 807 | sudo chown "{u}:{u}" {d} 808 | """.format( 809 | u=user, 810 | d=' '.join(cluster.storage_dirs.ephemeral))) 811 | 812 | for service in services: 813 | service.configure( 814 | ssh_client=ssh_client, 815 | cluster=cluster) 816 | 817 | 818 | def add_slaves_node( 819 | *, 820 | user: str, 821 | host: str, 822 | identity_file: str, 823 | java_version: int, 824 | services: list, 825 | cluster: FlintrockCluster, 826 | new_hosts: list): 827 | """ 828 | If the node is new, set it up. If not, just reconfigure it to recognize 829 | the newly added nodes. 830 | 831 | This method is role-agnostic; it runs on both the cluster master and slaves. 832 | This method is meant to be called asynchronously. 833 | """ 834 | is_new_host = host in new_hosts 835 | 836 | client = get_ssh_client( 837 | user=user, 838 | host=host, 839 | identity_file=identity_file, 840 | wait=is_new_host) 841 | 842 | with client: 843 | if is_new_host: 844 | setup_node( 845 | ssh_client=client, 846 | services=services, 847 | java_version=java_version, 848 | cluster=cluster) 849 | 850 | for service in services: 851 | service.configure( 852 | ssh_client=client, 853 | cluster=cluster) 854 | 855 | 856 | def remove_slaves_node( 857 | *, 858 | user: str, 859 | host: str, 860 | identity_file: str, 861 | services: list, 862 | cluster: FlintrockCluster): 863 | """ 864 | Update the services on a node to remove the provided slaves. 
865 | 866 | This method is role-agnostic; it runs on both the cluster master and slaves. 867 | This method is meant to be called asynchronously. 868 | """ 869 | ssh_client = get_ssh_client( 870 | user=user, 871 | host=host, 872 | identity_file=identity_file) 873 | 874 | for service in services: 875 | service.configure( 876 | ssh_client=ssh_client, 877 | cluster=cluster) 878 | 879 | 880 | def run_command_node(*, user: str, host: str, identity_file: str, command: tuple): 881 | """ 882 | Run a shell command on a node. 883 | 884 | This method is role-agnostic; it runs on both the cluster master and slaves. 885 | This method is meant to be called asynchronously. 886 | """ 887 | ssh_client = get_ssh_client( 888 | user=user, 889 | host=host, 890 | identity_file=identity_file) 891 | 892 | logger.info("[{h}] Running command...".format(h=host)) 893 | 894 | command_str = ' '.join(command) 895 | 896 | with ssh_client: 897 | ssh_check_output( 898 | client=ssh_client, 899 | command=command_str) 900 | 901 | logger.info("[{h}] Command complete.".format(h=host)) 902 | 903 | 904 | def copy_file_node( 905 | *, 906 | user: str, 907 | host: str, 908 | identity_file: str, 909 | local_path: str, 910 | remote_path: str): 911 | """ 912 | Copy a file to the specified remote path on a node. 913 | 914 | This method is role-agnostic; it runs on both the cluster master and slaves. 915 | This method is meant to be called asynchronously. 916 | """ 917 | ssh_client = get_ssh_client( 918 | user=user, 919 | host=host, 920 | identity_file=identity_file) 921 | 922 | with ssh_client: 923 | remote_dir = posixpath.dirname(remote_path) 924 | 925 | try: 926 | ssh_check_output( 927 | client=ssh_client, 928 | command=""" 929 | test -d {path} 930 | """.format(path=shlex.quote(remote_dir))) 931 | except Exception as e: 932 | # TODO: Catch more specific exception. 933 | raise Exception("Remote directory does not exist: {d}".format(d=remote_dir)) 934 | 935 | with ssh_client.open_sftp() as sftp: 936 | logger.info("[{h}] Copying file...".format(h=host)) 937 | 938 | sftp.put(localpath=local_path, remotepath=remote_path) 939 | 940 | logger.info("[{h}] Copy complete.".format(h=host)) 941 | 942 | 943 | # This is necessary down here since we have a circular import dependency between 944 | # core.py and services.py. I've thought about how to remove this circular dependency, 945 | # but for now this seems like what we need to go with. 946 | # Flintrock modules 947 | from .services import HDFS, Spark # Used by start_cluster() # noqa 948 | -------------------------------------------------------------------------------- /flintrock/exceptions.py: -------------------------------------------------------------------------------- 1 | class NothingToDo(Exception): 2 | pass 3 | 4 | 5 | class UsageError(Exception): 6 | pass 7 | 8 | 9 | class UnsupportedProviderError(UsageError): 10 | def __init__(self, provider: str): 11 | super().__init__( 12 | "This provider is not supported: {p}".format(p=provider)) 13 | self.provider = provider 14 | 15 | 16 | class Error(Exception): 17 | pass 18 | 19 | 20 | class ClusterNotFound(Error): 21 | pass 22 | 23 | 24 | class ClusterAlreadyExists(Error): 25 | pass 26 | 27 | 28 | class ClusterInvalidState(Error): 29 | def __init__(self, *, attempted_command: str, state: str): 30 | super().__init__( 31 | "Cluster is in state '{s}'. 
Cannot execute {c}.".format( 32 | c=attempted_command, 33 | s=state)) 34 | self.attempted_command = attempted_command 35 | self.state = state 36 | 37 | 38 | class SSHError(Error): 39 | def __init__(self, *, host: str, message: str): 40 | super().__init__( 41 | "[{h}] {m}".format(h=host, m=message)) 42 | self.host = host 43 | self.message = message 44 | 45 | 46 | class InterruptedEC2Operation(Error): 47 | def __init__(self, *, instances: list): 48 | super().__init__( 49 | "Operation aborted." 50 | ) 51 | self.instances = instances 52 | -------------------------------------------------------------------------------- /flintrock/scripts/adoptium.repo: -------------------------------------------------------------------------------- 1 | # Source: https://adoptium.net/installation/linux/#_centosrhelfedora_instructions 2 | 3 | [Adoptium] 4 | name=Adoptium 5 | baseurl=https://packages.adoptium.net/artifactory/rpm/amazonlinux/$releasever/$basearch 6 | enabled=1 7 | gpgcheck=1 8 | gpgkey=https://packages.adoptium.net/artifactory/api/gpg/key/public 9 | -------------------------------------------------------------------------------- /flintrock/scripts/download-package.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import errno 5 | import os.path 6 | import sys 7 | import subprocess 8 | import time 9 | 10 | MAX_TRIES = 5 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('url') 16 | parser.add_argument('destination_dir') 17 | args = parser.parse_args() 18 | return (args.url, args.destination_dir) 19 | 20 | 21 | if __name__ == '__main__': 22 | url, destination_dir = parse_args() 23 | 24 | try: 25 | os.makedirs(destination_dir, mode=0o755) 26 | except OSError as e: 27 | if e.errno == errno.EEXIST: 28 | pass 29 | else: 30 | raise 31 | 32 | download_path = '{}.download'.format(os.path.basename(destination_dir)) 33 | 34 | tries = 0 35 | while True: 36 | try: 37 | if url.startswith('s3://'): 38 | subprocess.check_call(['aws', 's3', 'cp', url, download_path]) 39 | else: 40 | subprocess.check_call(['curl', '--location', '--output', download_path, url]) 41 | subprocess.check_call(['gzip', '--test', download_path]) 42 | subprocess.check_call(['tar', 'xzf', download_path, '-C', destination_dir, '--strip-components=1']) 43 | subprocess.check_call(['rm', download_path]) 44 | except subprocess.CalledProcessError as e: 45 | print(e, file=sys.stderr) 46 | if tries < MAX_TRIES: 47 | tries += 1 48 | time.sleep(1) 49 | else: 50 | print( 51 | "Failed to download and unpack '{url}' after {tries} tries." 52 | .format( 53 | url=url, 54 | tries=MAX_TRIES, 55 | ), 56 | file=sys.stderr, 57 | ) 58 | sys.exit(1) 59 | else: 60 | break 61 | -------------------------------------------------------------------------------- /flintrock/scripts/setup-ephemeral-storage.py: -------------------------------------------------------------------------------- 1 | """ 2 | Setup ephemeral storage on a newly launched Linux host. 3 | 4 | This script was developed against EC2, where ephemeral volumes are by 5 | default haphazardly and inconsistently mounted. Therefore, we unmount 6 | all volumes that we detect and explicitly format and remount them as 7 | we please. 8 | 9 | The resulting structure we create is as follows: 10 | 11 | /media 12 | /root: The instance's root volume. 13 | /ephemeral[0-N]: Instance store volumes. 14 | /persistent[0-N]: EBS volumes. 
15 | /tmp: A temporary directory with lots of space. 16 | 17 | WARNING: Be conscious about what this script prints to stdout, as that 18 | output is parsed by Flintrock. 19 | """ 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import json 24 | import platform 25 | import subprocess 26 | import sys 27 | 28 | from collections import namedtuple 29 | 30 | # Taken from: http://man7.org/linux/man-pages/man5/fstab.5.html 31 | Mount = namedtuple( 32 | 'Mount', [ 33 | 'device_name', 34 | 'mount_point', 35 | 'filesystem_type', 36 | 'mount_options', 37 | 'dump', 38 | 'pass_number' 39 | ]) 40 | 41 | BlockDevice = namedtuple( 42 | 'BlockDevice', sorted([ 43 | 'kname', 44 | 'mountpoint', 45 | 'size', 46 | ])) 47 | BlockDevice.__new__.__defaults__ = (None, ) * len(BlockDevice._fields) 48 | 49 | 50 | def device_pairs_to_tuple(pairs): 51 | device_dict = {} 52 | for pair in pairs: 53 | key, value = pair.split('=') 54 | key = key.lower() 55 | value = value.strip('"').lower() 56 | device_dict.update({key: value}) 57 | return BlockDevice(**device_dict) 58 | 59 | 60 | def get_non_root_block_devices(): 61 | """ 62 | Get all the non-root block devices available to the host. 63 | 64 | These are the devices we're going to format and mount for use. 65 | """ 66 | block_devices_raw = subprocess.check_output([ 67 | 'lsblk', 68 | '--ascii', 69 | '--pairs', 70 | '--bytes', 71 | '--paths', 72 | '--output', 'KNAME,MOUNTPOINT,SIZE', 73 | # --inverse and --nodeps make sure that 74 | # 1) we get the mount points for devices that have holder devices 75 | # 2) we don't get the holder devices themselves 76 | '--inverse', 77 | '--nodeps', 78 | '--noheadings', 79 | ]).decode('utf-8') 80 | block_devices = [ 81 | device_pairs_to_tuple(line.split()) 82 | for line in block_devices_raw.splitlines() 83 | ] 84 | non_root_block_devices = [ 85 | device for device in block_devices 86 | if device.mountpoint != '/' 87 | ] 88 | # Skip tiny devices, like the 1M devices that show up on 89 | # m5 instances on EC2. 90 | # See: https://github.com/nchammas/flintrock/issues/256 91 | non_trivial_non_root_block_devices = [ 92 | device for device in non_root_block_devices 93 | if int(device.size) >= 1024 ** 3 94 | ] 95 | return non_trivial_non_root_block_devices 96 | 97 | 98 | def unmount_devices(devices): 99 | """ 100 | Unmount the provided devices. 101 | """ 102 | with open('/proc/mounts') as m: 103 | mounts = [Mount(*line.split()) for line in m.read().splitlines()] 104 | 105 | for mount in mounts: 106 | if mount.device_name in [d.kname for d in devices]: 107 | subprocess.check_output(['sudo', 'umount', mount.device_name]) 108 | 109 | 110 | def format_devices(devices): 111 | """ 112 | Create an ext4 filesystem on the provided devices. 
113 | """ 114 | format_processes = [] 115 | for device in devices: 116 | p = subprocess.Popen([ 117 | 'sudo', 'mkfs.ext4', 118 | '-F', 119 | '-E', 120 | 'lazy_itable_init=0,lazy_journal_init=0', 121 | device.kname], 122 | stdout=subprocess.PIPE, 123 | stderr=subprocess.PIPE) 124 | format_processes.append(p) 125 | 126 | for p in format_processes: 127 | stdout_raw, stderr_raw = p.communicate() 128 | stdout, stderr = stdout_raw.decode('utf-8'), stderr_raw.decode('utf-8') # noqa 129 | return_code = p.returncode 130 | if return_code != 0: 131 | raise Exception( 132 | "Format process returned non-zero exit code: {code}\n{error}" 133 | .format( 134 | code=return_code, 135 | error=stderr)) 136 | 137 | 138 | def mount_devices(devices): 139 | """ 140 | Mount the provided devices at the provided mount points. 141 | 142 | Additionally, add the appropriate entries to /etc/fstab so that the mounts 143 | persist across cluster stop/start. 144 | """ 145 | for device in devices: 146 | subprocess.check_output([ 147 | 'sudo', 'mkdir', '-p', device.mountpoint]) 148 | 149 | # Replace any existing fstab entries with our own. 150 | subprocess.check_output( 151 | """ 152 | grep -v -e "^{device_name}" /etc/fstab | sudo tee /etc/fstab 153 | """.format(device_name=device.kname), 154 | shell=True) 155 | subprocess.check_output( 156 | """ 157 | echo "{fstab_entry}" | sudo tee -a /etc/fstab 158 | """.format(fstab_entry=' '.join([ 159 | device.kname, 160 | device.mountpoint, 161 | 'ext4', 162 | 'defaults,users,noatime', 163 | '0', 164 | '0'])), 165 | shell=True) 166 | 167 | subprocess.check_output([ 168 | 'sudo', 'mount', '--source', device.kname]) 169 | # NOTE: `mount` changes the mount point owner to root, so we have 170 | # to set it to what we want here, after `mount` runs. 171 | subprocess.check_output( 172 | 'sudo chown "$(logname):$(logname)" {m}'.format(m=device.mountpoint), 173 | shell=True) 174 | 175 | 176 | def create_root_dir(): 177 | """ 178 | Create a folder that services like HDFS and Spark can refer to to access 179 | local storage on the root volume. 180 | """ 181 | path = '/media/root' 182 | subprocess.check_output([ 183 | 'sudo', 'mkdir', '-p', path]) 184 | subprocess.check_output( 185 | 'sudo chown "$(logname):$(logname)" {p}'.format(p=path), 186 | shell=True) 187 | return path 188 | 189 | 190 | def create_tmp_dir(target): 191 | """ 192 | Create a folder that services can use as a temporary directory for big files. 193 | """ 194 | path = '/media/tmp' 195 | subprocess.check_output([ 196 | 'sudo', 'ln', '-s', target, path]) 197 | subprocess.check_output( 198 | 'sudo chown "$(logname):$(logname)" {p}'.format(p=path), 199 | shell=True) 200 | return path 201 | 202 | 203 | if __name__ == '__main__': 204 | if sys.version_info < (2, 7) or ((3, 0) <= sys.version_info < (3, 4)): 205 | raise Exception( 206 | "This script is only supported on Python 2.7+ and 3.4+. " 207 | "You are running Python {v}.".format(v=platform.python_version())) 208 | 209 | non_root_block_devices = get_non_root_block_devices() 210 | 211 | # NOTE: For now we are assuming that all non-root devices are ephemeral devices. 212 | # We're going to assign them the mount points we want them to have once we're 213 | # done with the unmount -> format -> mount cycle. 
214 | ephemeral_devices = [] 215 | for (num, device) in enumerate(sorted(non_root_block_devices, key=lambda d: d.kname)): 216 | ephemeral_devices.append( 217 | BlockDevice( 218 | kname=device.kname, 219 | mountpoint='/media/ephemeral' + str(num))) 220 | 221 | unmount_devices(ephemeral_devices) 222 | format_devices(ephemeral_devices) 223 | mount_devices(ephemeral_devices) 224 | 225 | root_dir = create_root_dir() 226 | if ephemeral_devices: 227 | tmp_dir = ephemeral_devices[0].mountpoint 228 | else: 229 | tmp_dir = '/tmp' 230 | create_tmp_dir(tmp_dir) 231 | 232 | print(json.dumps( 233 | { 234 | 'root': root_dir, 235 | 'ephemeral': [d.mountpoint for d in ephemeral_devices] 236 | })) 237 | -------------------------------------------------------------------------------- /flintrock/services.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shlex 4 | import socket 5 | import sys 6 | import urllib.error 7 | import urllib.request 8 | import logging 9 | 10 | # External modules 11 | import paramiko 12 | 13 | # Flintrock modules 14 | from .core import ( 15 | FlintrockCluster, 16 | generate_template_mapping, 17 | get_formatted_template, 18 | ) 19 | from .ssh import ssh_check_output 20 | from .util import spark_hadoop_build_version 21 | 22 | FROZEN = getattr(sys, 'frozen', False) 23 | 24 | if FROZEN: 25 | THIS_DIR = sys._MEIPASS 26 | else: 27 | THIS_DIR = os.path.dirname(os.path.realpath(__file__)) 28 | 29 | SCRIPTS_DIR = os.path.join(THIS_DIR, 'scripts') 30 | 31 | 32 | logger = logging.getLogger('flintrock.services') 33 | 34 | 35 | # TODO: Move this back to ec2.py. EC2-specific login should not live here. 36 | class SecurityGroupRule: 37 | def __init__( 38 | self, 39 | ip_protocol, 40 | from_port, 41 | to_port, 42 | src_group=None, 43 | cidr_ip=None, 44 | ): 45 | if src_group and cidr_ip: 46 | raise ValueError( 47 | "src_group and cidr_ip are mutually exclusive. Specify one or the other. " 48 | "See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.SecurityGroup.authorize_ingress" 49 | ) 50 | 51 | if not src_group and not cidr_ip: 52 | raise ValueError("One of src_group or cidr_ip must be specified.") 53 | 54 | self.ip_protocol = ip_protocol 55 | self.from_port = from_port 56 | self.to_port = to_port 57 | # We set the default values to empty string so calls to boto3 accept unset parameters. 58 | # See: https://github.com/boto/boto3/issues/331 59 | self.src_group = src_group if src_group else '' 60 | self.cidr_ip = cidr_ip if cidr_ip else '' 61 | 62 | def __str__(self): 63 | return str(vars(self)) 64 | 65 | 66 | class FlintrockService: 67 | """ 68 | This is an abstract class. Implementations of this class capture all the logic 69 | required to fully install and manage services like Spark on Flintrock clusters. 70 | """ 71 | 72 | def __init__(self): 73 | """ 74 | This is the only method signature that implementations don't have to follow. 75 | Use this method to set properties like the service version or download source 76 | which the rest of the methods here will need to do their work. 77 | """ 78 | raise NotImplementedError 79 | 80 | def install( 81 | self, 82 | ssh_client: paramiko.client.SSHClient, 83 | cluster: FlintrockCluster): 84 | """ 85 | Install the service on a node via the provided SSH client. This typically 86 | means downloading a software package and maybe even building it if necessary. 87 | 88 | This method is role-agnostic; it runs on both the cluster master and slaves. 
89 | This method is meant to be called asynchronously. 90 | """ 91 | raise NotImplementedError 92 | 93 | def configure( 94 | self, 95 | ssh_client: paramiko.client.SSHClient, 96 | cluster: FlintrockCluster): 97 | """ 98 | Configure the installed service on a node via the provided SSH client. This 99 | typically means using templates to create configuration files on the node. 100 | 101 | This method is role-agnostic; it runs on both the cluster master and slaves. 102 | This method is meant to be called asynchronously. 103 | """ 104 | raise NotImplementedError 105 | 106 | def configure_master( 107 | self, 108 | ssh_client: paramiko.client.SSHClient, 109 | cluster: FlintrockCluster): 110 | """ 111 | Configure the service master on a node via the provided SSH client after the 112 | role-agnostic configuration in configure() is complete. Start the master and 113 | slaves. 114 | 115 | This method is meant to be called once on the cluster master. 116 | This method is meant to be called asynchronously. 117 | """ 118 | raise NotImplementedError 119 | 120 | def configure_slave( 121 | self, 122 | ssh_client: paramiko.client.SSHClient, 123 | cluster: FlintrockCluster): 124 | """ 125 | Configure a service slave on a node via the provided SSH client after the 126 | role-agnostic configuration in configure() is complete. 127 | 128 | This method is meant to be called once on each cluster slave. 129 | This method is meant to be called asynchronously. 130 | """ 131 | raise NotImplementedError 132 | 133 | def health_check( 134 | self, 135 | master_host: str): 136 | """ 137 | Check that the service is up and running by querying the cluster master. 138 | """ 139 | raise NotImplementedError 140 | 141 | def get_security_group_rules(self, flintrock_client_cidr: str, flintrock_client_group: str): 142 | """ 143 | Return the EC2 SecurityGroupRules required by this service. 144 | """ 145 | raise NotImplementedError 146 | 147 | 148 | class HDFS(FlintrockService): 149 | def __init__(self, *, version, download_source): 150 | self.version = version 151 | self.download_source = download_source 152 | self.name_node_ui_port = 50070 if version < '3.0' else 9870 153 | self.manifest = {'version': version, 'download_source': download_source} 154 | 155 | def install( 156 | self, 157 | ssh_client: paramiko.client.SSHClient, 158 | cluster: FlintrockCluster, 159 | ): 160 | logger.info( 161 | "[{h}] Installing HDFS..." 162 | .format(h=ssh_client.get_transport().getpeername()[0]) 163 | ) 164 | 165 | with ssh_client.open_sftp() as sftp: 166 | sftp.put( 167 | localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'), 168 | remotepath='/tmp/download-package.py') 169 | 170 | logger.debug( 171 | "[{h}] Downloading Hadoop from: {s}" 172 | .format( 173 | h=ssh_client.get_transport().getpeername()[0], 174 | s=self.download_source, 175 | ) 176 | ) 177 | 178 | ssh_check_output( 179 | client=ssh_client, 180 | command=""" 181 | set -e 182 | 183 | python /tmp/download-package.py "{download_source}" "hadoop" 184 | 185 | for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do 186 | sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)" 187 | done 188 | 189 | echo "export HADOOP_LIBEXEC_DIR='$(pwd)/hadoop/libexec'" >> .bashrc 190 | """.format( 191 | # version=self.version, 192 | download_source=self.download_source.format(v=self.version), 193 | )) 194 | 195 | def configure( 196 | self, 197 | ssh_client: paramiko.client.SSHClient, 198 | cluster: FlintrockCluster): 199 | # TODO: os.walk() through these files. 
200 | template_paths = [ 201 | 'hadoop/conf/masters', 202 | 'hadoop/conf/slaves', 203 | 'hadoop/conf/hadoop-env.sh', 204 | 'hadoop/conf/core-site.xml', 205 | 'hadoop/conf/hdfs-site.xml', 206 | ] 207 | 208 | ssh_check_output( 209 | client=ssh_client, 210 | command="mkdir -p hadoop/conf", 211 | ) 212 | 213 | for template_path in template_paths: 214 | ssh_check_output( 215 | client=ssh_client, 216 | command=""" 217 | echo {f} > {p} 218 | """.format( 219 | f=shlex.quote( 220 | get_formatted_template( 221 | path=os.path.join(THIS_DIR, "templates", template_path), 222 | mapping=generate_template_mapping( 223 | cluster=cluster, 224 | hadoop_version=self.version, 225 | # Hadoop doesn't need to know what 226 | # Spark version we're using. 227 | spark_version='', 228 | spark_executor_instances=0, 229 | ))), 230 | p=shlex.quote(template_path))) 231 | 232 | # TODO: Convert this into start_master() and split master- or slave-specific 233 | # stuff out of configure() into configure_master() and configure_slave(). 234 | def configure_master( 235 | self, 236 | ssh_client: paramiko.client.SSHClient, 237 | cluster: FlintrockCluster): 238 | host = ssh_client.get_transport().getpeername()[0] 239 | logger.info("[{h}] Configuring HDFS master...".format(h=host)) 240 | 241 | ssh_check_output( 242 | client=ssh_client, 243 | command=""" 244 | # `|| true` because on cluster restart this command will fail. 245 | ./hadoop/bin/hdfs namenode -format -nonInteractive || true 246 | """) 247 | 248 | # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/157 249 | attempt_limit = 3 250 | for attempt in range(attempt_limit): 251 | try: 252 | ssh_check_output( 253 | client=ssh_client, 254 | command=""" 255 | ./hadoop/sbin/stop-dfs.sh 256 | ./hadoop/sbin/start-dfs.sh 257 | 258 | master_ui_response_code=0 259 | while [ "$master_ui_response_code" -ne 200 ]; do 260 | sleep 1 261 | master_ui_response_code="$( 262 | curl \ 263 | --location --head --silent \ 264 | --output /dev/null \ 265 | --write-out "%{{http_code}}" \ 266 | {m}:{p} 267 | )" 268 | done 269 | """.format(m=shlex.quote(cluster.master_private_host), p=self.name_node_ui_port), 270 | timeout_seconds=90 271 | ) 272 | break 273 | except socket.timeout as e: 274 | logger.debug( 275 | "Timed out waiting for HDFS master to come up.{}" 276 | .format(" Trying again..." if attempt < attempt_limit - 1 else "") 277 | ) 278 | else: 279 | raise Exception("Timed out waiting for HDFS master to come up.") 280 | 281 | def health_check(self, master_host: str): 282 | # This info is not helpful as a detailed health check, but it gives us 283 | # an up / not up signal.
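# For example, against a healthy Hadoop 3.x cluster a request like
#   curl 'http://<master>:9870/webhdfs/v1/?op=GETCONTENTSUMMARY'
# returns a small JSON ContentSummary document, so any HTTP or JSON error below
# is treated as "not up". (Hadoop 2.x serves this on port 50070 instead; see
# name_node_ui_port in __init__ above.)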
284 | hdfs_master_ui = 'http://{m}:{p}/webhdfs/v1/?op=GETCONTENTSUMMARY'.format(m=master_host, p=self.name_node_ui_port) 285 | 286 | try: 287 | json.loads( 288 | urllib.request 289 | .urlopen(hdfs_master_ui) 290 | .read() 291 | .decode('utf-8')) 292 | logger.info("HDFS online.") 293 | except Exception as e: 294 | raise Exception("HDFS health check failed.") from e 295 | 296 | def get_security_group_rules(self, flintrock_client_cidr: str=None, flintrock_client_group: str=None): 297 | return [ 298 | SecurityGroupRule( 299 | ip_protocol='tcp', 300 | from_port=self.name_node_ui_port, 301 | to_port=self.name_node_ui_port, 302 | cidr_ip=flintrock_client_cidr, 303 | src_group=flintrock_client_group, 304 | ) 305 | ] 306 | 307 | 308 | class Spark(FlintrockService): 309 | def __init__( 310 | self, 311 | *, 312 | spark_executor_instances: int, 313 | version: str=None, 314 | hadoop_version: str, 315 | download_source: str=None, 316 | git_commit: str=None, 317 | git_repository: str=None 318 | ): 319 | # TODO: Convert these checks into something that throws a proper exception. 320 | # Perhaps reuse logic from CLI. 321 | assert bool(version) ^ bool(git_commit) 322 | if git_commit: 323 | assert git_repository 324 | 325 | self.spark_executor_instances = spark_executor_instances 326 | self.version = version 327 | self.hadoop_version = hadoop_version 328 | self.download_source = download_source 329 | self.git_commit = git_commit 330 | self.git_repository = git_repository 331 | 332 | self.manifest = { 333 | 'version': version, 334 | 'spark_executor_instances': spark_executor_instances, 335 | 'hadoop_version': hadoop_version, 336 | 'download_source': download_source, 337 | 'git_commit': git_commit, 338 | 'git_repository': git_repository} 339 | 340 | def install( 341 | self, 342 | ssh_client: paramiko.client.SSHClient, 343 | cluster: FlintrockCluster, 344 | ): 345 | logger.info( 346 | "[{h}] Installing Spark..." 
347 | .format(h=ssh_client.get_transport().getpeername()[0]) 348 | ) 349 | 350 | if self.version: 351 | with ssh_client.open_sftp() as sftp: 352 | sftp.put( 353 | localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'), 354 | remotepath='/tmp/download-package.py') 355 | 356 | logger.debug( 357 | "[{h}] Downloading Spark from: {s}" 358 | .format( 359 | h=ssh_client.get_transport().getpeername()[0], 360 | s=self.download_source, 361 | ) 362 | ) 363 | 364 | ssh_check_output( 365 | client=ssh_client, 366 | command=""" 367 | python /tmp/download-package.py "{download_source}" "spark" 368 | """.format( 369 | # version=self.version, 370 | download_source=self.download_source.format(v=self.version), 371 | )) 372 | else: 373 | ssh_check_output( 374 | client=ssh_client, 375 | command=""" 376 | set -e 377 | sudo yum install -y git 378 | sudo yum install -y java-devel 379 | """) 380 | 381 | logger.debug( 382 | "[{h}] Cloning Spark at {c} from: {s}" 383 | .format( 384 | h=ssh_client.get_transport().getpeername()[0], 385 | c=self.git_commit, 386 | s=self.git_repository, 387 | ) 388 | ) 389 | 390 | ssh_check_output( 391 | client=ssh_client, 392 | command=""" 393 | set -e 394 | git clone {repo} spark 395 | cd spark 396 | git reset --hard {commit} 397 | if [ -e "make-distribution.sh" ]; then 398 | ./make-distribution.sh -Phadoop-{hadoop_short_version} 399 | else 400 | ./dev/make-distribution.sh -Phadoop-{hadoop_short_version} 401 | fi 402 | """.format( 403 | repo=shlex.quote(self.git_repository), 404 | commit=shlex.quote(self.git_commit), 405 | hadoop_short_version=spark_hadoop_build_version(self.hadoop_version), 406 | )) 407 | ssh_check_output( 408 | client=ssh_client, 409 | command=""" 410 | set -e 411 | for f in $(find spark/bin -type f -executable -not -name '*.cmd'); do 412 | sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)" 413 | done 414 | echo "export SPARK_HOME='$(pwd)/spark'" >> .bashrc 415 | """) 416 | 417 | def configure( 418 | self, 419 | ssh_client: paramiko.client.SSHClient, 420 | cluster: FlintrockCluster): 421 | 422 | template_paths = [ 423 | 'spark/conf/spark-env.sh', 424 | 'spark/conf/slaves', 425 | ] 426 | 427 | ssh_check_output( 428 | client=ssh_client, 429 | command="mkdir -p spark/conf", 430 | ) 431 | 432 | for template_path in template_paths: 433 | ssh_check_output( 434 | client=ssh_client, 435 | command=""" 436 | echo {f} > {p} 437 | """.format( 438 | f=shlex.quote( 439 | get_formatted_template( 440 | path=os.path.join(THIS_DIR, "templates", template_path), 441 | mapping=generate_template_mapping( 442 | cluster=cluster, 443 | spark_executor_instances=self.spark_executor_instances, 444 | hadoop_version=self.hadoop_version, 445 | spark_version=self.version or self.git_commit, 446 | ))), 447 | p=shlex.quote(template_path))) 448 | 449 | # TODO: Convert this into start_master() and split master- or slave-specific 450 | # stuff out of configure() into configure_master() and configure_slave(). 451 | # start_slave() can block until slave is fully up; that way we don't need 452 | # a sleep() before starting the master. 
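# A start_slave() along those lines could, for instance, poll the worker web UI
# (port 8081 by default) the same way configure_master() below polls the master
# UI on port 8080, looping on curl until it returns HTTP 200. That is only a
# sketch of the idea, not something the current code does.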
453 | def configure_master( 454 | self, 455 | ssh_client: paramiko.client.SSHClient, 456 | cluster: FlintrockCluster): 457 | host = ssh_client.get_transport().getpeername()[0] 458 | logger.info("[{h}] Configuring Spark master...".format(h=host)) 459 | 460 | # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/129 461 | attempt_limit = 3 462 | for attempt in range(attempt_limit): 463 | try: 464 | ssh_check_output( 465 | client=ssh_client, 466 | # Maybe move this shell script out to some separate 467 | # file/folder for the Spark service. 468 | command=""" 469 | spark/sbin/start-all.sh 470 | 471 | master_ui_response_code=0 472 | while [ "$master_ui_response_code" -ne 200 ]; do 473 | sleep 1 474 | master_ui_response_code="$( 475 | curl --head --silent --output /dev/null \ 476 | --write-out "%{{http_code}}" {m}:8080 477 | )" 478 | done 479 | """.format(m=shlex.quote(cluster.master_private_host)), 480 | timeout_seconds=90 481 | ) 482 | break 483 | except socket.timeout as e: 484 | logger.debug( 485 | "Timed out waiting for Spark master to come up.{}" 486 | .format(" Trying again..." if attempt < attempt_limit - 1 else "") 487 | ) 488 | else: 489 | raise Exception("Timed out waiting for Spark master to come up.") 490 | 491 | def health_check(self, master_host: str): 492 | spark_master_ui = 'http://{m}:8080/json/'.format(m=master_host) 493 | 494 | try: 495 | json.loads( 496 | urllib.request 497 | .urlopen(spark_master_ui) 498 | .read() 499 | .decode('utf-8') 500 | ) 501 | # TODO: Don't print here. Return this and let the caller print. 502 | logger.info("Spark online.") 503 | except Exception as e: 504 | # TODO: Catch a more specific problem known to be related to Spark not 505 | # being up; provide a slightly better error message, and don't 506 | # dump a large stack trace on the user. 
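# (For example, urllib.error.URLError and json.JSONDecodeError are the
# likely failure modes when the master is not yet serving /json/, so those
# could be caught specifically instead of the blanket Exception above.)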
507 | raise Exception("Spark health check failed.") from e 508 | 509 | def get_security_group_rules(self, flintrock_client_cidr: str=None, flintrock_client_group: str=None): 510 | return [ 511 | SecurityGroupRule( 512 | ip_protocol='tcp', 513 | from_port=8080, 514 | to_port=8081, 515 | cidr_ip=flintrock_client_cidr, 516 | src_group=flintrock_client_group, 517 | ), 518 | SecurityGroupRule( 519 | ip_protocol='tcp', 520 | from_port=4040, 521 | to_port=4050, 522 | cidr_ip=flintrock_client_cidr, 523 | src_group=flintrock_client_group, 524 | ), 525 | SecurityGroupRule( 526 | ip_protocol='tcp', 527 | from_port=7077, 528 | to_port=7077, 529 | cidr_ip=flintrock_client_cidr, 530 | src_group=flintrock_client_group, 531 | ), 532 | # Spark REST Server 533 | SecurityGroupRule( 534 | ip_protocol='tcp', 535 | from_port=6066, 536 | to_port=6066, 537 | cidr_ip=flintrock_client_cidr, 538 | src_group=flintrock_client_group, 539 | ), 540 | ] 541 | -------------------------------------------------------------------------------- /flintrock/ssh.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | import socket 4 | import subprocess 5 | import tempfile 6 | import time 7 | import logging 8 | from collections import namedtuple 9 | 10 | # External modules 11 | import paramiko 12 | 13 | # Flintrock modules 14 | from .util import get_subprocess_env 15 | from .exceptions import SSHError 16 | 17 | SSHKeyPair = namedtuple('KeyPair', ['public', 'private']) 18 | 19 | 20 | logger = logging.getLogger('flintrock.ssh') 21 | 22 | 23 | def generate_ssh_key_pair() -> SSHKeyPair: 24 | """ 25 | Generate an SSH key pair that the cluster can use for intra-cluster 26 | communication. 27 | """ 28 | with tempfile.TemporaryDirectory() as tempdir: 29 | subprocess.check_call( 30 | [ 31 | 'ssh-keygen', 32 | '-q', 33 | '-t', 'rsa', 34 | '-N', '', 35 | '-f', os.path.join(tempdir, 'flintrock_rsa'), 36 | '-C', 'flintrock', 37 | ], 38 | env=get_subprocess_env(), 39 | ) 40 | 41 | with open(file=os.path.join(tempdir, 'flintrock_rsa')) as private_key_file: 42 | private_key = private_key_file.read() 43 | 44 | with open(file=os.path.join(tempdir, 'flintrock_rsa.pub')) as public_key_file: 45 | public_key = public_key_file.read() 46 | 47 | return namedtuple('KeyPair', ['public', 'private'])(public_key, private_key) 48 | 49 | 50 | def get_ssh_client( 51 | *, 52 | user: str, 53 | host: str, 54 | identity_file: str, 55 | wait: bool=False, 56 | print_status: bool=None) -> paramiko.client.SSHClient: 57 | """ 58 | Get an SSH client for the provided host, waiting as necessary for SSH to become 59 | available. 
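When `wait` is true, the connection is retried up to 100 times with a short
sleep between attempts; otherwise only a few attempts are made before giving up.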
60 | """ 61 | if print_status is None: 62 | print_status = wait 63 | 64 | client = paramiko.client.SSHClient() 65 | 66 | client.load_system_host_keys() 67 | client.set_missing_host_key_policy(paramiko.client.AutoAddPolicy()) 68 | 69 | if wait: 70 | tries = 100 71 | else: 72 | # It's greater than 1 as a band-aid for this issue: 73 | # https://github.com/nchammas/flintrock/issues/198 74 | tries = 3 75 | 76 | while tries > 0: 77 | try: 78 | tries -= 1 79 | client.connect( 80 | username=user, 81 | hostname=host, 82 | key_filename=identity_file, 83 | look_for_keys=False, 84 | timeout=3) 85 | if print_status: 86 | logger.info("[{h}] SSH online.".format(h=host)) 87 | break 88 | except socket.timeout as e: 89 | logger.debug("[{h}] SSH timeout.".format(h=host)) 90 | time.sleep(5) 91 | except paramiko.ssh_exception.NoValidConnectionsError as e: 92 | if any(error.errno != errno.ECONNREFUSED for error in e.errors.values()): 93 | raise 94 | logger.debug("[{h}] SSH exception: {e}".format(h=host, e=e)) 95 | time.sleep(5) 96 | # We get this exception during startup with CentOS but not Amazon Linux, 97 | # for some reason. 98 | except paramiko.ssh_exception.AuthenticationException as e: 99 | logger.debug("[{h}] SSH AuthenticationException.".format(h=host)) 100 | time.sleep(5) 101 | except paramiko.ssh_exception.SSHException as e: 102 | raise SSHError( 103 | host=host, 104 | message="SSH protocol error. Possible causes include using " 105 | "the wrong key file or username.", 106 | ) from e 107 | else: 108 | raise SSHError( 109 | host=host, 110 | message="Could not connect via SSH.") 111 | 112 | return client 113 | 114 | 115 | def ssh_check_output( 116 | client: paramiko.client.SSHClient, 117 | command: str, 118 | timeout_seconds: int=None, 119 | ): 120 | """ 121 | Run a command via the provided SSH client and return the output captured 122 | on stdout. 123 | 124 | Raise an exception if the command returns a non-zero code. 125 | """ 126 | stdin, stdout, stderr = client.exec_command( 127 | command, 128 | get_pty=True, 129 | timeout=timeout_seconds) 130 | 131 | # NOTE: Paramiko doesn't clearly document this, but we must read() before 132 | # calling recv_exit_status(). 133 | # See: https://github.com/paramiko/paramiko/issues/448#issuecomment-159481997 134 | stdout_output = stdout.read().decode('utf8').rstrip('\n') 135 | stderr_output = stderr.read().decode('utf8').rstrip('\n') 136 | exit_status = stdout.channel.recv_exit_status() 137 | 138 | if exit_status: 139 | # TODO: Return a custom exception that includes the return code. 140 | # See: https://docs.python.org/3/library/subprocess.html#subprocess.check_output 141 | # NOTE: We are losing the output order here since output from stdout and stderr 142 | # may be interleaved. 143 | raise SSHError( 144 | host=client.get_transport().getpeername()[0], 145 | message=stdout_output + stderr_output) 146 | 147 | return stdout_output 148 | 149 | 150 | def ssh(*, user: str, host: str, identity_file: str): 151 | """ 152 | SSH into a host for interactive use. 
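Unlike the helpers above, this shells out to the local `ssh` binary rather than
using paramiko, so the user gets a normal interactive terminal.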
153 | """ 154 | subprocess.call( 155 | [ 156 | 'ssh', 157 | '-o', 'StrictHostKeyChecking=no', 158 | '-i', identity_file, 159 | '{u}@{h}'.format(u=user, h=host), 160 | ], 161 | env=get_subprocess_env(), 162 | ) 163 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/core-site.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> 3 | 4 | <configuration> 5 | <property> 6 | <name>hadoop.tmp.dir</name> 7 | <value>{hadoop_root_ephemeral_dirs}</value> 8 | </property> 9 | 10 | <property> 11 | <name>fs.defaultFS</name> 12 | <value>hdfs://{master_private_host}:9000</value> 13 | </property> 14 | </configuration> 15 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/hadoop-env.sh: -------------------------------------------------------------------------------- 1 | export HADOOP_HOME="$HOME/hadoop" 2 | export HADOOP_SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5" 3 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> 3 | 4 | <configuration> 5 | <property> 6 | <name>dfs.blocksize</name> 7 | <value>134217728</value> 8 | </property> 9 | 10 | <property> 11 | <name>dfs.datanode.data.dir</name> 12 | <value>{hadoop_root_ephemeral_dirs}</value> 13 | </property> 14 | </configuration> 15 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/masters: -------------------------------------------------------------------------------- 1 | {master_private_host} 2 | -------------------------------------------------------------------------------- /flintrock/templates/hadoop/conf/slaves: -------------------------------------------------------------------------------- 1 | {slave_private_hosts} 2 | -------------------------------------------------------------------------------- /flintrock/templates/spark/conf/slaves: -------------------------------------------------------------------------------- 1 | {slave_private_hosts} 2 | -------------------------------------------------------------------------------- /flintrock/templates/spark/conf/spark-env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export SPARK_LOCAL_DIRS="{spark_root_ephemeral_dirs}" 4 | 5 | # Standalone cluster options 6 | export SPARK_EXECUTOR_INSTANCES="{spark_executor_instances}" 7 | export SPARK_EXECUTOR_CORES="$(($(nproc) / {spark_executor_instances}))" 8 | export SPARK_WORKER_CORES="$(nproc)" 9 | 10 | export SPARK_MASTER_HOST="{master_private_host}" 11 | 12 | # TODO: Make this dependent on HDFS install. 13 | export HADOOP_CONF_DIR="$HOME/hadoop/conf" 14 | 15 | # TODO: Make this non-EC2-specific. 16 | # Bind Spark's web UIs to this machine's public EC2 hostname 17 | export SPARK_PUBLIC_DNS="$(curl --silent http://169.254.169.254/latest/meta-data/public-hostname)" 18 | 19 | # TODO: Set a high ulimit for large shuffles 20 | # Need to find a way to do this, since "sudo ulimit..." doesn't fly. 21 | # Probably need to edit some Linux config file. 22 | # ulimit -n 1000000 23 | 24 | # Should this be made part of a Python service somehow? 25 | export PYSPARK_PYTHON="python3" 26 | -------------------------------------------------------------------------------- /flintrock/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | FROZEN = getattr(sys, 'frozen', False) 5 | 6 | 7 | def get_subprocess_env() -> dict: 8 | """ 9 | Get the environment we want to use when making subprocess calls.
10 | This takes care of details that affect subprocess calls made from 11 | PyInstaller-packaged versions of Flintrock. 12 | 13 | For more information see: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations 14 | """ 15 | env = dict(os.environ) 16 | if FROZEN: 17 | env['LD_LIBRARY_PATH'] = env.get('LD_LIBRARY_PATH_ORIG', '') 18 | return env 19 | 20 | 21 | def spark_hadoop_build_version(hadoop_version: str) -> str: 22 | """ 23 | Given a Hadoop version, determine the Hadoop build of Spark to use. 24 | """ 25 | hadoop_version = tuple(map(int, hadoop_version.split('.'))) 26 | if hadoop_version < (2, 7): 27 | return 'hadoop2.6' 28 | elif (2, 7) <= hadoop_version < (3, 0): 29 | return 'hadoop2.7' 30 | elif (3, 0) <= hadoop_version: 31 | return 'hadoop3.2' 32 | -------------------------------------------------------------------------------- /generate-standalone-package.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import shutil 4 | import subprocess 5 | 6 | from flintrock import __version__ as flintrock_version 7 | 8 | THIS_DIR = os.path.dirname(os.path.realpath(__file__)) 9 | 10 | if __name__ == '__main__': 11 | operating_system = platform.system() 12 | if operating_system.lower() == 'darwin': 13 | operating_system = 'macOS' 14 | machine_type = platform.machine() 15 | 16 | subprocess.run( 17 | [ 18 | 'pyinstaller', 19 | '--noconfirm', 20 | '--clean', 21 | '--name', 'flintrock', 22 | '--additional-hooks-dir', '.', 23 | # This hidden import is introduced by botocore. 24 | # We won't need this when this issue is resolved: 25 | # https://github.com/pyinstaller/pyinstaller/issues/1844 26 | '--hidden-import', 'html.parser', 27 | # This hidden import is also introduced by botocore. 
28 | # It appears to be related to this issue: 29 | # https://github.com/pyinstaller/pyinstaller/issues/1935 30 | '--hidden-import', 'configparser', 31 | 'standalone.py' 32 | ], 33 | check=True) 34 | 35 | shutil.make_archive( 36 | base_name=os.path.join( 37 | THIS_DIR, 'dist', 38 | 'Flintrock-{v}-standalone-{os}-{m}'.format( 39 | v=flintrock_version, 40 | os=operating_system, 41 | m=machine_type)), 42 | format='zip', 43 | root_dir=os.path.join(THIS_DIR, 'dist', 'flintrock')) 44 | -------------------------------------------------------------------------------- /hook-flintrock.py: -------------------------------------------------------------------------------- 1 | datas = [ 2 | ('flintrock/scripts', './scripts'), 3 | ('flintrock/templates', './templates'), 4 | ('flintrock/config.yaml.template', './'), 5 | ] 6 | -------------------------------------------------------------------------------- /make-release.sh: -------------------------------------------------------------------------------- 1 | # Update: 2 | # - Default Spark version: https://spark.apache.org/downloads.html 3 | # - Default Hadoop version: https://hadoop.apache.org/releases.html 4 | # - Default Amazon Linux 2 EBS AMI: https://aws.amazon.com/amazon-linux-2/release-notes/ 5 | aws ec2 describe-images \ 6 | --owners amazon \ 7 | --filters \ 8 | "Name=name,Values=amzn2-ami-hvm-*-gp2" \ 9 | "Name=root-device-type,Values=ebs" \ 10 | "Name=virtualization-type,Values=hvm" \ 11 | "Name=architecture,Values=x86_64" \ 12 | --query \ 13 | 'reverse(sort_by(Images, &CreationDate))[:100].{CreationDate:CreationDate,ImageId:ImageId,Name:Name,Description:Description}' 14 | # - Dependencies: pip list --outdated 15 | # Run full acceptance tests 16 | # - Run private VPC tests too 17 | # Update Flintrock version 18 | # - flintrock/__init__.py 19 | # - README blurb about standalone version 20 | # Update CHANGES 21 | # - Check: https://github.com/nchammas/flintrock/pulls?q=is%3Apr+is%3Aclosed+label%3A%22needs+changelog%22 22 | # - Update "Unreleased" section. "Nothing notable yet." 23 | # Tag release on GitHub 24 | # - https://github.com/nchammas/flintrock/releases 25 | # - vX.Y.Z 26 | # - "Here's what's new in X.Y.Z." 27 | 28 | trash dist/ build/ Flintrock.egg-info/ 29 | 30 | python -m build 31 | 32 | # python setup.py register -r https://testpypi.python.org/pypi 33 | 34 | # Test PyPI upload 35 | twine upload dist/* --repository pypitest 36 | open https://test.pypi.org/project/Flintrock/ 37 | 38 | # Production PyPI upload 39 | twine upload dist/* --repository pypi 40 | open https://pypi.org/project/Flintrock/ 41 | 42 | python generate-standalone-package.py 43 | 44 | # Upload release builds to GitHub 45 | open dist/ 46 | # - Wheel 47 | # - macOS standalone package (x86 _and_ arm64?) 
48 | # - Linux standalone package (built by CI) 49 | # Update version to next.dev0 50 | 51 | # --- 52 | 53 | # Test release via pip 54 | deactivate 55 | trash venv 56 | python3 -m venv venv 57 | source venv/bin/activate 58 | 59 | python3 -m pip install --extra-index-url https://testpypi.python.org/simple flintrock 60 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Minimal pyproject file per: https://packaging.python.org/en/latest/guides/modernize-setup-py-project/ 2 | [build-system] 3 | # Minimum setuptools version that supports version in setup.cfg per: https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ 4 | requires = ["setuptools >= 46.4.0"] 5 | build-backend = "setuptools.build_meta" 6 | -------------------------------------------------------------------------------- /requirements/developer.in: -------------------------------------------------------------------------------- 1 | -r user.pip 2 | pytest >= 3.5.0 3 | pytest-cov >= 2.5.1 4 | flake8 == 6.1.0 5 | # PyYAML # requirement already covered by setup.py 6 | -------------------------------------------------------------------------------- /requirements/developer.pip: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.9 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements/developer.pip requirements/developer.in 6 | # 7 | -e file:.#egg=Flintrock 8 | # via -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 9 | bcrypt==4.2.1 10 | # via 11 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 12 | # paramiko 13 | boto3==1.29.4 14 | # via 15 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 16 | # flintrock 17 | botocore==1.32.4 18 | # via 19 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 20 | # boto3 21 | # flintrock 22 | # s3transfer 23 | cffi==1.17.1 24 | # via 25 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 26 | # cryptography 27 | # pynacl 28 | click==8.1.7 29 | # via 30 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 31 | # flintrock 32 | coverage[toml]==7.6.9 33 | # via 34 | # coverage 35 | # pytest-cov 36 | cryptography==44.0.0 37 | # via 38 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 39 | # paramiko 40 | exceptiongroup==1.2.2 41 | # via pytest 42 | flake8==6.1.0 43 | # via -r requirements/developer.in 44 | iniconfig==2.0.0 45 | # via pytest 46 | jmespath==1.0.1 47 | # via 48 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 49 | # boto3 50 | # botocore 51 | mccabe==0.7.0 52 | # via flake8 53 | packaging==24.2 54 | # via pytest 55 | paramiko==3.4.0 56 | # via 57 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 58 | # flintrock 59 | pluggy==1.5.0 60 | # via pytest 61 | pycodestyle==2.11.1 62 | # via flake8 63 | pycparser==2.22 64 | # via 65 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 66 | # cffi 67 | pyflakes==3.1.0 68 | # via flake8 69 | pynacl==1.5.0 70 | # via 71 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 72 | # paramiko 73 | pytest==8.3.4 74 | # via 75 | # -r requirements/developer.in 76 | # 
pytest-cov 77 | pytest-cov==6.0.0 78 | # via -r requirements/developer.in 79 | python-dateutil==2.9.0.post0 80 | # via 81 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 82 | # botocore 83 | pyyaml==6.0.2 84 | # via 85 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 86 | # flintrock 87 | s3transfer==0.7.0 88 | # via 89 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 90 | # boto3 91 | six==1.17.0 92 | # via 93 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 94 | # python-dateutil 95 | tomli==2.2.1 96 | # via 97 | # coverage 98 | # pytest 99 | urllib3==1.26.20 100 | # via 101 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/user.pip 102 | # botocore 103 | -------------------------------------------------------------------------------- /requirements/maintainer.in: -------------------------------------------------------------------------------- 1 | -r developer.pip 2 | wheel >= 0.31.0 3 | twine == 4.0.2 4 | PyInstaller == 6.11.1 5 | build >= 1.0.3, < 2.0.0 6 | -------------------------------------------------------------------------------- /requirements/maintainer.pip: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.9 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements/maintainer.pip requirements/maintainer.in 6 | # 7 | -e file:.#egg=Flintrock 8 | # via -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 9 | altgraph==0.17.4 10 | # via 11 | # macholib 12 | # pyinstaller 13 | backports-tarfile==1.2.0 14 | # via jaraco-context 15 | bcrypt==4.2.1 16 | # via 17 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 18 | # paramiko 19 | boto3==1.29.4 20 | # via 21 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 22 | # flintrock 23 | botocore==1.32.4 24 | # via 25 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 26 | # boto3 27 | # flintrock 28 | # s3transfer 29 | build==1.2.2.post1 30 | # via -r requirements/maintainer.in 31 | certifi==2024.8.30 32 | # via requests 33 | cffi==1.17.1 34 | # via 35 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 36 | # cryptography 37 | # pynacl 38 | charset-normalizer==3.4.0 39 | # via requests 40 | click==8.1.7 41 | # via 42 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 43 | # flintrock 44 | coverage[toml]==7.6.9 45 | # via 46 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 47 | # coverage 48 | # pytest-cov 49 | cryptography==44.0.0 50 | # via 51 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 52 | # paramiko 53 | docutils==0.21.2 54 | # via readme-renderer 55 | exceptiongroup==1.2.2 56 | # via 57 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 58 | # pytest 59 | flake8==6.1.0 60 | # via -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 61 | idna==3.10 62 | # via requests 63 | importlib-metadata==8.5.0 64 | # via 65 | # build 66 | # keyring 67 | # pyinstaller 68 | # pyinstaller-hooks-contrib 69 | # twine 70 | iniconfig==2.0.0 71 | # via 72 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 73 | # pytest 74 | 
jaraco-classes==3.4.0 75 | # via keyring 76 | jaraco-context==6.0.1 77 | # via keyring 78 | jaraco-functools==4.1.0 79 | # via keyring 80 | jmespath==1.0.1 81 | # via 82 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 83 | # boto3 84 | # botocore 85 | keyring==25.5.0 86 | # via twine 87 | macholib==1.16.3 88 | # via pyinstaller 89 | markdown-it-py==3.0.0 90 | # via rich 91 | mccabe==0.7.0 92 | # via 93 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 94 | # flake8 95 | mdurl==0.1.2 96 | # via markdown-it-py 97 | more-itertools==10.5.0 98 | # via 99 | # jaraco-classes 100 | # jaraco-functools 101 | nh3==0.2.19 102 | # via readme-renderer 103 | packaging==24.2 104 | # via 105 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 106 | # build 107 | # pyinstaller 108 | # pyinstaller-hooks-contrib 109 | # pytest 110 | paramiko==3.4.0 111 | # via 112 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 113 | # flintrock 114 | pkginfo==1.12.0 115 | # via twine 116 | pluggy==1.5.0 117 | # via 118 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 119 | # pytest 120 | pycodestyle==2.11.1 121 | # via 122 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 123 | # flake8 124 | pycparser==2.22 125 | # via 126 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 127 | # cffi 128 | pyflakes==3.1.0 129 | # via 130 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 131 | # flake8 132 | pygments==2.18.0 133 | # via 134 | # readme-renderer 135 | # rich 136 | pyinstaller==6.11.1 137 | # via -r requirements/maintainer.in 138 | pyinstaller-hooks-contrib==2024.10 139 | # via pyinstaller 140 | pynacl==1.5.0 141 | # via 142 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 143 | # paramiko 144 | pyproject-hooks==1.2.0 145 | # via build 146 | pytest==8.3.4 147 | # via 148 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 149 | # pytest-cov 150 | pytest-cov==6.0.0 151 | # via -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 152 | python-dateutil==2.9.0.post0 153 | # via 154 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 155 | # botocore 156 | pyyaml==6.0.2 157 | # via 158 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 159 | # flintrock 160 | readme-renderer==44.0 161 | # via twine 162 | requests==2.32.3 163 | # via 164 | # requests-toolbelt 165 | # twine 166 | requests-toolbelt==1.0.0 167 | # via twine 168 | rfc3986==2.0.0 169 | # via twine 170 | rich==13.9.4 171 | # via twine 172 | s3transfer==0.7.0 173 | # via 174 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 175 | # boto3 176 | six==1.17.0 177 | # via 178 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 179 | # python-dateutil 180 | tomli==2.2.1 181 | # via 182 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 183 | # build 184 | # coverage 185 | # pytest 186 | twine==4.0.2 187 | # via -r requirements/maintainer.in 188 | typing-extensions==4.12.2 189 | # via rich 190 | urllib3==1.26.20 191 | # via 192 | # -r /Users/nicholaschammas/Projects/nchammas/flintrock/requirements/developer.pip 193 | # botocore 194 | # 
requests 195 | # twine 196 | wheel==0.45.1 197 | # via -r requirements/maintainer.in 198 | zipp==3.21.0 199 | # via importlib-metadata 200 | 201 | # The following packages are considered to be unsafe in a requirements file: 202 | # setuptools 203 | -------------------------------------------------------------------------------- /requirements/user.in: -------------------------------------------------------------------------------- 1 | # Notes: 2 | # - Run pip from Flintrock's root directory, not from the 3 | # directory containing this file. 4 | # - The `-e .` syntax lets us reuse the requirements already 5 | # specified under `install_requires` in setup.py. 6 | # See: https://caremad.io/2013/07/setup-vs-requirement/ 7 | # - The #egg= syntax is a workaround for pip-tools. 8 | # See: https://github.com/jazzband/pip-tools/issues/204#issuecomment-550051424 9 | --editable file:.#egg=Flintrock 10 | -------------------------------------------------------------------------------- /requirements/user.pip: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.9 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements/user.pip requirements/user.in 6 | # 7 | -e file:.#egg=Flintrock 8 | # via -r requirements/user.in 9 | bcrypt==4.2.1 10 | # via paramiko 11 | boto3==1.29.4 12 | # via flintrock 13 | botocore==1.32.4 14 | # via 15 | # boto3 16 | # flintrock 17 | # s3transfer 18 | cffi==1.17.1 19 | # via 20 | # cryptography 21 | # pynacl 22 | click==8.1.7 23 | # via flintrock 24 | cryptography==44.0.0 25 | # via paramiko 26 | jmespath==1.0.1 27 | # via 28 | # boto3 29 | # botocore 30 | paramiko==3.4.0 31 | # via flintrock 32 | pycparser==2.22 33 | # via cffi 34 | pynacl==1.5.0 35 | # via paramiko 36 | python-dateutil==2.9.0.post0 37 | # via botocore 38 | pyyaml==6.0.2 39 | # via flintrock 40 | s3transfer==0.7.0 41 | # via boto3 42 | six==1.17.0 43 | # via python-dateutil 44 | urllib3==1.26.20 45 | # via botocore 46 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # See: https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ 2 | [metadata] 3 | version = attr: flintrock.__version__ 4 | 5 | [tool:pytest] 6 | norecursedirs = venv 7 | addopts = 8 | --verbose 9 | --cov flintrock 10 | --cov-report html 11 | -rs 12 | # --exitfirst 13 | 14 | [flake8] 15 | max-line-length = 100 16 | exclude = venv, build, dist 17 | ignore = 18 | E501 19 | E252 20 | F821 21 | F841 22 | W503 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | # from flintrock import __version__ 3 | 4 | 5 | with open('README.md') as f: 6 | long_description = f.read() 7 | 8 | setuptools.setup( 9 | name='Flintrock', 10 | # Moved to setup.cfg to avoid import of flintrock during installation of 11 | # flintrock. This used to work, but becomes a problem with isolated builds 12 | # and new pip behavior triggered by pyproject.toml. 
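# The version is instead read by setuptools from flintrock/__init__.py via the
# `version = attr: flintrock.__version__` setting in setup.cfg.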
13 | # version=__version__, 14 | description='A command-line tool for launching Apache Spark clusters.', 15 | long_description=long_description, 16 | long_description_content_type="text/markdown", 17 | url='https://github.com/nchammas/flintrock', 18 | author='Nicholas Chammas', 19 | author_email='nicholas.chammas@gmail.com', 20 | license='Apache License 2.0', 21 | python_requires='>= 3.9', 22 | 23 | # See: https://pypi.python.org/pypi?%3Aaction=list_classifiers 24 | classifiers=[ 25 | 'Development Status :: 5 - Production/Stable', 26 | 27 | 'Intended Audience :: Developers', 28 | 'Intended Audience :: Science/Research', 29 | 30 | 'Topic :: Utilities', 31 | 'Environment :: Console', 32 | 'Operating System :: MacOS :: MacOS X', 33 | 'Operating System :: POSIX', 34 | 35 | 'License :: OSI Approved :: Apache Software License', 36 | 37 | 'Programming Language :: Python :: 3', 38 | 'Programming Language :: Python :: 3 :: Only', 39 | ], 40 | keywords=['Apache Spark'], 41 | 42 | packages=setuptools.find_packages(), 43 | include_package_data=True, 44 | 45 | # We pin dependencies because sometimes projects do not 46 | # strictly follow semantic versioning, so new "feature" 47 | # releases end up making backwards-incompatible changes. 48 | # Sometimes, new releases even introduce bugs which 49 | # totally break Flintrock. 50 | # For example: https://github.com/paramiko/paramiko/issues/615 51 | install_requires=[ 52 | 'boto3 == 1.29.4', 53 | 'botocore == 1.32.4', 54 | 'click == 8.1.7', 55 | 'paramiko == 3.4.0', 56 | 'PyYAML == 6.0.2', 57 | ], 58 | 59 | entry_points={ 60 | 'console_scripts': [ 61 | 'flintrock = flintrock.__main__:main', 62 | ], 63 | }, 64 | ) 65 | -------------------------------------------------------------------------------- /standalone.py: -------------------------------------------------------------------------------- 1 | """ 2 | A standalone script for use by PyInstaller. 3 | 4 | Users should not be running this script. 5 | """ 6 | 7 | import sys 8 | from flintrock.flintrock import main 9 | 10 | if __name__ == '__main__': 11 | sys.exit(main()) 12 | -------------------------------------------------------------------------------- /test-infra/.gitignore: -------------------------------------------------------------------------------- 1 | .terraform* 2 | terraform.tfstate* 3 | terraform.tfvars 4 | -------------------------------------------------------------------------------- /test-infra/README.md: -------------------------------------------------------------------------------- 1 | # Private VPC Test Infrastructure 2 | 3 | The Terraform templates in this directory manage private VPC infrastructure that Flintrock contributors can use to test their changes. 4 | 5 | ## Set Up 6 | 7 | After [installing Terraform][install], you can spin up a private VPC along with associated infrastructure like a NAT gateway: 8 | 9 | [install]: https://developer.hashicorp.com/terraform/install 10 | 11 | ```sh 12 | terraform init 13 | terraform apply 14 | ``` 15 | 16 | You can provide the required variables to this command by creating a `terraform.tfvars` file. The variables you need to define are listed in `variables.tf`. 17 | 18 | Once the `apply` command completes, you'll see some output like this: 19 | 20 | ``` 21 | Apply complete! Resources: 12 added, 0 changed, 0 destroyed. 22 | 23 | Outputs: 24 | 25 | bastion_ip = 18.205.7.24 26 | ``` 27 | 28 | SSH into your bastion host. You'll spin up Flintrock clusters from here. 
A virtual environment and Flintrock config file will already be setup for you based on the variables you provided to Terraform during infrastructure creation: 29 | 30 | ```sh 31 | ssh ec2-user@18.205.7.24 32 | source venv/bin/activate 33 | less /home/ec2-user/.config/flintrock/config.yaml 34 | ``` 35 | 36 | All you need to do is pick a version of Flintrock to install and then you can begin your tests against a private VPC! 37 | 38 | ```sh 39 | pip install https://github.com/nchammas/flintrock/archive/master.zip 40 | flintrock launch test-cluster 41 | flintrock login test-cluster 42 | flintrock destroy test-cluster 43 | ``` 44 | 45 | ## Tear Down 46 | 47 | A NAT gateway is expensive to keep up all the time, so you'll want to tear down the infrastructure when you're done. Be sure to first tear down any Flintrock clusters you launched into the test VPC. 48 | 49 | ```sh 50 | ./delete-test-infra.sh 51 | ``` 52 | 53 | This script calls `terraform destroy` after clearing out some infrastructure that Flintrock creates inside the private VPC. 54 | -------------------------------------------------------------------------------- /test-infra/bastion.tf: -------------------------------------------------------------------------------- 1 | data "http" "myip" { 2 | url = "https://icanhazip.com" 3 | } 4 | 5 | resource "aws_security_group" "ssh" { 6 | name = "flintrock-bastion-ssh" 7 | vpc_id = aws_vpc.main.id 8 | 9 | ingress { 10 | from_port = 22 11 | to_port = 22 12 | protocol = "tcp" 13 | cidr_blocks = ["${chomp(data.http.myip.response_body)}/32"] 14 | } 15 | 16 | egress { 17 | from_port = 0 18 | to_port = 0 19 | protocol = "-1" 20 | cidr_blocks = ["0.0.0.0/0"] 21 | } 22 | } 23 | 24 | resource "aws_instance" "bastion" { 25 | ami = "ami-0a887e401f7654935" 26 | instance_type = "t2.nano" 27 | key_name = var.ec2_key_name 28 | subnet_id = aws_subnet.public.id 29 | associate_public_ip_address = true 30 | vpc_security_group_ids = [aws_security_group.ssh.id] 31 | 32 | tags = { 33 | Name = "flintrock-bastion" 34 | } 35 | 36 | connection { 37 | host = self.public_ip 38 | user = "ec2-user" 39 | private_key = file(var.ssh_key_path) 40 | } 41 | 42 | provisioner "remote-exec" { 43 | inline = [ 44 | "mkdir -p /home/ec2-user/.aws/", 45 | ] 46 | } 47 | 48 | provisioner "file" { 49 | source = var.aws_credentials_path 50 | destination = "/home/ec2-user/.aws/credentials" 51 | } 52 | 53 | provisioner "file" { 54 | source = var.ssh_key_path 55 | destination = "/home/ec2-user/.ssh/${var.ec2_key_name}.pem" 56 | } 57 | 58 | provisioner "remote-exec" { 59 | inline = [ 60 | "chmod go-rwx /home/ec2-user/.ssh/${var.ec2_key_name}.pem", 61 | ] 62 | } 63 | 64 | provisioner "remote-exec" { 65 | inline = [ 66 | "mkdir -p /home/ec2-user/.config/flintrock/", 67 | ] 68 | } 69 | 70 | provisioner "file" { 71 | source = var.flintrock_config_path 72 | destination = "/home/ec2-user/.config/flintrock/config.yaml" 73 | } 74 | 75 | provisioner "remote-exec" { 76 | inline = [ 77 | "sudo yum install -y git", 78 | "sudo yum install -y gcc make patch zlib-devel bzip2 bzip2-devel readline-devel sqlite sqlite-devel openssl11-devel tk-devel libffi-devel xz-devel", 79 | "curl https://pyenv.run | bash", 80 | ".pyenv/bin/pyenv install 3.9", 81 | ".pyenv/bin/pyenv global 3.9", 82 | ".pyenv/bin/pyenv exec python -m venv /home/ec2-user/venv", 83 | "/home/ec2-user/venv/bin/pip install PyYAML", 84 | ] 85 | } 86 | 87 | provisioner "remote-exec" { 88 | inline = [ 89 | <<-EOM 90 | /home/ec2-user/venv/bin/python << EO_PYTHON 91 | import yaml 92 | with 
open('/home/ec2-user/.config/flintrock/config.yaml') as f: 93 | config = yaml.safe_load(f) 94 | config['providers']['ec2']['key-name'] = '${var.ec2_key_name}' 95 | config['providers']['ec2']['identity-file'] = '/home/ec2-user/.ssh/${var.ec2_key_name}.pem' 96 | config['providers']['ec2']['vpc-id'] = '${aws_vpc.main.id}' 97 | config['providers']['ec2']['subnet-id'] = '${aws_subnet.private.id}' 98 | config['providers']['ec2']['authorize-access-from'] = ['${self.private_ip}'] 99 | with open('/home/ec2-user/.config/flintrock/config.yaml', 'w') as f: 100 | yaml.dump(config, f, indent=2) 101 | EO_PYTHON 102 | EOM 103 | ] 104 | } 105 | } 106 | 107 | output "bastion_ip" { 108 | value = aws_instance.bastion.public_ip 109 | } 110 | -------------------------------------------------------------------------------- /test-infra/delete-test-infra.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | set -x 4 | 5 | # Apparently you can't simply do `terraform state show aws_vpc.main.id`. 6 | vpc_id="$( 7 | terraform show -json \ 8 | | jq --raw-output ' 9 | .values.root_module.resources[] 10 | | select(.type == "aws_vpc" and .name == "main") 11 | | .values.id 12 | ' 13 | )" 14 | 15 | security_group_ids=($( 16 | aws ec2 describe-security-groups \ 17 | --filters "Name=vpc-id,Values=$vpc_id" "Name=group-name,Values=flintrock" \ 18 | --query "SecurityGroups[*].{ID:GroupId}" \ 19 | | jq --raw-output '.[] | .ID' 20 | )) 21 | 22 | for sg_id in "${security_group_ids[@]}"; do 23 | aws ec2 delete-security-group --group-id "$sg_id" 24 | done 25 | 26 | terraform destroy 27 | -------------------------------------------------------------------------------- /test-infra/network.tf: -------------------------------------------------------------------------------- 1 | resource "aws_vpc" "main" { 2 | cidr_block = "172.31.0.0/16" 3 | enable_dns_hostnames = true 4 | 5 | tags = { 6 | Name = "flintrock-test" 7 | } 8 | } 9 | 10 | resource "aws_subnet" "public" { 11 | vpc_id = aws_vpc.main.id 12 | cidr_block = "172.31.0.0/24" 13 | availability_zone = "us-east-1a" 14 | map_public_ip_on_launch = true 15 | 16 | tags = { 17 | Name = "flintrock-test-public" 18 | } 19 | } 20 | 21 | resource "aws_subnet" "private" { 22 | vpc_id = aws_vpc.main.id 23 | cidr_block = "172.31.1.0/24" 24 | availability_zone = "us-east-1a" 25 | map_public_ip_on_launch = false 26 | 27 | tags = { 28 | Name = "flintrock-test-private" 29 | } 30 | } 31 | 32 | resource "aws_eip" "nat" { 33 | domain = "vpc" 34 | 35 | tags = { 36 | Name = "flintrock-test-nat-ip" 37 | } 38 | } 39 | 40 | resource "aws_nat_gateway" "private_gateway" { 41 | allocation_id = aws_eip.nat.id 42 | subnet_id = aws_subnet.public.id 43 | 44 | tags = { 45 | Name = "flintrock-test-private-gateway" 46 | } 47 | } 48 | 49 | resource "aws_internet_gateway" "main" { 50 | vpc_id = aws_vpc.main.id 51 | 52 | tags = { 53 | Name = "flintrock-test-gateway" 54 | } 55 | } 56 | 57 | resource "aws_route_table" "public" { 58 | vpc_id = aws_vpc.main.id 59 | 60 | route { 61 | cidr_block = "0.0.0.0/0" 62 | gateway_id = aws_internet_gateway.main.id 63 | } 64 | 65 | tags = { 66 | Name = "flintrock-test-public" 67 | } 68 | } 69 | 70 | resource "aws_route_table" "private" { 71 | vpc_id = aws_vpc.main.id 72 | 73 | route { 74 | cidr_block = "0.0.0.0/0" 75 | nat_gateway_id = aws_nat_gateway.private_gateway.id 76 | } 77 | 78 | tags = { 79 | Name = "flintrock-test-private" 80 | } 81 | } 82 | 83 | resource "aws_route_table_association" "public" { 84 | subnet_id = 
aws_subnet.public.id 85 | route_table_id = aws_route_table.public.id 86 | } 87 | 88 | resource "aws_route_table_association" "private" { 89 | subnet_id = aws_subnet.private.id 90 | route_table_id = aws_route_table.private.id 91 | } 92 | -------------------------------------------------------------------------------- /test-infra/provider.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | aws = { 4 | source = "hashicorp/aws" 5 | version = "~> 5" 6 | } 7 | http = { 8 | source = "hashicorp/http" 9 | version = "~> 3" 10 | } 11 | } 12 | } 13 | 14 | provider "aws" { 15 | region = "us-east-1" 16 | } 17 | -------------------------------------------------------------------------------- /test-infra/variables.tf: -------------------------------------------------------------------------------- 1 | variable "ec2_key_name" { 2 | type = string 3 | } 4 | 5 | variable "ssh_key_path" { 6 | type = string 7 | } 8 | 9 | variable "aws_credentials_path" { 10 | type = string 11 | } 12 | 13 | variable "flintrock_config_path" { 14 | type = string 15 | } 16 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Flintrock Tests 2 | 3 | Use the tests in this directory to help you catch bugs as you work on Flintrock. 4 | 5 | The instructions here assume the following things: 6 | 7 | 1. You've read through our [guide on contributing code](../CONTRIBUTING.md#contributing-code) and installed Flintrock's development dependencies. 8 | 2. You're working from Flintrock's root directory. 9 | 3. You're running Python 3.9+. 10 | 4. You've already setup your Flintrock config file and can launch clusters. 11 | 12 | To run all of Flintrock's tests that don't require AWS credentials, just run: 13 | 14 | ```sh 15 | pytest 16 | ``` 17 | 18 | This is probably what you want to do most of the time. 19 | 20 | To run all of Flintrock's tests, including the ones that require AWS credentials (like acceptance tests), run this: 21 | 22 | ```sh 23 | USE_AWS_CREDENTIALS=true pytest # will launch real clusters! 24 | ``` 25 | 26 | Acceptance tests launch and manipulate real clusters to test Flintrock's various commands and make sure installed services like Spark are working correctly. 27 | 28 | Some things you should keep in mind when running the full test suite with your AWS credentials: 29 | 30 | * **Running the full test suite costs money** (less than $1 for the full test run) since it launches and manipulates real clusters. 31 | * **A failed test run may leave behind running clusters**. You'll need to destroy these manually. 32 | * The full test suite takes a while to run (~30-60 minutes). 33 | * Though the tests that use your AWS credentials are disabled by default, you can explicitly disable them by setting `USE_AWS_CREDENTIALS=""`. Setting that variable to `false` or to any non-empty string won't work. 34 | 35 | Relatively speaking, acceptance tests are expensive, but they are the most valuable type of test for an orchestration tool like Flintrock. Use them judiciously. 
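If you want to exercise just one slice of the suite while iterating, standard `pytest` test selection works here as you would expect: point it at a single file or filter by test name.

```sh
pytest tests/test_ec2.py
pytest -k validate_tags
```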
36 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | import uuid 5 | from collections import OrderedDict 6 | 7 | # Flintrock 8 | from flintrock.core import StorageDirs 9 | 10 | # External 11 | import pytest 12 | 13 | HADOOP_VERSION = '3.3.6' 14 | SPARK_VERSION = '3.5.0' 15 | SPARK_GIT_COMMIT = 'ce5ddad990373636e94071e7cef2f31021add07b' # 3.5.0 16 | JAVA_VERSION = '11' 17 | 18 | 19 | class Dummy(): 20 | pass 21 | 22 | 23 | aws_credentials_required = ( 24 | pytest.mark.skipif( 25 | not bool(os.environ.get('USE_AWS_CREDENTIALS')), 26 | reason="USE_AWS_CREDENTIALS not set")) 27 | 28 | 29 | @pytest.fixture(scope='session') 30 | def project_root_dir(): 31 | return os.path.dirname( 32 | os.path.dirname( 33 | os.path.realpath(__file__) 34 | ) 35 | ) 36 | 37 | 38 | @pytest.fixture(scope='session') 39 | def dummy_cluster(): 40 | storage_dirs = StorageDirs( 41 | root='/media/root', 42 | ephemeral=['/media/eph1', '/media/eph2'], 43 | persistent=None, 44 | ) 45 | 46 | cluster = Dummy() 47 | cluster.name = 'test' 48 | cluster.storage_dirs = storage_dirs 49 | cluster.master_ip = '10.0.0.1' 50 | cluster.master_host = 'master.hostname' 51 | cluster.master_private_host = 'master.privatehostname' 52 | cluster.slave_ips = ['10.0.0.2'] 53 | cluster.slave_hosts = ['slave1.hostname'] 54 | cluster.slave_private_hosts = ['slave1.privatehostname'] 55 | 56 | return cluster 57 | 58 | 59 | def random_string(): 60 | return str(uuid.uuid4())[:8] 61 | 62 | 63 | def launch_cluster( 64 | *, 65 | cluster_name, 66 | instance_type, 67 | spark_version, 68 | spark_git_commit): 69 | p = subprocess.run([ 70 | 'flintrock', 'launch', cluster_name, 71 | '--num-slaves', '1', 72 | '--install-hdfs', 73 | '--hdfs-version', HADOOP_VERSION, 74 | '--install-spark', 75 | '--spark-version', spark_version, 76 | '--spark-git-commit', spark_git_commit, 77 | '--java-version', JAVA_VERSION, 78 | '--assume-yes', 79 | '--ec2-instance-type', instance_type]) 80 | assert p.returncode == 0 81 | 82 | 83 | def stop_cluster(cluster_name): 84 | p = subprocess.run([ 85 | 'flintrock', 'stop', cluster_name, '--assume-yes']) 86 | assert p.returncode == 0 87 | 88 | 89 | def start_cluster(cluster_name): 90 | p = subprocess.run([ 91 | 'flintrock', 'start', cluster_name]) 92 | assert p.returncode == 0 93 | 94 | 95 | # TODO: This should reuse FlintrockCluster. 96 | class ClusterConfig: 97 | def __init__( 98 | self, 99 | *, 100 | restarted, 101 | instance_type, 102 | spark_version=SPARK_VERSION, 103 | spark_git_commit=''): 104 | self.restarted = restarted 105 | self.instance_type = instance_type 106 | self.spark_version = spark_version 107 | self.spark_git_commit = spark_git_commit 108 | 109 | def __str__(self): 110 | return str(OrderedDict(sorted(vars(self).items()))) 111 | 112 | 113 | cluster_configs = [ 114 | ClusterConfig(restarted=False, instance_type='t3.small'), 115 | ClusterConfig(restarted=True, instance_type='t3.small'), 116 | ClusterConfig(restarted=False, instance_type='m5.large'), 117 | ClusterConfig(restarted=True, instance_type='m5.large'), 118 | # We don't test all cluster states when building Spark because 119 | # it takes a very long time. 
120 | ClusterConfig( 121 | restarted=True, 122 | instance_type='m5.xlarge', 123 | spark_version='', 124 | spark_git_commit=SPARK_GIT_COMMIT, 125 | ), 126 | ] 127 | 128 | 129 | @pytest.fixture( 130 | scope='module', 131 | params=cluster_configs, 132 | ids=[str(cc) for cc in cluster_configs]) 133 | def running_cluster(request): 134 | """ 135 | Return the name of a running Flintrock cluster. 136 | """ 137 | cluster_name = 'running-cluster-' + random_string() 138 | 139 | try: 140 | launch_cluster( 141 | cluster_name=cluster_name, 142 | instance_type=request.param.instance_type, 143 | spark_version=request.param.spark_version, 144 | spark_git_commit=request.param.spark_git_commit) 145 | 146 | if request.param.restarted: 147 | stop_cluster(cluster_name) 148 | start_cluster(cluster_name) 149 | 150 | yield cluster_name 151 | finally: 152 | p = subprocess.run([ 153 | 'flintrock', 'destroy', cluster_name, '--assume-yes', 154 | ]) 155 | assert p.returncode == 0 156 | 157 | 158 | @pytest.fixture(scope='module') 159 | def stopped_cluster(request): 160 | cluster_name = 'running-cluster-' + random_string() 161 | 162 | try: 163 | p = subprocess.run([ 164 | 'flintrock', 'launch', cluster_name, 165 | '--num-slaves', '1', 166 | '--no-install-hdfs', 167 | '--no-install-spark', 168 | '--assume-yes', 169 | '--ec2-instance-type', 't3.small']) 170 | assert p.returncode == 0 171 | 172 | p = subprocess.run([ 173 | 'flintrock', 'stop', cluster_name, '--assume-yes']) 174 | assert p.returncode == 0 175 | 176 | yield cluster_name 177 | finally: 178 | p = subprocess.run([ 179 | 'flintrock', 'destroy', cluster_name, '--assume-yes', 180 | ]) 181 | assert p.returncode == 0 182 | 183 | 184 | @pytest.fixture(scope='module') 185 | def remote_file(request, running_cluster): 186 | """ 187 | Return the path to a remote dummy file on a running Flintrock cluster. 188 | """ 189 | file_path = '/tmp/remote_dummy_file_for_testing' 190 | 191 | try: 192 | p = subprocess.run([ 193 | 'flintrock', 'run-command', running_cluster, '--', 194 | 'echo -e "{data}" > {path}'.format( 195 | data='test\n' * 3, 196 | path=file_path)]) 197 | assert p.returncode == 0 198 | 199 | yield file_path 200 | finally: 201 | p = subprocess.run([ 202 | 'flintrock', 'run-command', running_cluster, '--', 203 | 'rm', '-f', file_path, 204 | ]) 205 | assert p.returncode == 0 206 | 207 | 208 | @pytest.fixture(scope='module') 209 | def local_file(request): 210 | """ 211 | Return the path to a local dummy file. 
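The file is a 1 KiB zero-filled temporary file, which is enough to exercise commands like `flintrock copy-file`.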
212 | """ 213 | file = tempfile.NamedTemporaryFile(delete=False) 214 | try: 215 | with open(file.name, 'wb') as f: 216 | f.truncate(1024) 217 | 218 | yield file.name 219 | finally: 220 | os.remove(file.name) 221 | -------------------------------------------------------------------------------- /tests/test_acceptance.py: -------------------------------------------------------------------------------- 1 | import json 2 | import subprocess 3 | import urllib.request 4 | 5 | # Flintrock modules 6 | from flintrock.exceptions import ClusterInvalidState 7 | from conftest import aws_credentials_required 8 | 9 | pytestmark = aws_credentials_required 10 | 11 | 12 | def test_describe_stopped_cluster(stopped_cluster): 13 | p = subprocess.run([ 14 | 'flintrock', 'describe', stopped_cluster], 15 | stdout=subprocess.PIPE) 16 | assert p.returncode == 0 17 | assert p.stdout.startswith(stopped_cluster.encode()) 18 | 19 | 20 | def test_stop_stopped_cluster(stopped_cluster): 21 | p = subprocess.run([ 22 | 'flintrock', 'stop', stopped_cluster], 23 | stdout=subprocess.PIPE) 24 | assert p.returncode == 0 25 | assert p.stdout == b"Cluster is already stopped.\n" 26 | 27 | 28 | def test_try_launching_duplicate_stopped_cluster(stopped_cluster): 29 | p = subprocess.run([ 30 | 'flintrock', 'launch', stopped_cluster], 31 | stderr=subprocess.PIPE) 32 | assert p.returncode == 1 33 | assert p.stderr.decode('utf-8').startswith( 34 | "Cluster {c} already exists".format(c=stopped_cluster)) 35 | 36 | 37 | def test_start_running_cluster(running_cluster): 38 | p = subprocess.run([ 39 | 'flintrock', 'start', running_cluster], 40 | stdout=subprocess.PIPE) 41 | assert p.returncode == 0 42 | assert p.stdout == b"Cluster is already running.\n" 43 | 44 | 45 | def test_try_launching_duplicate_cluster(running_cluster): 46 | p = subprocess.run([ 47 | 'flintrock', 'launch', running_cluster], 48 | stderr=subprocess.PIPE) 49 | assert p.returncode == 1 50 | assert p.stderr.decode('utf-8').startswith( 51 | "Cluster {c} already exists".format(c=running_cluster)) 52 | 53 | 54 | def test_describe_running_cluster(running_cluster): 55 | p = subprocess.run([ 56 | 'flintrock', 'describe', running_cluster], 57 | stdout=subprocess.PIPE) 58 | assert p.returncode == 0 59 | assert p.stdout.startswith(running_cluster.encode()) 60 | 61 | 62 | def test_run_command_on_running_cluster(running_cluster): 63 | p = subprocess.run([ 64 | 'flintrock', 'run-command', running_cluster, '--', 'ls', '-l']) 65 | assert p.returncode == 0 66 | 67 | 68 | def test_copy_file_on_running_cluster(running_cluster, local_file): 69 | p = subprocess.run([ 70 | 'flintrock', 'copy-file', running_cluster, local_file, '/tmp/copied_from_local']) 71 | assert p.returncode == 0 72 | 73 | 74 | def test_hdfs_on_running_cluster(running_cluster, remote_file): 75 | hdfs_path = '/hdfs_file' 76 | 77 | p = subprocess.run([ 78 | 'flintrock', 'run-command', running_cluster, '--master-only', '--', 79 | './hadoop/bin/hdfs', 'dfs', '-put', remote_file, hdfs_path]) 80 | assert p.returncode == 0 81 | 82 | p = subprocess.run([ 83 | 'flintrock', 'run-command', running_cluster, '--', 84 | './hadoop/bin/hdfs', 'dfs', '-cat', hdfs_path]) 85 | assert p.returncode == 0 86 | 87 | 88 | def test_spark_on_running_cluster(running_cluster, remote_file): 89 | # TODO: Run a real query; e.g. 
sc.parallelize(range(10)).count() 90 | p = subprocess.run([ 91 | 'flintrock', 'run-command', running_cluster, '--', 92 | './spark/bin/pyspark', '--help']) 93 | assert p.returncode == 0 94 | 95 | p = subprocess.run([ 96 | 'flintrock', 'describe', running_cluster, '--master-hostname-only'], 97 | stdout=subprocess.PIPE) 98 | master_address = p.stdout.strip().decode('utf-8') 99 | assert p.returncode == 0 100 | 101 | spark_master_ui = 'http://{m}:8080/json/'.format(m=master_address) 102 | spark_ui_info = json.loads( 103 | urllib.request.urlopen(spark_master_ui).read().decode('utf-8')) 104 | assert spark_ui_info['status'] == 'ALIVE' 105 | 106 | 107 | def test_operations_against_non_existent_cluster(): 108 | cluster_name = 'this_cluster_doesnt_exist_yo' 109 | expected_error_message = ( 110 | b"No cluster " + cluster_name.encode('utf-8') + b" in region ") 111 | 112 | for command in ['describe', 'stop', 'start', 'login', 'destroy']: 113 | p = subprocess.run( 114 | ['flintrock', command, cluster_name], 115 | stderr=subprocess.PIPE) 116 | assert p.returncode == 1 117 | assert p.stderr.startswith(expected_error_message) 118 | 119 | for command in ['run-command']: 120 | p = subprocess.run( 121 | ['flintrock', command, cluster_name, 'ls'], 122 | stderr=subprocess.PIPE) 123 | assert p.returncode == 1 124 | assert p.stderr.startswith(expected_error_message) 125 | 126 | for command in ['copy-file']: 127 | p = subprocess.run( 128 | ['flintrock', command, cluster_name, __file__, '/remote/path'], 129 | stderr=subprocess.PIPE) 130 | assert p.returncode == 1 131 | assert p.stderr.startswith(expected_error_message) 132 | 133 | 134 | def test_operations_against_stopped_cluster(stopped_cluster): 135 | p = subprocess.run( 136 | ['flintrock', 'run-command', stopped_cluster, 'ls'], 137 | stderr=subprocess.PIPE) 138 | expected_error_message = str( 139 | ClusterInvalidState( 140 | attempted_command='run-command', 141 | state='stopped')) 142 | assert p.returncode == 1 143 | assert p.stderr.decode('utf-8').strip() == expected_error_message 144 | 145 | p = subprocess.run( 146 | ['flintrock', 'copy-file', stopped_cluster, __file__, '/remote/path'], 147 | stderr=subprocess.PIPE) 148 | expected_error_message = str( 149 | ClusterInvalidState( 150 | attempted_command='copy-file', 151 | state='stopped')) 152 | assert p.returncode == 1 153 | assert p.stderr.decode('utf-8').strip() == expected_error_message 154 | 155 | 156 | def test_launch_with_bad_ami(): 157 | p = subprocess.run([ 158 | 'flintrock', 'launch', 'whatever-cluster', 159 | '--ec2-ami', 'ami-badbad00'], 160 | stderr=subprocess.PIPE) 161 | assert p.returncode == 1 162 | assert p.stderr.startswith(b"Error: Could not find") 163 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | # Flintrock 5 | from flintrock.core import ( 6 | generate_template_mapping, 7 | get_formatted_template, 8 | ) 9 | 10 | FLINTROCK_ROOT_DIR = ( 11 | os.path.dirname( 12 | os.path.dirname( 13 | os.path.realpath(__file__)))) 14 | 15 | 16 | @pytest.mark.parametrize( 17 | 'spark_version', [ 18 | (''), 19 | ('3.5.0'), 20 | ('a28880f3b9c63d86368bcd6cbbaa6a9af7075409'), 21 | ]) 22 | def test_templates(dummy_cluster, spark_version): 23 | template_dir = os.path.join(FLINTROCK_ROOT_DIR, 'flintrock', 'templates') 24 | for (dirpath, dirnames, filenames) in os.walk(template_dir): 25 | if filenames: 26 | for filename in filenames: 27 | 
template_path = os.path.join(dirpath, filename) 28 | mapping = generate_template_mapping( 29 | cluster=dummy_cluster, 30 | hadoop_version='', 31 | spark_version=spark_version, 32 | spark_executor_instances=0, 33 | ) 34 | get_formatted_template( 35 | path=template_path, 36 | mapping=mapping, 37 | ) 38 | -------------------------------------------------------------------------------- /tests/test_ec2.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import click 3 | from flintrock.ec2 import validate_tags 4 | 5 | 6 | def test_validate_tags(): 7 | # List of test cases; each test case is a tuple, with first element 8 | # the input and the second element the expected output 9 | positive_test_cases = [ 10 | # basic case 11 | (['k1,v1'], [{'Key': 'k1', 'Value': 'v1'}]), 12 | # strips whitespace? 13 | (['k2, v2 '], [{'Key': 'k2', 'Value': 'v2'}]), 14 | # empty Value 15 | (['k3,'], [{'Key': 'k3', 'Value': ''}]), 16 | # multiple tags 17 | (['k4,v4', 'k5,v5'], 18 | [{'Key': 'k4', 'Value': 'v4'}, {'Key': 'k5', 'Value': 'v5'}])] 19 | 20 | for test_case in positive_test_cases: 21 | ec2_tags = validate_tags(test_case[0]) 22 | assert isinstance(ec2_tags, list) 23 | for i, ec2_tag in enumerate(ec2_tags): 24 | expected_dict = test_case[1][i] 25 | for k in expected_dict: 26 | assert k in ec2_tag 27 | assert ec2_tag[k] == expected_dict[k] 28 | 29 | negative_test_cases = [["k1"], ["k2,v2,"], ["k3,,v3"], [",v4"]] 30 | for test_case in negative_test_cases: 31 | with pytest.raises(click.BadParameter): 32 | validate_tags(test_case) 33 | -------------------------------------------------------------------------------- /tests/test_flintrock.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # External modules 4 | import pytest 5 | 6 | # Flintrock modules 7 | from flintrock.exceptions import ( 8 | Error, 9 | UsageError, 10 | ) 11 | from flintrock.flintrock import ( 12 | option_name_to_variable_name, 13 | variable_name_to_option_name, 14 | option_requires, 15 | mutually_exclusive, 16 | get_latest_commit, 17 | validate_download_source, 18 | normalize_keys, 19 | ) 20 | 21 | 22 | def test_option_name_to_variable_name_conversions(): 23 | test_cases = [ 24 | ('--ec2-user', 'ec2_user'), 25 | ('--provider', 'provider'), 26 | ('--spark-git-commit', 'spark_git_commit') 27 | ] 28 | 29 | for option_name, variable_name in test_cases: 30 | assert option_name_to_variable_name(option_name) == variable_name 31 | assert variable_name_to_option_name(variable_name) == option_name 32 | assert option_name == variable_name_to_option_name( 33 | option_name_to_variable_name(option_name)) 34 | assert variable_name == option_name_to_variable_name( 35 | variable_name_to_option_name(variable_name)) 36 | 37 | 38 | def test_option_requires(): 39 | some_option = 'something' 40 | unset_option = None 41 | set_option = '와 짠이다' 42 | 43 | option_requires( 44 | option='--some-option', 45 | requires_all=['--set_option'], 46 | scope=locals() 47 | ) 48 | 49 | option_requires( 50 | option='--some-option', 51 | requires_any=[ 52 | '--set_option', 53 | '--unset-option'], 54 | scope=locals() 55 | ) 56 | 57 | with pytest.raises(UsageError): 58 | option_requires( 59 | option='--some-option', 60 | requires_all=[ 61 | '--set-option', 62 | '--unset-option'], 63 | scope=locals() 64 | ) 65 | 66 | with pytest.raises(UsageError): 67 | option_requires( 68 | option='--some-option', 69 | requires_any=[ 70 | '--unset-option'], 71 | scope=locals() 72 | ) 73 | 74 | 
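# Illustrative sketch (hypothetical; not part of the test suite). As the test
# above demonstrates, option_requires() is handed scope=locals() and resolves
# each required option name to a variable in that scope (compare
# option_name_to_variable_name(), e.g. '--set-option' -> 'set_option'),
# raising UsageError when the triggering option is set but a required option
# is unset. A minimal sketch of guarding one CLI option on another; the
# option names and values below are illustrative only:
#
#     ec2_ami = 'ami-12345678'
#     ec2_user = None
#     option_requires(
#         option='--ec2-ami',
#         requires_all=['--ec2-user'],
#         scope=locals(),
#     )
#     # raises UsageError because the required option is not set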
75 | def test_option_requires_conditional_value(): 76 | unset_option = None 77 | set_option = '대박' 78 | 79 | some_option = 'magic' 80 | option_requires( 81 | option='--some-option', 82 | conditional_value='magic', 83 | requires_any=[ 84 | '--set-option', 85 | '--unset-option'], 86 | scope=locals() 87 | ) 88 | 89 | some_option = 'not magic' 90 | option_requires( 91 | option='--some-option', 92 | conditional_value='magic', 93 | requires_any=[ 94 | '--unset-option'], 95 | scope=locals() 96 | ) 97 | 98 | some_option = '' 99 | option_requires( 100 | option='--some-option', 101 | conditional_value='', 102 | requires_any=[ 103 | '--unset-option'], 104 | scope=locals() 105 | ) 106 | 107 | with pytest.raises(UsageError): 108 | some_option = 'magic' 109 | option_requires( 110 | option='--some-option', 111 | conditional_value='magic', 112 | requires_any=[ 113 | '--unset-option'], 114 | scope=locals() 115 | ) 116 | 117 | 118 | def test_mutually_exclusive(): 119 | option1 = 'yes' 120 | option2 = None 121 | 122 | mutually_exclusive( 123 | options=[ 124 | '--option1', 125 | '--option2'], 126 | scope=locals()) 127 | 128 | option2 = 'no' 129 | with pytest.raises(UsageError): 130 | mutually_exclusive( 131 | options=[ 132 | '--option1', 133 | '--option2'], 134 | scope=locals()) 135 | 136 | 137 | @pytest.mark.xfail( 138 | reason="This may fail on CI with HTTP Error 403: rate limit exceeded.", 139 | raises=Exception, 140 | condition=(os.environ.get('CI') == 'true'), 141 | ) 142 | def test_get_latest_commit(): 143 | sha = get_latest_commit("https://github.com/apache/spark") 144 | assert len(sha) == 40 145 | 146 | with pytest.raises(UsageError): 147 | get_latest_commit("https://google.com") 148 | 149 | with pytest.raises(Exception): 150 | get_latest_commit("https://github.com/apache/nonexistent-repo") 151 | 152 | 153 | @pytest.mark.xfail( 154 | reason=( 155 | "This test will fail whenever a new Hadoop or Spark " 156 | "release is made, which is out of our control." 
157 | ), 158 | raises=Error, 159 | ) 160 | def test_validate_valid_download_source(): 161 | validate_download_source("https://www.apache.org/dyn/closer.lua?action=download&filename=hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz") 162 | validate_download_source("https://www.apache.org/dyn/closer.lua?action=download&filename=spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz") 163 | 164 | 165 | def test_validate_invalid_download_source(): 166 | with pytest.raises(Error): 167 | validate_download_source("https://www.apache.org/dyn/closer.lua?action=download&filename=hadoop/common/hadoop-invalid-version/hadoop-invalid-version.tar.gz") 168 | 169 | 170 | def test_normalize_keys(): 171 | config_file_settings = { 172 | "java-version": 11, 173 | "ec2": { 174 | "spot-price": 0.05, 175 | "key-name": "key.pem", 176 | }, 177 | "tags": ["name, test-cluster"], 178 | } 179 | cli_settings = { 180 | "java_version": 11, 181 | "ec2": { 182 | "spot_price": 0.05, 183 | "key_name": "key.pem", 184 | }, 185 | "tags": ["name, test-cluster"], 186 | } 187 | assert normalize_keys(config_file_settings) == cli_settings 188 | -------------------------------------------------------------------------------- /tests/test_pyinstaller_packaging.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | import subprocess 5 | 6 | from conftest import aws_credentials_required 7 | 8 | # External modules 9 | import pytest 10 | 11 | 12 | def pyinstaller_exists(): 13 | return shutil.which('pyinstaller') is not None 14 | 15 | 16 | # PyTest doesn't let you place skipif markers on fixtures. Otherwise, 17 | # we'd ideally be able to do that and all the dependent tests would be 18 | # skipped automatically. 19 | @pytest.fixture(scope='session') 20 | def pyinstaller_flintrock(): 21 | flintrock_executable_path = './dist/flintrock/flintrock' 22 | p = subprocess.run([ 23 | 'python', 'generate-standalone-package.py' 24 | ]) 25 | assert p.returncode == 0 26 | assert glob.glob('./dist/*.zip') 27 | assert os.path.isfile(flintrock_executable_path) 28 | return flintrock_executable_path 29 | 30 | 31 | @pytest.mark.skipif(not pyinstaller_exists(), reason="PyInstaller is required") 32 | def test_pyinstaller_flintrock_help(pyinstaller_flintrock): 33 | p = subprocess.run( 34 | # Without explicitly setting the locale here, Click will complain 35 | # when this test is run via GitHub Desktop that the locale is 36 | # misconfigured. 37 | """ 38 | export LANG=en_US.UTF-8 39 | {flintrock_executable} 40 | """.format( 41 | flintrock_executable=pyinstaller_flintrock 42 | ), 43 | shell=True) 44 | assert p.returncode == 0 45 | 46 | 47 | @pytest.mark.skipif(not pyinstaller_exists(), reason="PyInstaller is required") 48 | @aws_credentials_required 49 | def test_pyinstaller_flintrock_describe(pyinstaller_flintrock): 50 | # This test picks up some PyInstaller packaging issues that are not 51 | # exposed by the help test. 52 | p = subprocess.run( 53 | # Without explicitly setting the locale here, Click will complain 54 | # when this test is run via GitHub Desktop that the locale is 55 | # misconfigured.
56 | """ 57 | export LANG=en_US.UTF-8 58 | {flintrock_executable} describe 59 | """.format( 60 | flintrock_executable=pyinstaller_flintrock, 61 | ), 62 | shell=True) 63 | assert p.returncode == 0 64 | -------------------------------------------------------------------------------- /tests/test_scripts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | 5 | import pytest 6 | 7 | 8 | @pytest.fixture(scope='module') 9 | def tgz_file(request): 10 | with tempfile.NamedTemporaryFile() as source_file: 11 | source_file.file.write('Hi!'.encode('utf-8')) 12 | tgz_file_name = source_file.name + '.tgz' 13 | subprocess.run( 14 | ['tar', 'czf', tgz_file_name, source_file.name], 15 | check=True, 16 | ) 17 | 18 | def destroy(): 19 | subprocess.run( 20 | ['rm', tgz_file_name], 21 | check=True, 22 | ) 23 | request.addfinalizer(destroy) 24 | 25 | return tgz_file_name 26 | 27 | 28 | def test_download_package(project_root_dir, tgz_file): 29 | with tempfile.TemporaryDirectory() as temp_dir: 30 | subprocess.run( 31 | [ 32 | 'python', 33 | os.path.join(project_root_dir, 'flintrock/scripts/download-package.py'), 34 | 'file://' + tgz_file, 35 | temp_dir, 36 | ], 37 | check=True, 38 | ) 39 | -------------------------------------------------------------------------------- /tests/test_static.py: -------------------------------------------------------------------------------- 1 | import compileall 2 | import os 3 | import subprocess 4 | 5 | # External modules 6 | import yaml 7 | 8 | FLINTROCK_ROOT_DIR = ( 9 | os.path.dirname( 10 | os.path.dirname( 11 | os.path.realpath(__file__)))) 12 | 13 | TEST_TARGETS = [ 14 | 'setup.py', 15 | 'flintrock/', 16 | 'tests/'] 17 | 18 | TEST_PATHS = [ 19 | os.path.join(FLINTROCK_ROOT_DIR, path) for path in TEST_TARGETS] 20 | 21 | 22 | def test_code_compiles(): 23 | for path in TEST_PATHS: 24 | if os.path.isdir(path): 25 | result = compileall.compile_dir(path) 26 | else: 27 | result = compileall.compile_file(path) 28 | # NOTE: This is not publicly documented, but a return of 1 means 29 | # the compilation succeeded. 30 | # See: http://bugs.python.org/issue25768 31 | assert result == 1 32 | 33 | 34 | def test_flake8(): 35 | ret = subprocess.call(['flake8'], cwd=FLINTROCK_ROOT_DIR) 36 | assert ret == 0 37 | 38 | 39 | def test_config_template_is_valid(): 40 | config_template = os.path.join(FLINTROCK_ROOT_DIR, 'flintrock', 'config.yaml.template') 41 | with open(config_template) as f: 42 | yaml.safe_load(f) 43 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | from flintrock.util import spark_hadoop_build_version 2 | 3 | 4 | def test_spark_hadoop_build_version(): 5 | assert spark_hadoop_build_version('3.1.3') == 'hadoop3.2' 6 | --------------------------------------------------------------------------------
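The fixtures and acceptance tests above share one pattern: module-scoped fixtures in conftest.py provision real AWS resources (and always destroy them in a finally block), while each test shells out to the flintrock CLI and asserts on the process return code. A minimal sketch of how an additional acceptance test could follow that pattern in tests/test_acceptance.py; the test name and destination path are hypothetical, not taken from the repository:

import subprocess


def test_copy_file_to_tmp(running_cluster, local_file):
    # Reuses the running_cluster and local_file fixtures from conftest.py,
    # mirroring test_copy_file_on_running_cluster above. The destination
    # path '/tmp/example_copy' is arbitrary and purely illustrative.
    p = subprocess.run([
        'flintrock', 'copy-file', running_cluster, local_file, '/tmp/example_copy',
    ])
    assert p.returncode == 0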