├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── lint.yml │ └── publish.yml ├── CHANGELOG ├── CONTRIBUTING.md ├── LICENSE-2.0.txt ├── README.md ├── base-alpine └── Dockerfile ├── base-debian └── Dockerfile └── k8s-dataflow ├── Dockerfile └── docker-entrypoint.sh /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | **Docker image**: 12 | 13 | **Version**: 14 | 15 | **Expected behavior**: 16 | 17 | **Actual behavior**: 18 | 19 | **Steps to reproduce**: 20 | 21 | 23 | 1. 24 | 2. 25 | 26 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint Dockerfiles 2 | 3 | on: push # runs on every push event 4 | 5 | jobs: 6 | lint: 7 | name: Lint Dockerfiles 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v2 13 | 14 | - name: Lint base-alpine 15 | uses: brpaz/hadolint-action@master # NOTE(review): follows the action's "master" ref rather than a fixed release - consider pinning (applies to all three lint steps) 16 | with: 17 | dockerfile: base-alpine/Dockerfile 18 | 19 | - name: Lint base-debian 20 | uses: brpaz/hadolint-action@master 21 | with: 22 | dockerfile: base-debian/Dockerfile 23 | 24 | - name: Lint k8s-dataflow 25 | uses: brpaz/hadolint-action@master 26 | with: 27 | dockerfile: k8s-dataflow/Dockerfile 28 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish images to Docker Hub 2 | 3 | on: 4 | push: 5 | tags: 6 | - base-alpine/* 7 | - base-debian/* 8 | - k8s-dataflow/* 9 | 10 | jobs: 11 | publish: 12 | name: Publish images to Docker Hub 13 | runs-on: 
ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v2 18 | 19 | - name: Extract release version from ref 20 | id: get_version # exposes steps.get_version.outputs.VERSION = text after the last "/" of the pushed ref 21 | run: echo "VERSION=${GITHUB_REF##*/}" >> "$GITHUB_OUTPUT" 22 | 23 | - name: Build and push base-alpine 24 | if: startsWith(github.ref, 'refs/tags/base-alpine/') 25 | uses: docker/build-push-action@v1 26 | with: 27 | username: ${{ secrets.DOCKER_USERNAME }} 28 | password: ${{ secrets.DOCKER_PASSWORD }} 29 | repository: snowplow/base-alpine 30 | dockerfile: base-alpine/Dockerfile 31 | path: base-alpine 32 | tags: latest,${{ steps.get_version.outputs.VERSION }} 33 | push: true 34 | 35 | - name: Build and push base-debian 36 | if: startsWith(github.ref, 'refs/tags/base-debian/') 37 | uses: docker/build-push-action@v1 38 | with: 39 | username: ${{ secrets.DOCKER_USERNAME }} 40 | password: ${{ secrets.DOCKER_PASSWORD }} 41 | repository: snowplow/base-debian 42 | dockerfile: base-debian/Dockerfile 43 | path: base-debian 44 | tags: latest,${{ steps.get_version.outputs.VERSION }} 45 | push: true 46 | 47 | - name: Build and push k8s-dataflow 48 | if: startsWith(github.ref, 'refs/tags/k8s-dataflow/') 49 | uses: docker/build-push-action@v1 50 | with: 51 | username: ${{ secrets.DOCKER_USERNAME }} 52 | password: ${{ secrets.DOCKER_PASSWORD }} 53 | repository: snowplow/k8s-dataflow 54 | dockerfile: k8s-dataflow/Dockerfile 55 | path: k8s-dataflow 56 | tags: latest,${{ steps.get_version.outputs.VERSION }} 57 | push: true 58 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | Release 20 (2020-10-29) 2 | ----------------------- 3 | Fix unsigned repository issue of GCP SDK (#154) 4 | Make k8s-dataflow based on JDK11 (#153) 5 | Add migration note to README (#139) 6 | Migrate from Bintray to Docker Hub (#151) 7 | Migrate from Travis to Github Actions (#152) 8 | Fix linting issues (#150) 9 | Integrate 
hadolint (#149) 10 | Remove all non-maintained images from the repository (#147) 11 | 12 | Release 19 (2020-09-18) 13 | ----------------------- 14 | Bump base images of base images (#144) 15 | 16 | Release 18 (2020-07-24) 17 | ----------------------- 18 | Update travis not to build pipeline components (#142) 19 | Fix permission issue in k8s-dataflow (#140) 20 | Add support for beam-enrich CLI params in k8s-dataflow (#137) 21 | Add image for k8s-based dataflow deployment 22 | 23 | Release 17 (2019-06-13) 24 | ----------------------- 25 | Align deployment for base images (#132) 26 | 27 | Release 16 (2019-06-13) 28 | ----------------------- 29 | Add base-debian:0.2.0 image with JDK 11.0.3 (#130) 30 | 31 | Release 15 (2019-05-17) 32 | ----------------------- 33 | Use Stream Enrich 0.21.0 in the Docker Compose example (#128) 34 | Add images for Stream Enrich 0.21.0 (#127) 35 | Add image for EmrEtlRunner R114 (#126) 36 | Change Scala Stream Collector port from 80 to 8080 (#124) 37 | 38 | Release 14 (2019-02-27) 39 | ----------------------- 40 | Add images for the Scala Stream Collector 0.15.0 (#119) 41 | Add images for Stream Enrich 0.20.0 (#118) 42 | Add image for EmrEtlRunner R113 (#120) 43 | Use the Scala Stream Collector 0.15.0 in the Docker Compose example (#122) 44 | Use Stream Enrich 0.20.0 in the Docker Compose example (#121) 45 | 46 | Release 13 (2019-02-19) 47 | ----------------------- 48 | Add EmrEtlRunner image (#34) 49 | Add EmrEtlRunner R112 image (#116) 50 | Add CI/CD for the EmrEtlRunner image (#107) 51 | Add igluctl images (#114) 52 | Version base images (#108) 53 | Add a contributing guide (#115) 54 | Add GitHub templates (#105) 55 | Replace base image links in the different readmes (#113) 56 | Fix base-debian image test flakiness (#112) 57 | Add a Kubernetes deployment example for Iglu Server (#117) 58 | Replace ip lookups with anon ip in the example's enrichments (#102) 59 | Downgrade the version of the resolver in the example to 1-0-2 (#99) 60 | 
Remove Stream Enrich PubSub deployment (#97) 61 | Update copyrights to 2019 (#110) 62 | 63 | Release 12 (2018-09-07) 64 | ----------------------- 65 | Add Stream Enrich 0.19.1 image (#94) 66 | Use Stream Enrich 0.19.1 in the Docker Compose example (#95) 67 | Add a cache ttl to the example resolver (#85) 68 | Document the nsq_pubsub endpoint in the example README (#93) 69 | 70 | Release 11 (2018-09-05) 71 | ----------------------- 72 | Add Iglu Server 0.4.0 image (#74) 73 | 74 | Release 10 (2018-08-31) 75 | ----------------------- 76 | Fix entrypoint script on piinguin for new base image (#86) 77 | Fix README files to include gosu (#88) 78 | Fix CHANGELOG year in last three releases (#90) 79 | 80 | Release 9 (2018-08-22) 81 | ---------------------- 82 | Add Scala Stream Collector 0.14.0 image (#80) 83 | Add Stream Enrich 0.19.0 image (#81) 84 | Add v1.0.0-compat tag to the nsq images in the example (#84) 85 | Use SSC 0.14.0 and SE 0.19.0 in the Docker Compose example (#82) 86 | 87 | Release 8 (2018-08-06) 88 | ---------------------- 89 | Add piinguin server image (#47) 90 | Add Elasticsearch Loader 0.10.2 image (#78) 91 | Expose nsq_pubsub in the docker compose example (#76) 92 | Fix travis condition when publishing (#72) 93 | 94 | Release 7 (2018-07-18) 95 | ---------------------- 96 | Add Stream Enrich 0.18.0 image (#67) 97 | Use Stream Enrich 0.18.0 in the Docker Compose example (#68) 98 | Rename the base image to base-alpine (#51) 99 | Bump base image JDK to 1.8.0_171 (#61) 100 | Put the Scala Stream Collector on port 8080 in the Docker Compose example (#70) 101 | 102 | Release 6 (2018-06-18) 103 | ---------------------- 104 | Add 0.17.0 stream-enrich (#58) 105 | Modify the GCP images to use debian-base (#52) 106 | Add base-debian docker image (#50) 107 | Fix formatting of logging options in compose file (#55) 108 | 109 | Release 5 (2018-05-09) 110 | ---------------------- 111 | Add Stream Enrich 0.16.1 image (#46) 112 | 113 | Release 4 (2018-04-20) 114 | 
---------------------- 115 | Add Dockerfile for Iglu Server (#2) 116 | Scala Stream Collector 0.13.0 images (#41) 117 | Stream Enrich 0.16.0 image (#40) 118 | Stream Enrich 0.15.0 image (#39) 119 | Stream Enrich 0.14.0 image (#36) 120 | Use SSC 0.13.0 and SE 0.16.0 in the Docker Compose example (#44) 121 | Replace all 0.x.0 entries with $VERSION (#38) 122 | Extend copyright notice to 2018 (#43) 123 | 124 | Release 3 (2018-01-08) 125 | ---------------------- 126 | Stream Enrich 0.13.0 image (#31) 127 | Scala Stream Collector 0.12.0 image (#30) 128 | Use SSC 0.12.0 and SE 0.13.0 in the Docker Compose example (#32) 129 | 130 | Release 2 (2017-11-28) 131 | ---------------------- 132 | Stream Enrich 0.12.0 image (#24) 133 | Scala Stream Collector 0.11.0 image (#25) 134 | Rely on NSQ for the Docker Compose example (#27) 135 | Change directory when running Stream Enrich (#29) 136 | Install LZO as part of the S3 Loader image (#23) 137 | Replace default --usage flag by --help (#26) 138 | Travis badge (#22) 139 | 140 | Release 1 (2017-10-13) 141 | ---------------------- 142 | Add Dockerfile for Scala Stream Collector (#6) 143 | Add Dockerfile for Stream Enrich (#7) 144 | Add Dockerfile for Elasticsearch Loader (#5) 145 | Add Dockerfile for S3 Loader (#1) 146 | Add base image (#4) 147 | Provide a Docker Compose example (#15) 148 | Ensure the JVM process adheres to CPU and memory limits imposed by Docker (#21) 149 | Add CI/CD to deploy all images into our Bintray (#16) 150 | Add Bintray credentials to .travis.yml (#20) 151 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributor guide 2 | 3 | Snowplow Docker is maintained by the pipeline team at Snowplow Analytics and improved on by external 4 | contributors for which we are extremely grateful. 
5 | 6 | ## Getting in touch 7 | 8 | ### Community support requests 9 | 10 | First and foremost, please do not log an issue if you are asking for support; all of our community 11 | support requests go through our Discourse forum: https://discourse.snowplowanalytics.com/. 12 | 13 | Posting your problem there ensures more people will see it and you should get support faster than 14 | creating a new issue on GitHub. Please do create a new issue on GitHub if you think you've found a 15 | bug though! 16 | 17 | ### Gitter 18 | 19 | If you want to discuss already created issues, potential bugs, new features you would like to work 20 | on or any kind of developer chat, you can head over to our main 21 | [Gitter room](https://gitter.im/snowplow/snowplow). 22 | 23 | ## Repository structure 24 | 25 | The `snowplow-docker` project is split into different subfolders each corresponding to a project for 26 | which we provide a Docker image. Each subfolder is further split for each version for which an 27 | image was published. 28 | 29 | ## Issues 30 | 31 | ### Creating an issue 32 | 33 | The project contains an issue template which should help guide you through the process. However, 34 | please keep in mind that support requests should go to our Discourse forum: 35 | https://discourse.snowplowanalytics.com/ and not GitHub issues. 36 | 37 | It's also a good idea to log an issue before starting to work on a pull request to discuss it with 38 | the maintainers. 39 | 40 | ### Working on an issue 41 | 42 | If you see an issue you would like to work on, please let us know in the issue! That will help us in 43 | terms of scheduling and not doubling the amount of work. 44 | 45 | ## Pull requests 46 | 47 | These are a few guidelines to keep in mind when opening pull requests; there is a GitHub template 48 | that reiterates most of the points described here. 
49 | 50 | ### Commit hygiene 51 | 52 | We keep a strict 1-to-1 correspondence between commits and issues; as such, our commit messages are 53 | formatted in the following fashion: 54 | 55 | `Add issues description (closes #1234)` 56 | 57 | for example: 58 | 59 | `Add igluctl image (closes #1234)` 60 | 61 | ### Feedback cycle 62 | 63 | Reviews should happen fairly quickly during weekdays. If you feel your pull request has been 64 | forgotten, please ping one or more maintainers in the pull request. 65 | 66 | ### Getting your pull request merged 67 | 68 | If your pull request is fairly chunky, there might be a non-trivial delay between the moment the 69 | pull request is approved and the moment it gets merged. This is because your pull request will have 70 | been scheduled for a specific milestone which might or might not be actively worked on by a 71 | maintainer at the moment. 72 | 73 | ### Contributor license agreement 74 | 75 | We require outside contributors to sign a Contributor license agreement (or CLA) before we can merge 76 | their pull requests. 77 | You can find more information on the topic in 78 | [the dedicated wiki page](https://github.com/snowplow/snowplow/wiki/CLA). 79 | The @snowplowcla bot will guide you through the process. 80 | -------------------------------------------------------------------------------- /LICENSE-2.0.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2017-2019 Snowplow Analytics Ltd. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Snowplow Docker 2 | 3 | [![License][license-image]][license] 4 | 5 | ## Introduction 6 | 7 | This repository contains the Dockerfiles of base images we use at Snowplow: 8 | 9 | - [base-alpine][base-alpine]: Base image of Snowplow apps targeting JDK 8. It is based on Alpine Linux which results in much slimmer images compared to `base-debian`. 10 | - [base-debian][base-debian]: Base image of Snowplow apps targeting JDK 11. It is based on Debian Buster variant of `openjdk:11` image. 11 | - [k8s-dataflow][k8s-dataflow]: Base image of GCP Dataflow apps at Snowplow. It is based on `base-debian` and comes with prerequisite checks necessary to run a Dataflow job. 12 | 13 | **Note that** we migrated the maintenance of Dockerfiles for pipeline components from this repository to projects' own repositories. 
14 | 15 | ## Pulling 16 | 17 | You can pull the images from the registry directly: 18 | 19 | ```bash 20 | # base-alpine 21 | docker pull snowplow/base-alpine 22 | 23 | # base-debian 24 | docker pull snowplow/base-debian 25 | 26 | # k8s-dataflow 27 | docker pull snowplow/k8s-dataflow 28 | ``` 29 | 30 | ## Building 31 | 32 | Alternatively, you can build them yourself: 33 | 34 | ```bash 35 | # base-alpine 36 | docker build -t snowplow/base-alpine:latest base-alpine 37 | 38 | # base-debian 39 | docker build -t snowplow/base-debian:latest base-debian 40 | 41 | # k8s-dataflow 42 | docker build -t snowplow/k8s-dataflow:latest k8s-dataflow 43 | ``` 44 | 45 | ## Copyright and license 46 | 47 | Copyright 2017-2020 Snowplow Analytics Ltd. 48 | 49 | Licensed under the [Apache License, Version 2.0][license] (the "License"); 50 | you may not use this software except in compliance with the License. 51 | 52 | Unless required by applicable law or agreed to in writing, software 53 | distributed under the License is distributed on an "AS IS" BASIS, 54 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 55 | See the License for the specific language governing permissions and 56 | limitations under the License. 57 | 58 | [license-image]: https://img.shields.io/badge/license-Apache--2-blue.svg?style=flat 59 | [license]: https://www.apache.org/licenses/LICENSE-2.0 60 | 61 | [base-alpine]: https://hub.docker.com/r/snowplow/base-alpine 62 | 63 | [base-debian]: https://hub.docker.com/r/snowplow/base-debian 64 | 65 | [k8s-dataflow]: https://hub.docker.com/r/snowplow/k8s-dataflow 66 | -------------------------------------------------------------------------------- /base-alpine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8u212-jre-alpine 2 | LABEL maintainer="Snowplow Analytics Ltd. " 3 | 4 | # Snowplow components will be installed in this folder. 
5 | ENV SNOWPLOW_PATH="/snowplow" 6 | ENV SNOWPLOW_CONFIG_PATH="${SNOWPLOW_PATH}/config" \ 7 | SNOWPLOW_BIN_PATH="${SNOWPLOW_PATH}/bin" 8 | 9 | # Create a snowplow group and a system user that belongs to it. 10 | RUN addgroup snowplow && \ 11 | adduser -S -G snowplow snowplow 12 | 13 | # Install the components common to all apps (package versions are pinned). 14 | # https://github.com/yelp/dumb-init: lightweight init system 15 | # https://github.com/ncopa/su-exec: sudo replacement 16 | RUN apk add --no-cache dumb-init=1.2.2-r1 su-exec=0.2-r0 ca-certificates=20191127-r2 wget=1.20.3-r0 17 | 18 | # /snowplow/bin is meant to contain the application jar. 19 | # /snowplow/config is meant to contain the necessary configuration. 20 | RUN mkdir -p ${SNOWPLOW_BIN_PATH} && \ 21 | mkdir -p ${SNOWPLOW_CONFIG_PATH} && \ 22 | chown -R snowplow:snowplow ${SNOWPLOW_PATH} 23 | 24 | # Declare the configuration directory as a volume. 25 | VOLUME ${SNOWPLOW_CONFIG_PATH} 26 | -------------------------------------------------------------------------------- /base-debian/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:11.0.8-slim-buster 2 | LABEL maintainer="Snowplow Analytics Ltd. " 3 | 4 | # Snowplow components will be installed in this folder. 5 | ENV SNOWPLOW_PATH="/snowplow" 6 | ENV SNOWPLOW_CONFIG_PATH="${SNOWPLOW_PATH}/config" \ 7 | SNOWPLOW_BIN_PATH="${SNOWPLOW_PATH}/bin" 8 | 9 | # Create a snowplow group and a system user that belongs to it. 10 | RUN addgroup snowplow && \ 11 | adduser --system --ingroup snowplow snowplow 12 | 13 | # Install the components common to all apps. 
14 | # https://github.com/yelp/dumb-init: lightweight init system 15 | # https://github.com/tianon/gosu/: sudo replacement 16 | # NOTE: `gpgconf --kill all || :` is wrapped in braces on purpose. `&&` and `||` have equal precedence in sh, 17 | # so an ungrouped `|| :` would also swallow a failed download or a failed gosu signature check earlier in the chain. 18 | RUN apt-get update && \ 19 | apt-get install -y --no-install-recommends ca-certificates=20200601~deb10u1 wget=1.20.1-1.1 gnupg=2.2.12-1+deb10u1 && \ 20 | rm -rf /var/lib/apt/lists && \ 21 | wget https://github.com/Yelp/dumb-init/releases/download/v1.2.2/dumb-init_1.2.2_amd64.deb && \ 22 | dpkg -i dumb-init_*.deb && rm dumb-init_*.deb && \ 23 | export GOSU_VERSION=1.12 && \ 24 | wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ 25 | wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc" && \ 26 | GNUPGHOME="$(mktemp -d)" && \ 27 | export GNUPGHOME && \ 28 | for server in $(shuf -e ha.pool.sks-keyservers.net \ 29 | hkp://p80.pool.sks-keyservers.net:80 \ 30 | keyserver.ubuntu.com \ 31 | hkp://keyserver.ubuntu.com:80 \ 32 | pgp.mit.edu) ; do \ 33 | gpg --batch --keyserver "$server" --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 && break || : ; \ 34 | done && \ 35 | gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu && \ 36 | { gpgconf --kill all || : ; } && \ 37 | rm -r "$GNUPGHOME" /usr/local/bin/gosu.asc && \ 38 | chmod +x /usr/local/bin/gosu && \ 39 | apt-get purge -y --auto-remove gnupg 40 | 41 | # /snowplow/bin is meant to contain the application jar. 42 | # /snowplow/config is meant to contain the necessary configuration. 43 | RUN mkdir -p ${SNOWPLOW_BIN_PATH} && \ 44 | mkdir -p ${SNOWPLOW_CONFIG_PATH} && \ 45 | chown -R snowplow:snowplow ${SNOWPLOW_PATH} 46 | 47 | # Expose the configuration directory. 48 | VOLUME ${SNOWPLOW_CONFIG_PATH} 49 | -------------------------------------------------------------------------------- /k8s-dataflow/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM snowplow/base-debian:0.2.2 2 | LABEL MAINTAINER="Snowplow Analytics Ltd. 
" 3 | 4 | SHELL ["/bin/bash", "-o", "pipefail", "-c"] 5 | 6 | RUN \ 7 | apt-get update &&\ 8 | echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list &&\ 9 | apt-get install -y --no-install-recommends apt-transport-https=1.8.2.1 ca-certificates=20200601~deb10u1 gnupg=2.2.12-1+deb10u1 curl=7.64.0-4+deb10u1 &&\ 10 | curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - &&\ 11 | apt-get update &&\ 12 | apt-get install -y --no-install-recommends google-cloud-sdk=316.0.0-0 &&\ 13 | apt-get clean &&\ 14 | rm -rf /var/lib/apt/lists/* &&\ 15 | apt-get purge -y --auto-remove gnupg 16 | 17 | USER snowplow 18 | 19 | COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh 20 | ENTRYPOINT [ "docker-entrypoint.sh" ] 21 | 22 | CMD [ "" ] 23 | -------------------------------------------------------------------------------- /k8s-dataflow/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | DELAY=3 4 | THRESHOLD=5 5 | BUCKET="" 6 | 7 | ITER=0 8 | PARAMS="" 9 | 10 | # extract params 11 | for opt in "$@"; do 12 | case "$opt" in 13 | --tempLocation=*) 14 | BUCKET="${opt#*=}" 15 | PARAMS="$PARAMS $opt" 16 | shift 17 | ;; 18 | --gcpTempLocation=*) 19 | BUCKET="${opt#*=}" 20 | PARAMS="$PARAMS $opt" 21 | shift 22 | ;; 23 | --gcsThreshold=*) 24 | THRESHOLD="${opt#*=}" 25 | shift 26 | ;; 27 | --gcsDelay=*) 28 | DELAY="${opt#*=}" 29 | shift 30 | ;; 31 | *) # preserve positional arguments 32 | PARAMS="$PARAMS $opt" 33 | shift 34 | ;; 35 | esac 36 | done 37 | 38 | if [ -z "$BUCKET" ]; then 39 | echo "Missing --tempLocation flag. Exiting." 
40 | exit 1 41 | fi 42 | 43 | echo "params: ${PARAMS} threshold: ${THRESHOLD} delay: ${DELAY} GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-Not Available}" 44 | 45 | # enable service account if credentials are available 46 | if [ -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then 47 | echo "GOOGLE_APPLICATION_CREDENTIALS is not defined, gcloud isn't authenticated!" 48 | exit 1 49 | else 50 | gcloud auth activate-service-account --key-file="$GOOGLE_APPLICATION_CREDENTIALS" 51 | fi 52 | 53 | # wait for GCS bucket to be available 54 | while [ "$ITER" -le "$THRESHOLD" ]; do 55 | if gsutil ls -b "${BUCKET}"; 56 | then 57 | echo "Bucket ${BUCKET} exists! Proceeding." 58 | break 59 | else 60 | echo "Bucket ${BUCKET} does not exist. Retry: ${ITER}/${THRESHOLD}" 61 | sleep "${DELAY}" 62 | ITER=$(( ITER+1 )) 63 | fi 64 | done 65 | 66 | # check if retry limit was reached or not 67 | if [ "$ITER" -le "$THRESHOLD" ]; then 68 | $PARAMS 69 | else 70 | echo "Bucket ${BUCKET} does not exist. Not retrying anymore." 71 | exit 1 72 | fi 73 | --------------------------------------------------------------------------------