├── .asf.yaml
├── .github
│   ├── PULL_REQUEST_TEMPLATE
│   └── workflows
│       ├── build_3.5.0.yaml
│       ├── build_3.5.1.yaml
│       ├── build_3.5.2.yaml
│       ├── build_3.5.3.yaml
│       ├── build_3.5.4.yaml
│       ├── build_3.5.5.yaml
│       ├── build_3.5.6.yaml
│       ├── build_4.0.0-preview1.yaml
│       ├── build_4.0.0-preview2.yaml
│       ├── build_4.0.0.yaml
│       ├── main.yml
│       ├── publish-java17.yaml
│       ├── publish-java21.yaml
│       ├── publish.yml
│       └── test.yml
├── 3.5.0
│   ├── scala2.12-java11-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.12-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.12-java17-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── 3.5.1
│   ├── scala2.12-java11-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.12-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.12-java17-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── 3.5.2
│   ├── scala2.12-java11-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.12-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.12-java17-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── 3.5.3
│   ├── scala2.12-java11-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.12-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.12-java17-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── 3.5.4
│   ├── scala2.12-java11-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.12-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.12-java17-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── 3.5.5
│   ├── scala2.12-java11-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.12-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.12-java17-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── 3.5.6
│   ├── scala2.12-java11-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java11-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.12-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.12-java17-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.12-java17-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── 4.0.0-preview1
│   ├── scala2.13-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java17-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java17-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.13-java21-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java21-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java21-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.13-java21-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── 4.0.0-preview2
│   ├── scala2.13-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java17-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java17-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.13-java21-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java21-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java21-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.13-java21-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── 4.0.0
│   ├── scala2.13-java17-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java17-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java17-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java17-ubuntu
│   │   ├── Dockerfile
│   │   └── entrypoint.sh
│   ├── scala2.13-java21-python3-r-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java21-python3-ubuntu
│   │   └── Dockerfile
│   ├── scala2.13-java21-r-ubuntu
│   │   └── Dockerfile
│   └── scala2.13-java21-ubuntu
│       ├── Dockerfile
│       └── entrypoint.sh
├── CONTRIBUTING.md
├── Dockerfile.template
├── LICENSE
├── NOTICE
├── OVERVIEW.md
├── README.md
├── add-dockerfiles.sh
├── awesome-spark-docker.md
├── entrypoint.sh.template
├── merge_spark_docker_pr.py
├── r-python.template
├── testing
│   ├── run_tests.sh
│   └── testing.sh
├── tools
│   ├── ci_runner_cleaner
│   │   ├── free_disk_space.sh
│   │   └── free_disk_space_container.sh
│   ├── manifest.py
│   ├── requirements.txt
│   └── template.py
└── versions.json

/.asf.yaml:
--------------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# https://cwiki.apache.org/confluence/display/INFRA/git+-+.asf.yaml+features
---
github:
  description: "Official Dockerfile for Apache Spark"
  homepage: https://spark.apache.org/
  labels:
    - python
    - scala
    - r
    - java
    - big-data
    - jdbc
    - sql
    - spark
  enabled_merge_buttons:
    merge: false
    squash: true
    rebase: true

notifications:
  pullrequests: reviews@spark.apache.org
  issues: reviews@spark.apache.org
  commits: commits@spark.apache.org

--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE:
--------------------------------------------------------------------------------
### What changes were proposed in this pull request?


### Why are the changes needed?


### Does this PR introduce _any_ user-facing change?


### How was this patch tested?

--------------------------------------------------------------------------------
/.github/workflows/build_3.5.0.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.5.0)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '3.5.0/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [11, 17]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 3.5.0
      scala: 2.12
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/build_3.5.1.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.5.1)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '3.5.1/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [11, 17]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 3.5.1
      scala: 2.12
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/build_3.5.2.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.5.2)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '3.5.2/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [11, 17]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 3.5.2
      scala: 2.12
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/build_3.5.3.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.5.3)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '3.5.3/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [11, 17]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 3.5.3
      scala: 2.12
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/build_3.5.4.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.5.4)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '3.5.4/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [11, 17]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 3.5.4
      scala: 2.12
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/build_3.5.5.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.5.5)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '3.5.5/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [11, 17]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 3.5.5
      scala: 2.12
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/build_3.5.6.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.5.6)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '3.5.6/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [11, 17]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 3.5.6
      scala: 2.12
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/build_4.0.0-preview1.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (4.0.0-preview1)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '4.0.0-preview1/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [17, 21]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 4.0.0-preview1
      scala: 2.13
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/build_4.0.0-preview2.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (4.0.0-preview2)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '4.0.0-preview2/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [17, 21]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 4.0.0-preview2
      scala: 2.13
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/build_4.0.0.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (4.0.0)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '4.0.0/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
        java: [17, 21]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 4.0.0
      scala: 2.13
      java: ${{ matrix.java }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/publish-java17.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Publish (Java 17 only)"

on:
  workflow_dispatch:
    inputs:
      spark:
        description: 'The Spark version of Spark image.'
        required: true
        default: '4.0.0'
        type: choice
        options:
          - 4.0.0
          - 4.0.0-preview1
          - 4.0.0-preview2
      publish:
        description: 'Publish the image or not.'
        default: false
        type: boolean
        required: true
      repository:
        description: The registry to be published (Available only when publish is true).
        required: false
        default: ghcr.io/apache/spark-docker
        type: choice
        options:
          # GHCR: This required the write permission of apache/spark-docker (Spark Committer)
          - ghcr.io/apache/spark-docker
          # Dockerhub: This required the DOCKERHUB_TOKEN and DOCKERHUB_USER (Spark Committer)
          - apache

jobs:
  # We first build and publish the base image
  run-base-build:
    strategy:
      matrix:
        scala: [2.13]
        java: [17]
        image-type: ["scala"]
    permissions:
      packages: write
    name: Run Base
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: ${{ inputs.spark }}
      scala: ${{ matrix.scala }}
      java: ${{ matrix.java }}
      publish: ${{ inputs.publish }}
      repository: ${{ inputs.repository }}
      image-type: ${{ matrix.image-type }}

  # Then publish the all / python / r images
  run-build:
    needs: run-base-build
    strategy:
      matrix:
        scala: [2.13]
        java: [17]
        image-type: ["all", "python", "r"]
    permissions:
      packages: write
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: ${{ inputs.spark }}
      scala: ${{ matrix.scala }}
      java: ${{ matrix.java }}
      publish: ${{ inputs.publish }}
      repository: ${{ inputs.repository }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/publish-java21.yaml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Publish (Java 21 only)"

on:
  workflow_dispatch:
    inputs:
      spark:
        description: 'The Spark version of Spark image.'
        required: true
        default: '4.0.0'
        type: choice
        options:
          - 4.0.0
          - 4.0.0-preview1
          - 4.0.0-preview2
      publish:
        description: 'Publish the image or not.'
        default: false
        type: boolean
        required: true
      repository:
        description: The registry to be published (Available only when publish is true).
        required: false
        default: ghcr.io/apache/spark-docker
        type: choice
        options:
          # GHCR: This required the write permission of apache/spark-docker (Spark Committer)
          - ghcr.io/apache/spark-docker
          # Dockerhub: This required the DOCKERHUB_TOKEN and DOCKERHUB_USER (Spark Committer)
          - apache

jobs:
  # We first build and publish the base image
  run-base-build:
    strategy:
      matrix:
        scala: [2.13]
        java: [21]
        image-type: ["scala"]
    permissions:
      packages: write
    name: Run Base
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: ${{ inputs.spark }}
      scala: ${{ matrix.scala }}
      java: ${{ matrix.java }}
      publish: ${{ inputs.publish }}
      repository: ${{ inputs.repository }}
      image-type: ${{ matrix.image-type }}

  # Then publish the all / python / r images
  run-build:
    needs: run-base-build
    strategy:
      matrix:
        scala: [2.13]
        java: [21]
        image-type: ["all", "python", "r"]
    permissions:
      packages: write
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: ${{ inputs.spark }}
      scala: ${{ matrix.scala }}
      java: ${{ matrix.java }}
      publish: ${{ inputs.publish }}
      repository: ${{ inputs.repository }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Publish"

on:
  workflow_dispatch:
    inputs:
      spark:
        description: 'The Spark version of Spark image.'
        required: true
        default: '3.5.6'
        type: choice
        options:
          - 3.5.6
      publish:
        description: 'Publish the image or not.'
        default: false
        type: boolean
        required: true
      repository:
        description: The registry to be published (Available only when publish is true).
        required: false
        default: ghcr.io/apache/spark-docker
        type: choice
        options:
          # GHCR: This required the write permission of apache/spark-docker (Spark Committer)
          - ghcr.io/apache/spark-docker
          # Dockerhub: This required the DOCKERHUB_TOKEN and DOCKERHUB_USER (Spark Committer)
          - apache

jobs:
  # We first build and publish the base image
  run-base-build:
    # if: startsWith(inputs.spark, '3.3')
    strategy:
      matrix:
        scala: [2.12]
        java: [11, 17]
        image-type: ["scala"]
    permissions:
      packages: write
    name: Run Base
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: ${{ inputs.spark }}
      scala: ${{ matrix.scala }}
      java: ${{ matrix.java }}
      publish: ${{ inputs.publish }}
      repository: ${{ inputs.repository }}
      image-type: ${{ matrix.image-type }}

  # Then publish the all / python / r images
  run-build:
    needs: run-base-build
    # if: startsWith(inputs.spark, '3.3')
    strategy:
      matrix:
        scala: [2.12]
        java: [11, 17]
        image-type: ["all", "python", "r"]
    permissions:
      packages: write
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: ${{ inputs.spark }}
      scala: ${{ matrix.scala }}
      java: ${{ matrix.java }}
      publish: ${{ inputs.publish }}
      repository: ${{ inputs.repository }}
      image-type: ${{ matrix.image-type }}

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Test"

on:
  workflow_dispatch:
    inputs:
      spark:
        description: 'The Spark version of Spark image.'
        required: true
        default: '4.0.0'
        type: choice
        options:
          - 4.0.0
          - 4.0.0-preview2
          - 4.0.0-preview1
          - 3.5.6
          - 3.5.5
          - 3.5.4
          - 3.5.3
          - 3.5.2
          - 3.5.1
          - 3.5.0
      java:
        description: 'The Java version of Spark image.'
        default: 11
        type: string
        required: true
        options:
          - 11
          - 17
      scala:
        description: 'The Scala version of Spark image.'
        default: 2.12
        type: string
        required: true
        options:
          - 2.12
          - 2.13
      image-type:
        description: 'The image type of Spark image.'
        required: true
        default: 'python'
        type: choice
        options:
          - all
          - python
          - scala
          - r
      repository:
        description: The registry to be tested.
        required: true
        type: string
        default: ghcr.io/apache/spark-docker
      image-tag:
        description: 'The image tag to be tested.'
        default: latest
        type: string
        required: true

jobs:
  run-build:
    name: Test ${{ inputs.repository }}/spark:${{ inputs.image-tag }}
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: ${{ inputs.spark }}
      scala: ${{ inputs.scala }}
      java: ${{ inputs.java }}
      repository: ${{ inputs.repository }}
      image-tag: ${{ inputs.image-tag }}
      image-type: ${{ inputs.image-type }}
      build: false

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java11-python3-r-ubuntu/Dockerfile:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.5.0-scala2.12-java11-ubuntu

USER root

RUN set -ex; \
    apt-get update; \
    apt-get install -y python3 python3-pip; \
    apt-get install -y r-base r-base-dev; \
    rm -rf /var/lib/apt/lists/*

ENV R_HOME /usr/lib/R

USER spark

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java11-python3-ubuntu/Dockerfile:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.5.0-scala2.12-java11-ubuntu

USER root

RUN set -ex; \
    apt-get update; \
    apt-get install -y python3 python3-pip; \
    rm -rf /var/lib/apt/lists/*

USER spark

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java11-r-ubuntu/Dockerfile:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.5.0-scala2.12-java11-ubuntu

USER root

RUN set -ex; \
    apt-get update; \
    apt-get install -y r-base r-base-dev; \
    rm -rf /var/lib/apt/lists/*

ENV R_HOME /usr/lib/R

USER spark

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java11-ubuntu/Dockerfile:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM eclipse-temurin:11-jre-focal

ARG spark_uid=185

RUN groupadd --system --gid=${spark_uid} spark && \
    useradd --system --uid=${spark_uid} --gid=spark spark

RUN set -ex; \
    apt-get update; \
    apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \
    mkdir -p /opt/spark; \
    mkdir /opt/spark/python; \
    mkdir -p /opt/spark/examples; \
    mkdir -p /opt/spark/work-dir; \
    chmod g+w /opt/spark/work-dir; \
    touch /opt/spark/RELEASE; \
    chown -R spark:spark /opt/spark; \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
    rm -rf /var/lib/apt/lists/*

# Install Apache Spark
# https://downloads.apache.org/spark/KEYS
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz \
    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz.asc \
    GPG_KEY=FC3AE3A7EAA1BAC98770840E7E1ABCC53AAA2216

RUN set -ex; \
    export SPARK_TMP="$(mktemp -d)"; \
    cd $SPARK_TMP; \
    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
    wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
    export GNUPGHOME="$(mktemp -d)"; \
    gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
    gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
    gpg --batch --verify spark.tgz.asc spark.tgz; \
    gpgconf --kill all; \
    rm -rf "$GNUPGHOME" spark.tgz.asc; \
    \
    tar -xf spark.tgz --strip-components=1; \
    chown -R spark:spark .; \
    mv jars /opt/spark/; \
    mv RELEASE /opt/spark/; \
    mv bin /opt/spark/; \
    mv sbin /opt/spark/; \
    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
    mv examples /opt/spark/; \
    mv kubernetes/tests /opt/spark/; \
    mv data /opt/spark/; \
    mv python/pyspark /opt/spark/python/pyspark/; \
    mv python/lib /opt/spark/python/lib/; \
    mv R /opt/spark/; \
    chmod a+x /opt/decom.sh; \
    cd ..; \
    rm -rf "$SPARK_TMP";

COPY entrypoint.sh /opt/

ENV SPARK_HOME /opt/spark

WORKDIR /opt/spark/work-dir

USER spark

ENTRYPOINT [ "/opt/entrypoint.sh" ]

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java11-ubuntu/entrypoint.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Prevent any errors from being silently ignored
set -eo pipefail

attempt_setup_fake_passwd_entry() {
  # Check whether there is a passwd entry for the container UID
  local myuid; myuid="$(id -u)"
  # If there is no passwd entry for the container UID, attempt to fake one
  # You can also refer to the https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
  # It's to resolve OpenShift random UID case.
  # See also: https://github.com/docker-library/postgres/pull/448
  if ! getent passwd "$myuid" &> /dev/null; then
    local wrapper
    for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
      if [ -s "$wrapper" ]; then
        NSS_WRAPPER_PASSWD="$(mktemp)"
        NSS_WRAPPER_GROUP="$(mktemp)"
        export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
        local mygid; mygid="$(id -g)"
        printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
        printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
        break
      fi
    done
  fi
}

if [ -z "$JAVA_HOME" ]; then
  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
fi

SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
for v in "${!SPARK_JAVA_OPT_@}"; do
  SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
done

if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
fi

if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
  export PYSPARK_PYTHON
fi
if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
  export PYSPARK_DRIVER_PYTHON
fi

# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
fi

if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
fi

if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
elif ! [ -z "${SPARK_HOME+x}" ]; then
  SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
fi

# SPARK-43540: add current working directory into executor classpath
SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"

# Switch to spark if no USER specified (root by default) otherwise use USER directly
switch_spark_if_root() {
  if [ $(id -u) -eq 0 ]; then
    echo gosu spark
  fi
}

case "$1" in
  driver)
    shift 1
    CMD=(
      "$SPARK_HOME/bin/spark-submit"
      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
      --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
      --deploy-mode client
      "$@"
    )
    attempt_setup_fake_passwd_entry
    # Execute the container CMD under tini for better hygiene
    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
    ;;
  executor)
    shift 1
    CMD=(
      ${JAVA_HOME}/bin/java
      "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
      -Xms"$SPARK_EXECUTOR_MEMORY"
      -Xmx"$SPARK_EXECUTOR_MEMORY"
      -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
      org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
      --driver-url "$SPARK_DRIVER_URL"
      --executor-id "$SPARK_EXECUTOR_ID"
      --cores "$SPARK_EXECUTOR_CORES"
      --app-id "$SPARK_APPLICATION_ID"
      --hostname "$SPARK_EXECUTOR_POD_IP"
      --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
      --podName "$SPARK_EXECUTOR_POD_NAME"
    )
    attempt_setup_fake_passwd_entry
    # Execute the container CMD under tini for better hygiene
    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
    ;;

  *)
    # Non-spark-on-k8s command provided, proceeding in pass-through mode...
    exec "$@"
    ;;
esac

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java17-python3-r-ubuntu/Dockerfile:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.5.0-scala2.12-java17-ubuntu

USER root

RUN set -ex; \
    apt-get update; \
    apt-get install -y python3 python3-pip; \
    apt-get install -y r-base r-base-dev; \
    rm -rf /var/lib/apt/lists/*

ENV R_HOME /usr/lib/R

USER spark

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java17-python3-ubuntu/Dockerfile:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.5.0-scala2.12-java17-ubuntu

USER root

RUN set -ex; \
    apt-get update; \
    apt-get install -y python3 python3-pip; \
    rm -rf /var/lib/apt/lists/*

USER spark

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java17-r-ubuntu/Dockerfile:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.5.0-scala2.12-java17-ubuntu

USER root

RUN set -ex; \
    apt-get update; \
    apt-get install -y r-base r-base-dev; \
    rm -rf /var/lib/apt/lists/*

ENV R_HOME /usr/lib/R

USER spark

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java17-ubuntu/Dockerfile:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM eclipse-temurin:17-jammy

ARG spark_uid=185

RUN groupadd --system --gid=${spark_uid} spark && \
    useradd --system --uid=${spark_uid} --gid=spark spark

RUN set -ex; \
    apt-get update; \
    apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \
    mkdir -p /opt/spark; \
    mkdir /opt/spark/python; \
    mkdir -p /opt/spark/examples; \
    mkdir -p /opt/spark/work-dir; \
    chmod g+w /opt/spark/work-dir; \
    touch /opt/spark/RELEASE; \
    chown -R spark:spark /opt/spark; \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
    rm -rf /var/lib/apt/lists/*

# Install Apache Spark
# https://downloads.apache.org/spark/KEYS
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz \
    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz.asc \
    GPG_KEY=FC3AE3A7EAA1BAC98770840E7E1ABCC53AAA2216

RUN set -ex; \
    export SPARK_TMP="$(mktemp -d)"; \
    cd $SPARK_TMP; \
    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
    wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
    export GNUPGHOME="$(mktemp -d)"; \
    gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
    gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
    gpg --batch --verify spark.tgz.asc spark.tgz; \
    gpgconf --kill all; \
    rm -rf "$GNUPGHOME" spark.tgz.asc; \
    \
    tar -xf spark.tgz --strip-components=1; \
    chown -R spark:spark .; \
    mv jars /opt/spark/; \
    mv RELEASE /opt/spark/; \
    mv bin /opt/spark/; \
    mv sbin /opt/spark/; \
    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
    mv examples /opt/spark/; \
    mv kubernetes/tests /opt/spark/; \
    mv data /opt/spark/; \
    mv python/pyspark /opt/spark/python/pyspark/; \
    mv python/lib /opt/spark/python/lib/; \
    mv R /opt/spark/; \
    chmod a+x /opt/decom.sh; \
    cd ..; \
    rm -rf "$SPARK_TMP";

COPY entrypoint.sh /opt/

ENV SPARK_HOME /opt/spark

WORKDIR /opt/spark/work-dir

USER spark

ENTRYPOINT [ "/opt/entrypoint.sh" ]

--------------------------------------------------------------------------------
/3.5.0/scala2.12-java17-ubuntu/entrypoint.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Prevent any errors from being silently ignored
set -eo pipefail

attempt_setup_fake_passwd_entry() {
  # Check whether there is a passwd entry for the container UID
  local myuid; myuid="$(id -u)"
  # If there is no passwd entry for the container UID, attempt to fake one
  # You can also refer to the https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
  # It's to resolve OpenShift random UID case.
  # See also: https://github.com/docker-library/postgres/pull/448
  if ! getent passwd "$myuid" &> /dev/null; then
    local wrapper
    for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
      if [ -s "$wrapper" ]; then
        NSS_WRAPPER_PASSWD="$(mktemp)"
        NSS_WRAPPER_GROUP="$(mktemp)"
        export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
        local mygid; mygid="$(id -g)"
        printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
        printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
        break
      fi
    done
  fi
}

if [ -z "$JAVA_HOME" ]; then
  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
fi

SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
for v in "${!SPARK_JAVA_OPT_@}"; do
  SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
done

if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
fi

if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
  export PYSPARK_PYTHON
fi
if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
  export PYSPARK_DRIVER_PYTHON
fi

# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
fi

if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
fi

if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
elif ! [ -z "${SPARK_HOME+x}" ]; then
77 |   SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
78 | fi
79 | 
80 | # SPARK-43540: add current working directory into executor classpath
81 | SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"
82 | 
83 | # Emit "gosu spark" when running as root (the default when no USER is specified), so the exec below switches to the spark user; otherwise run as the current user directly
84 | switch_spark_if_root() {
85 |     if [ "$(id -u)" -eq 0 ]; then
86 |         echo gosu spark
87 |     fi
88 | }
89 | 
90 | case "$1" in
91 |   driver)
92 |     shift 1
93 |     CMD=(
94 |       "$SPARK_HOME/bin/spark-submit"
95 |       --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
96 |       --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
97 |       --deploy-mode client
98 |       "$@"
99 |     )
100 |     attempt_setup_fake_passwd_entry
101 |     # Execute the container CMD under tini so signals are forwarded and zombie processes are reaped
102 |     exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
103 |     ;;
104 |   executor)
105 |     shift 1
106 |     CMD=(
107 |       "${JAVA_HOME}/bin/java"
108 |       "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
109 |       -Xms"$SPARK_EXECUTOR_MEMORY"
110 |       -Xmx"$SPARK_EXECUTOR_MEMORY"
111 |       -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
112 |       org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
113 |       --driver-url "$SPARK_DRIVER_URL"
114 |       --executor-id "$SPARK_EXECUTOR_ID"
115 |       --cores "$SPARK_EXECUTOR_CORES"
116 |       --app-id "$SPARK_APPLICATION_ID"
117 |       --hostname "$SPARK_EXECUTOR_POD_IP"
118 |       --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
119 |       --podName "$SPARK_EXECUTOR_POD_NAME"
120 |     )
121 |     attempt_setup_fake_passwd_entry
122 |     # Execute the container CMD under tini so signals are forwarded and zombie processes are reaped
123 |     exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
124 |     ;;
125 | 
126 |   *)
127 |     # Not a spark-on-k8s command: run the provided command in pass-through mode
128 |     exec "$@"
129 |     ;;
130 | esac
131 | 
-------------------------------------------------------------------------------- /3.5.1/scala2.12-java11-python3-r-ubuntu/Dockerfile: --------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements. See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License. You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | FROM spark:3.5.1-scala2.12-java11-ubuntu
18 | 
19 | USER root
20 | 
21 | RUN set -ex; \
22 |     apt-get update; \
23 |     apt-get install -y python3 python3-pip; \
24 |     apt-get install -y r-base r-base-dev; \
25 |     rm -rf /var/lib/apt/lists/*
26 | 
27 | ENV R_HOME /usr/lib/R
28 | 
29 | USER spark
30 | 
-------------------------------------------------------------------------------- /3.5.1/scala2.12-java11-python3-ubuntu/Dockerfile: --------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.
See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.1-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.1/scala2.12-java11-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.1-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.1/scala2.12-java11-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
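# Build note: the spark_uid arg below fixes the UID/GID of the non-root
# "spark" user (185 by default, the UID Spark's Kubernetes images have
# historically used). It can be overridden at build time; the output tag
# here is only an example:
#
#   docker build --build-arg spark_uid=1000 -t my-spark:3.5.1-java11 .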
16 | # 17 | FROM eclipse-temurin:11-jre-focal 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=FD3E84942E5E6106235A1D25BD356A9F8740E4FF 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | mv kubernetes/tests /opt/spark/; \ 64 | mv data /opt/spark/; \ 65 | mv python/pyspark /opt/spark/python/pyspark/; \ 66 | mv python/lib /opt/spark/python/lib/; \ 67 | mv R /opt/spark/; \ 68 | chmod a+x /opt/decom.sh; \ 69 | cd ..; \ 70 | rm -rf "$SPARK_TMP"; 71 | 72 | COPY entrypoint.sh /opt/ 73 | 74 | ENV SPARK_HOME /opt/spark 75 | 76 | WORKDIR /opt/spark/work-dir 77 | 78 | USER spark 79 | 80 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 81 | -------------------------------------------------------------------------------- /3.5.1/scala2.12-java11-ubuntu/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
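# Background note: attempt_setup_fake_passwd_entry below covers containers
# started under an arbitrary UID that has no passwd entry (as OpenShift does
# by default). It preloads libnss_wrapper and points NSS_WRAPPER_PASSWD and
# NSS_WRAPPER_GROUP at generated files so lookups of the current UID succeed;
# it is invoked only on the "driver" and "executor" paths, not in
# pass-through mode.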
17 | #
18 | # Prevent any errors from being silently ignored
19 | set -eo pipefail
20 | 
21 | attempt_setup_fake_passwd_entry() {
22 |     # Check whether there is a passwd entry for the container UID
23 |     local myuid; myuid="$(id -u)"
24 |     # If there is no passwd entry for the container UID, attempt to fake one
25 |     # via libnss_wrapper; see https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
26 |     # This handles the OpenShift case, where containers run under a random UID.
27 |     # See also: https://github.com/docker-library/postgres/pull/448
28 |     if ! getent passwd "$myuid" &> /dev/null; then
29 |         local wrapper
30 |         for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
31 |             if [ -s "$wrapper" ]; then
32 |                 NSS_WRAPPER_PASSWD="$(mktemp)"
33 |                 NSS_WRAPPER_GROUP="$(mktemp)"
34 |                 export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
35 |                 local mygid; mygid="$(id -g)"
36 |                 printf 'spark:x:%s:%s:%s:%s:/bin/false\n' "$myuid" "$mygid" "${SPARK_USER_NAME:-anonymous uid}" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
37 |                 printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
38 |                 break
39 |             fi
40 |         done
41 |     fi
42 | }
43 | 
44 | if [ -z "$JAVA_HOME" ]; then
45 |   JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
46 | fi
47 | 
48 | SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
49 | for v in "${!SPARK_JAVA_OPT_@}"; do
50 |     SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
51 | done
52 | 
53 | if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
54 |   SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
55 | fi
56 | 
57 | if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
58 |     export PYSPARK_PYTHON
59 | fi
60 | if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
61 |     export PYSPARK_DRIVER_PYTHON
62 | fi
63 | 
64 | # If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
65 | # An already-set SPARK_DIST_CLASSPATH is left alone, so customizations of this value from elsewhere (e.g. Docker/K8s) are not overridden.
66 | if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
67 |   export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
68 | fi
69 | 
70 | if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
71 |   SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
72 | fi
73 | 
74 | if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
75 |   SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
76 | elif ! [ -z "${SPARK_HOME+x}" ]; then
77 |   SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
78 | fi
79 | 
80 | # SPARK-43540: add current working directory into executor classpath
81 | SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"
82 | 
83 | # Emit "gosu spark" when running as root (the default when no USER is specified), so the exec below switches to the spark user; otherwise run as the current user directly
84 | switch_spark_if_root() {
85 |     if [ "$(id -u)" -eq 0 ]; then
86 |         echo gosu spark
87 |     fi
88 | }
89 | 
90 | case "$1" in
91 |   driver)
92 |     shift 1
93 |     CMD=(
94 |       "$SPARK_HOME/bin/spark-submit"
95 |       --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
96 |       --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
97 |       --deploy-mode client
98 |       "$@"
99 |     )
100 |     attempt_setup_fake_passwd_entry
101 |     # Execute the container CMD under tini so signals are forwarded and zombie processes are reaped
102 |     exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
103 |     ;;
104 |   executor)
105 |     shift 1
106 |     CMD=(
107 |       "${JAVA_HOME}/bin/java"
108 |       "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
109 |       -Xms"$SPARK_EXECUTOR_MEMORY"
110 |       -Xmx"$SPARK_EXECUTOR_MEMORY"
111 |       -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
112 |       org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
113 |       --driver-url "$SPARK_DRIVER_URL"
114 |       --executor-id "$SPARK_EXECUTOR_ID"
115 |       --cores "$SPARK_EXECUTOR_CORES"
116 |       --app-id "$SPARK_APPLICATION_ID"
117 |       --hostname "$SPARK_EXECUTOR_POD_IP"
118 |       --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
119 |       --podName "$SPARK_EXECUTOR_POD_NAME"
120 |     )
121 |     attempt_setup_fake_passwd_entry
122 |     # Execute the container CMD under tini so signals are forwarded and zombie processes are reaped
123 |     exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
124 |     ;;
125 | 
126 |   *)
127 |     # Not a spark-on-k8s command: run the provided command in pass-through mode
128 |     exec "$@"
129 |     ;;
130 | esac
131 | 
-------------------------------------------------------------------------------- /3.5.1/scala2.12-java17-python3-r-ubuntu/Dockerfile: --------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements. See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License. You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | FROM spark:3.5.1-scala2.12-java17-ubuntu
18 | 
19 | USER root
20 | 
21 | RUN set -ex; \
22 |     apt-get update; \
23 |     apt-get install -y python3 python3-pip; \
24 |     apt-get install -y r-base r-base-dev; \
25 |     rm -rf /var/lib/apt/lists/*
26 | 
27 | ENV R_HOME /usr/lib/R
28 | 
29 | USER spark
30 | 
-------------------------------------------------------------------------------- /3.5.1/scala2.12-java17-python3-ubuntu/Dockerfile: --------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.
See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.1-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.1/scala2.12-java17-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.1-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.1/scala2.12-java17-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
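# Verification note: the download step below fetches the release tarball and
# its .asc signature, imports GPG_KEY (with a second keyserver as fallback),
# and fails the build unless "gpg --batch --verify" succeeds. The same check
# can be reproduced by hand with the URLs and key from this file:
#
#   wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
#   wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz.asc
#   gpg --keyserver hkps://keyserver.ubuntu.com --recv-keys FD3E84942E5E6106235A1D25BD356A9F8740E4FF
#   gpg --verify spark-3.5.1-bin-hadoop3.tgz.asc spark-3.5.1-bin-hadoop3.tgz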
16 | # 17 | FROM eclipse-temurin:17-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=FD3E84942E5E6106235A1D25BD356A9F8740E4FF 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | mv kubernetes/tests /opt/spark/; \ 64 | mv data /opt/spark/; \ 65 | mv python/pyspark /opt/spark/python/pyspark/; \ 66 | mv python/lib /opt/spark/python/lib/; \ 67 | mv R /opt/spark/; \ 68 | chmod a+x /opt/decom.sh; \ 69 | cd ..; \ 70 | rm -rf "$SPARK_TMP"; 71 | 72 | COPY entrypoint.sh /opt/ 73 | 74 | ENV SPARK_HOME /opt/spark 75 | 76 | WORKDIR /opt/spark/work-dir 77 | 78 | USER spark 79 | 80 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 81 | -------------------------------------------------------------------------------- /3.5.2/scala2.12-java11-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
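# Pattern note: every variant image in this repo is the plain base image plus
# an apt layer, installed as root and then dropped back to the spark user. A
# hypothetical derived image extending the python3 variant the same way (the
# FROM tag mirrors this repo's naming; the pip packages are examples only):
#
#   FROM spark:3.5.2-scala2.12-java11-python3-ubuntu
#   USER root
#   RUN pip3 install --no-cache-dir numpy pandas
#   USER spark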
16 | # 17 | FROM spark:3.5.2-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.2/scala2.12-java11-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.2-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.2/scala2.12-java11-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.2-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.2/scala2.12-java11-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:11-jre-focal 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=D76E23B9F11B5BF6864613C4F7051850A0AF904D 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | mv kubernetes/tests /opt/spark/; \ 64 | mv data /opt/spark/; \ 65 | mv python/pyspark /opt/spark/python/pyspark/; \ 66 | mv python/lib /opt/spark/python/lib/; \ 67 | mv R /opt/spark/; \ 68 | chmod a+x /opt/decom.sh; \ 69 | cd ..; \ 70 | rm -rf "$SPARK_TMP"; 71 | 72 | COPY entrypoint.sh /opt/ 73 | 74 | ENV SPARK_HOME /opt/spark 75 | 76 | WORKDIR /opt/spark/work-dir 77 | 78 | USER spark 79 | 80 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 81 | -------------------------------------------------------------------------------- /3.5.2/scala2.12-java17-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.2-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.2/scala2.12-java17-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.2-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.2/scala2.12-java17-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
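# R note: SparkR locates the R installation through R_HOME, set below to
# /usr/lib/R, which is where Ubuntu's r-base package installs. A quick smoke
# test, assuming the published tag matching this directory:
#
#   docker run -it spark:3.5.2-scala2.12-java17-r-ubuntu /opt/spark/bin/sparkR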
16 | # 17 | FROM spark:3.5.2-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.2/scala2.12-java17-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:17-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=D76E23B9F11B5BF6864613C4F7051850A0AF904D 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | mv kubernetes/tests /opt/spark/; \ 64 | mv data /opt/spark/; \ 65 | mv python/pyspark /opt/spark/python/pyspark/; \ 66 | mv python/lib /opt/spark/python/lib/; \ 67 | mv R /opt/spark/; \ 68 | chmod a+x /opt/decom.sh; \ 69 | cd ..; \ 70 | rm -rf "$SPARK_TMP"; 71 | 72 | COPY entrypoint.sh /opt/ 73 | 74 | ENV SPARK_HOME /opt/spark 75 | 76 | WORKDIR /opt/spark/work-dir 77 | 78 | USER spark 79 
| 80 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 81 | -------------------------------------------------------------------------------- /3.5.3/scala2.12-java11-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.3-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.3/scala2.12-java11-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.3-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.3/scala2.12-java11-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.3-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.3/scala2.12-java11-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:11-jre-focal 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.3/spark-3.5.3-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.3/spark-3.5.3-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=0A2D660358B6F6F8071FD16F6606986CF5A8447C 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv 
kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME /opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /3.5.3/scala2.12-java17-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.3-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.3/scala2.12-java17-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
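# Usage note: with python3 installed this image can run PySpark directly,
# since /opt/spark/bin comes from the base image and the entrypoint passes
# unrecognized commands through. Assuming the published tag for this
# directory:
#
#   docker run -it spark:3.5.3-scala2.12-java17-python3-ubuntu /opt/spark/bin/pyspark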
16 | # 17 | FROM spark:3.5.3-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.3/scala2.12-java17-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.3-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.3/scala2.12-java17-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
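# Packaging note: from 3.5.3 on, these Dockerfiles also create a versionless
# /opt/spark/examples/jars/spark-examples.jar symlink, so example jobs can be
# submitted without hard-coding the Spark version, e.g. inside the container:
#
#   /opt/spark/bin/spark-submit --class org.apache.spark.examples.SparkPi \
#     /opt/spark/examples/jars/spark-examples.jar 100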
16 | # 17 | FROM eclipse-temurin:17-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.3/spark-3.5.3-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.3/spark-3.5.3-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=0A2D660358B6F6F8071FD16F6606986CF5A8447C 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME /opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /3.5.4/scala2.12-java11-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
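# Layering note: each variant keeps its package work in a single RUN and ends
# with "rm -rf /var/lib/apt/lists/*" so the apt index never persists in an
# image layer. Extra R packages could be layered the same way;
# install.packages is standard R, though the chosen package and CRAN mirror
# are examples only:
#
#   USER root
#   RUN Rscript -e 'install.packages("data.table", repos="https://cloud.r-project.org")'
#   USER spark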
16 | # 17 | FROM spark:3.5.4-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.4/scala2.12-java11-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.4-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.4/scala2.12-java11-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.4-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.4/scala2.12-java11-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:11-jre-focal 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=19F745C40A0E550420BB2C522541488DA93FE4B4 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME /opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /3.5.4/scala2.12-java17-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.4-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.4/scala2.12-java17-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.4-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.4/scala2.12-java17-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
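#
# A minimal usage sketch for the SparkR variant built from this file, assuming
# the image is tagged to match the directory name:
#
#   docker run -it --rm spark:3.5.4-scala2.12-java17-r-ubuntu /opt/spark/bin/sparkR
#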
16 | # 17 | FROM spark:3.5.4-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.4/scala2.12-java17-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:17-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=19F745C40A0E550420BB2C522541488DA93FE4B4 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY 
entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME /opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /3.5.5/scala2.12-java11-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.5-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME=/usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.5/scala2.12-java11-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.5-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.5/scala2.12-java11-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.5-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME=/usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.5/scala2.12-java11-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:11-jre-focal 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.5/spark-3.5.5-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.5/spark-3.5.5-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=F28C9C925C188C35E345614DEDA00CE834F0FC5C 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv 
kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME=/opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /3.5.5/scala2.12-java17-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.5-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME=/usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.5/scala2.12-java17-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
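#
# A minimal usage sketch for this PySpark variant, assuming the image is tagged
# to match the directory name:
#
#   docker run -it --rm spark:3.5.5-scala2.12-java17-python3-ubuntu /opt/spark/bin/pyspark
#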
16 | # 17 | FROM spark:3.5.5-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.5/scala2.12-java17-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.5-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME=/usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.5/scala2.12-java17-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
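#
# A local build sketch for this base image, assuming it is run from the file's
# directory; the tag simply mirrors the directory layout. spark_uid (defaulted
# to 185 below) sets the UID/GID of the non-root "spark" user the image runs as:
#
#   docker build --build-arg spark_uid=185 -t spark:3.5.5-scala2.12-java17-ubuntu .
#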
16 | # 17 | FROM eclipse-temurin:17-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.5/spark-3.5.5-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.5/spark-3.5.5-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=F28C9C925C188C35E345614DEDA00CE834F0FC5C 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME=/opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /3.5.6/scala2.12-java11-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
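#
# The language variants below only layer apt packages on the published base
# image; user images can extend them the same way. A sketch, where the extra
# Python packages are purely illustrative:
#
#   FROM spark:3.5.6-scala2.12-java11-python3-r-ubuntu
#   USER root
#   RUN pip3 install numpy pandas
#   USER spark
#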
16 | # 17 | FROM spark:3.5.6-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME=/usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.6/scala2.12-java11-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.6-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.6/scala2.12-java11-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.6-scala2.12-java11-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME=/usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.6/scala2.12-java11-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:11-jre-focal 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.6/spark-3.5.6-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.6/spark-3.5.6-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=0FE4571297AB84440673665669600C8338F65970 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME=/opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /3.5.6/scala2.12-java17-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.6-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME=/usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /3.5.6/scala2.12-java17-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:3.5.6-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /3.5.6/scala2.12-java17-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
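#
# R_HOME below points SparkR at the system R installation that the r-base
# package places under /usr/lib/R.
#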
16 | # 17 | FROM spark:3.5.6-scala2.12-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME=/usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /3.5.6/scala2.12-java17-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:17-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.6/spark-3.5.6-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.6/spark-3.5.6-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=0FE4571297AB84440673665669600C8338F65970 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY 
entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME=/opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /4.0.0-preview1/scala2.13-java17-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-preview1-scala2.13-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /4.0.0-preview1/scala2.13-java17-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-preview1-scala2.13-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /4.0.0-preview1/scala2.13-java17-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-preview1-scala2.13-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /4.0.0-preview1/scala2.13-java17-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:17-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-4.0.0-preview1/spark-4.0.0-preview1-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-4.0.0-preview1/spark-4.0.0-preview1-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=4DC9676CEF9A83E98FCA02784D6620843CD87F5A 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin 
/opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME /opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /4.0.0-preview1/scala2.13-java21-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-preview1-scala2.13-java21-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /4.0.0-preview1/scala2.13-java21-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
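#
# A smoke-test sketch for this PySpark variant, assuming the tag matches the
# directory name; pi.py ships in the Spark distribution copied into the image:
#
#   docker run --rm spark:4.0.0-preview1-scala2.13-java21-python3-ubuntu \
#     /opt/spark/bin/spark-submit --master 'local[*]' \
#     /opt/spark/examples/src/main/python/pi.py 10
#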
16 | # 17 | FROM spark:4.0.0-preview1-scala2.13-java21-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /4.0.0-preview1/scala2.13-java21-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-preview1-scala2.13-java21-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /4.0.0-preview1/scala2.13-java21-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
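#
# The signature check in the build below can be reproduced by hand; a sketch,
# assuming wget and gpg are available on the host:
#
#   wget https://archive.apache.org/dist/spark/spark-4.0.0-preview1/spark-4.0.0-preview1-bin-hadoop3.tgz
#   wget https://archive.apache.org/dist/spark/spark-4.0.0-preview1/spark-4.0.0-preview1-bin-hadoop3.tgz.asc
#   gpg --keyserver hkps://keys.openpgp.org --recv-key 4DC9676CEF9A83E98FCA02784D6620843CD87F5A
#   gpg --verify spark-4.0.0-preview1-bin-hadoop3.tgz.asc spark-4.0.0-preview1-bin-hadoop3.tgz
#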
16 | # 17 | FROM eclipse-temurin:21-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-4.0.0-preview1/spark-4.0.0-preview1-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-4.0.0-preview1/spark-4.0.0-preview1-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=4DC9676CEF9A83E98FCA02784D6620843CD87F5A 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME /opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /4.0.0-preview2/scala2.13-java17-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
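#
# The entrypoint these images inherit targets Spark on Kubernetes; a submission
# sketch, with the API server address and image tag as placeholders:
#
#   spark-submit --master k8s://https://<apiserver>:6443 --deploy-mode cluster \
#     --conf spark.kubernetes.container.image=spark:4.0.0-preview2-scala2.13-java17-python3-r-ubuntu \
#     --class org.apache.spark.examples.SparkPi \
#     local:///opt/spark/examples/jars/spark-examples.jar
#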
16 | # 17 | FROM spark:4.0.0-preview2-scala2.13-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /4.0.0-preview2/scala2.13-java17-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-preview2-scala2.13-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /4.0.0-preview2/scala2.13-java17-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-preview2-scala2.13-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /4.0.0-preview2/scala2.13-java17-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:17-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-4.0.0-preview2/spark-4.0.0-preview2-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-4.0.0-preview2/spark-4.0.0-preview2-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=F28C9C925C188C35E345614DEDA00CE834F0FC5C 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME /opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /4.0.0-preview2/scala2.13-java21-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-preview2-scala2.13-java21-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME /usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /4.0.0-preview2/scala2.13-java21-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-preview2-scala2.13-java21-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /4.0.0-preview2/scala2.13-java21-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | FROM spark:4.0.0-preview2-scala2.13-java21-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME /usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /4.0.0-preview2/scala2.13-java21-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:21-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-4.0.0-preview2/spark-4.0.0-preview2-bin-hadoop3.tgz \ 40 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-4.0.0-preview2/spark-4.0.0-preview2-bin-hadoop3.tgz.asc \ 41 | GPG_KEY=F28C9C925C188C35E345614DEDA00CE834F0FC5C 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 
70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME /opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /4.0.0/scala2.13-java17-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-scala2.13-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME=/usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /4.0.0/scala2.13-java17-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-scala2.13-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /4.0.0/scala2.13-java17-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-scala2.13-java17-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME=/usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /4.0.0/scala2.13-java17-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM eclipse-temurin:17-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark -d /nonexistent spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://www.apache.org/dyn/closer.lua/spark/spark-4.0.0/spark-4.0.0-bin-hadoop3.tgz?action=download \ 40 | SPARK_TGZ_ASC_URL=https://www.apache.org/dyn/closer.lua/spark/spark-4.0.0/spark-4.0.0-bin-hadoop3.tgz.asc?action=download \ 41 | GPG_KEY=4DC9676CEF9A83E98FCA02784D6620843CD87F5A 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv 
sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME=/opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /4.0.0/scala2.13-java21-python3-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-scala2.13-java21-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | apt-get install -y r-base r-base-dev; \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | ENV R_HOME=/usr/lib/R 28 | 29 | USER spark 30 | -------------------------------------------------------------------------------- /4.0.0/scala2.13-java21-python3-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | FROM spark:4.0.0-scala2.13-java21-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y python3 python3-pip; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | USER spark 27 | -------------------------------------------------------------------------------- /4.0.0/scala2.13-java21-r-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM spark:4.0.0-scala2.13-java21-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | apt-get install -y r-base r-base-dev; \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | ENV R_HOME=/usr/lib/R 27 | 28 | USER spark 29 | -------------------------------------------------------------------------------- /4.0.0/scala2.13-java21-ubuntu/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | FROM eclipse-temurin:21-jammy 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark -d /nonexistent spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://www.apache.org/dyn/closer.lua/spark/spark-4.0.0/spark-4.0.0-bin-hadoop3.tgz?action=download \ 40 | SPARK_TGZ_ASC_URL=https://www.apache.org/dyn/closer.lua/spark/spark-4.0.0/spark-4.0.0-bin-hadoop3.tgz.asc?action=download \ 41 | GPG_KEY=4DC9676CEF9A83E98FCA02784D6620843CD87F5A 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME=/opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing to Spark Docker 2 | 3 | Thanks for improving the project! *Before opening a pull request*, review the 4 | [Contributing to Spark guide](https://spark.apache.org/contributing.html). 5 | It lists steps that are required before creating a PR. In particular, consider: 6 | 7 | - Is the change important and ready enough to ask the community to spend time reviewing? 8 | - Have you searched for existing, related JIRAs and pull requests? 9 | - Is this a new feature that can stand alone as a [third party project](https://spark.apache.org/third-party-projects.html) ? 10 | - Is the change being proposed clearly explained and motivated? 11 | 12 | When you contribute code, you affirm that the contribution is your original work and that you 13 | license the work to the project under the project's open source license. 
Whether or not you 14 | state this explicitly, by submitting any copyrighted material via pull request, email, or 15 | other means you agree to license the material under the project's open source license and 16 | warrant that you have the legal authority to do so. 17 | 18 | ### How to update Dockerfile 19 | 20 | - Update `Dockerfile.template` 21 | - Update `tools/template.py` if the template rendering logic needs to change 22 | - Run `./add-dockerfiles.sh [version]` to regenerate the Dockerfiles -------------------------------------------------------------------------------- /Dockerfile.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | FROM {{ BASE_IMAGE }} 18 | 19 | ARG spark_uid=185 20 | 21 | RUN groupadd --system --gid=${spark_uid} spark && \ 22 | useradd --system --uid=${spark_uid} --gid=spark -d /nonexistent spark 23 | 24 | RUN set -ex; \ 25 | apt-get update; \ 26 | apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \ 27 | mkdir -p /opt/spark; \ 28 | mkdir /opt/spark/python; \ 29 | mkdir -p /opt/spark/examples; \ 30 | mkdir -p /opt/spark/work-dir; \ 31 | chmod g+w /opt/spark/work-dir; \ 32 | touch /opt/spark/RELEASE; \ 33 | chown -R spark:spark /opt/spark; \ 34 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \ 35 | rm -rf /var/lib/apt/lists/* 36 | 37 | # Install Apache Spark 38 | # https://downloads.apache.org/spark/KEYS 39 | ENV SPARK_TGZ_URL=https://www.apache.org/dyn/closer.lua/spark/spark-{{ SPARK_VERSION }}/spark-{{ SPARK_VERSION }}-bin-hadoop3.tgz?action=download \ 40 | SPARK_TGZ_ASC_URL=https://www.apache.org/dyn/closer.lua/spark/spark-{{ SPARK_VERSION }}/spark-{{ SPARK_VERSION }}-bin-hadoop3.tgz.asc?action=download \ 41 | GPG_KEY={{ SPARK_GPG_KEY }} 42 | 43 | RUN set -ex; \ 44 | export SPARK_TMP="$(mktemp -d)"; \ 45 | cd $SPARK_TMP; \ 46 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 47 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 48 | export GNUPGHOME="$(mktemp -d)"; \ 49 | gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 50 | gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 51 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 52 | gpgconf --kill all; \ 53 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 54 | \ 55 | tar -xf spark.tgz --strip-components=1; \ 56 | chown -R spark:spark .; \ 57 | mv jars /opt/spark/; \ 58 | mv RELEASE /opt/spark/; \ 59 | mv bin /opt/spark/; \ 60 | mv sbin /opt/spark/; \ 61 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 62 | mv examples /opt/spark/; \ 63 | ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \ 64 | mv 
kubernetes/tests /opt/spark/; \ 65 | mv data /opt/spark/; \ 66 | mv python/pyspark /opt/spark/python/pyspark/; \ 67 | mv python/lib /opt/spark/python/lib/; \ 68 | mv R /opt/spark/; \ 69 | chmod a+x /opt/decom.sh; \ 70 | cd ..; \ 71 | rm -rf "$SPARK_TMP"; 72 | 73 | COPY entrypoint.sh /opt/ 74 | 75 | ENV SPARK_HOME=/opt/spark 76 | 77 | WORKDIR /opt/spark/work-dir 78 | 79 | USER spark 80 | 81 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 82 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache Spark 2 | Copyright 2014 and onwards The Apache Software Foundation. 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | 7 | -------------------------------------------------------------------------------- /OVERVIEW.md: -------------------------------------------------------------------------------- 1 | # What is Apache Spark™? 2 | 3 | Apache Spark™ is a multi-language engine for executing data engineering, data science, and machine learning on single-node machines or clusters. It provides high-level APIs in Scala, Java, Python, and R, and an optimized engine that supports general computation graphs for data analysis. It also supports a rich set of higher-level tools including Spark SQL for SQL and DataFrames, pandas API on Spark for pandas workloads, MLlib for machine learning, GraphX for graph processing, and Structured Streaming for stream processing. 4 | 5 | https://spark.apache.org/ 6 | 7 | ## Online Documentation 8 | 9 | You can find the latest Spark documentation, including a programming guide, on the [project web page](https://spark.apache.org/documentation.html). This README file only contains basic setup instructions. 
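To follow the shell examples below, you can pull a published image up front (optional, since `docker run` pulls on first use). Any of the tags listed under "Supported tags" later on this page works; `latest` is shown here:

```
docker pull apache/spark:latest
```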
10 | 11 | ## Interactive Scala Shell 12 | 13 | The easiest way to start using Spark is through the Scala shell: 14 | 15 | ``` 16 | docker run -it apache/spark /opt/spark/bin/spark-shell 17 | ``` 18 | 19 | Try the following command, which should return 1,000,000,000: 20 | 21 | ``` 22 | scala> spark.range(1000 * 1000 * 1000).count() 23 | ``` 24 | 25 | ## Interactive Python Shell 26 | 27 | The easiest way to start using PySpark is through the Python shell: 28 | 29 | ``` 30 | docker run -it apache/spark /opt/spark/bin/pyspark 31 | ``` 32 | 33 | And run the following command, which should also return 1,000,000,000: 34 | 35 | ``` 36 | >>> spark.range(1000 * 1000 * 1000).count() 37 | ``` 38 | 39 | ## Interactive R Shell 40 | 41 | The easiest way to start using R on Spark is through the R shell: 42 | 43 | ``` 44 | docker run -it apache/spark:r /opt/spark/bin/sparkR 45 | ``` 46 | 47 | ## Running Spark on Kubernetes 48 | 49 | https://spark.apache.org/docs/latest/running-on-kubernetes.html 50 | 51 | ## Supported tags and respective Dockerfile links 52 | 53 | Currently, the `apache/spark` docker image supports four image types for each version. 54 | 55 | For example, for v3.4.0: 56 | - [3.4.0-scala2.12-java11-python3-ubuntu, 3.4.0-python3, 3.4.0, python3, latest](https://github.com/apache/spark-docker/tree/fe05e38f0ffad271edccd6ae40a77d5f14f3eef7/3.4.0/scala2.12-java11-python3-ubuntu) 57 | - [3.4.0-scala2.12-java11-r-ubuntu, 3.4.0-r, r](https://github.com/apache/spark-docker/tree/fe05e38f0ffad271edccd6ae40a77d5f14f3eef7/3.4.0/scala2.12-java11-r-ubuntu) 58 | - [3.4.0-scala2.12-java11-ubuntu, 3.4.0-scala, scala](https://github.com/apache/spark-docker/tree/fe05e38f0ffad271edccd6ae40a77d5f14f3eef7/3.4.0/scala2.12-java11-ubuntu) 59 | - [3.4.0-scala2.12-java11-python3-r-ubuntu](https://github.com/apache/spark-docker/tree/fe05e38f0ffad271edccd6ae40a77d5f14f3eef7/3.4.0/scala2.12-java11-python3-r-ubuntu) 60 | 61 | ## Environment Variables 62 | 63 | The environment variables recognized by entrypoint.sh are listed below: 64 | 65 | | Environment Variable | Meaning | 66 | |----------------------|-----------| 67 | | SPARK_EXTRA_CLASSPATH | The extra path to be added to the classpath, see also https://spark.apache.org/docs/latest/running-on-kubernetes.html#dependency-management | 68 | | PYSPARK_PYTHON | Python binary executable to use for PySpark in both driver and workers (default is python3 if available, otherwise python). Property spark.pyspark.python takes precedence if it is set | 69 | | PYSPARK_DRIVER_PYTHON | Python binary executable to use for PySpark in driver only (default is PYSPARK_PYTHON). Property spark.pyspark.driver.python takes precedence if it is set | 70 | | SPARK_DIST_CLASSPATH | Distribution-defined classpath to add to processes | 71 | | SPARK_DRIVER_BIND_ADDRESS | Hostname or IP address to bind listening sockets to. See also `spark.driver.bindAddress` | 72 | | SPARK_EXECUTOR_JAVA_OPTS | The Java options for the Spark executor | 73 | | SPARK_APPLICATION_ID | A unique identifier for the Spark application | 74 | | SPARK_EXECUTOR_POD_IP | The pod IP address of the Spark executor | 75 | | SPARK_RESOURCE_PROFILE_ID | The resource profile ID | 76 | | SPARK_EXECUTOR_POD_NAME | The executor pod name | 77 | | SPARK_CONF_DIR | Alternate conf dir. (Default: ${SPARK_HOME}/conf) | 78 | | SPARK_EXECUTOR_CORES | Number of cores for the executors (Default: 1) | 79 | | SPARK_EXECUTOR_MEMORY | Memory per Executor (e.g. 1000M, 2G) (Default: 1G) | 80 | | SPARK_DRIVER_MEMORY | Memory for Driver (e.g. 
1000M, 2G) (Default: 1G) | 81 | 82 | See also https://spark.apache.org/docs/latest/configuration.html and https://spark.apache.org/docs/latest/running-on-kubernetes.html 83 | 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apache Spark Official Dockerfiles 2 | 3 | ## What is Apache Spark? 4 | 5 | Spark is a unified analytics engine for large-scale data processing. It provides 6 | high-level APIs in Scala, Java, Python, and R, and an optimized engine that 7 | supports general computation graphs for data analysis. It also supports a 8 | rich set of higher-level tools including Spark SQL for SQL and DataFrames, 9 | pandas API on Spark for pandas workloads, MLlib for machine learning, GraphX for graph processing, 10 | and Structured Streaming for stream processing. 11 | 12 | https://spark.apache.org/ 13 | 14 | ## Create a new version 15 | 16 | ### Step 1. Add Dockerfiles for a new version. 17 | 18 | You can see the [3.4.0 PR](https://github.com/apache/spark-docker/pull/33) as a reference. 19 | 20 | - 1.1 Add the GPG key to [tools/template.py](https://github.com/apache/spark-docker/blob/master/tools/template.py#L24) 21 | 22 | This GPG key will be used by the Dockerfiles (such as [3.4.0](https://github.com/apache/spark-docker/blob/04e85239a8fcc9b3dcfe146bc144ee2b981f8f42/3.4.0/scala2.12-java11-ubuntu/Dockerfile#L41)) to verify the signature of the Apache Spark tarball. 23 | 24 | - 1.2 Add an image build workflow (such as the [3.4.0 yaml](https://github.com/apache/spark-docker/blob/04e85239a8fcc9b3dcfe146bc144ee2b981f8f42/.github/workflows/build_3.4.0.yaml)) 25 | 26 | This file will be used by GitHub Actions to build the Docker image when you submit the PR, to make sure the Dockerfiles are correct and pass all tests (build/standalone/kubernetes). 27 | 28 | - 1.3 Run `./add-dockerfiles.sh [version]` to add the Dockerfiles. 29 | 30 | You will get a new directory with the Dockerfiles for the specified version. 31 | 32 | - 1.4 Add version and tag info to versions.json, publish.yml and test.yml. 33 | 34 | This version file will be used by the image build workflows (see this [3.4.0 commit](https://github.com/apache/spark-docker/commit/47c357a52625f482b8b0cb831ccb8c9df523affd) as a reference) and by the Docker Official Image. 35 | 36 | ### Step 2. Publish apache/spark Images. 37 | 38 | Click [Publish (Java 21 only)](https://github.com/apache/spark-docker/actions/workflows/publish-java21.yaml), [Publish (Java 17 only)](https://github.com/apache/spark-docker/actions/workflows/publish-java17.yaml) (for 4.x) or [Publish](https://github.com/apache/spark-docker/actions/workflows/publish.yml) (for 3.x) to publish the images. 39 | 40 | After this, the [apache/spark](https://hub.docker.com/r/apache/spark) docker images will be published. 41 | 42 | 43 | ### Step 3. Publish spark Docker Official Images. 44 | 45 | Submit a PR to [docker-library/official-images](https://github.com/docker-library/official-images/); see [this PR](https://github.com/docker-library/official-images/pull/15363) as a reference. 46 | 47 | You can run `tools/manifest.py manifest` to generate the content. 48 | 49 | After this, the [spark](https://hub.docker.com/_/spark) docker images will be published. 
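For reference, `tools/manifest.py manifest` reads `versions.json` and prints one entry per image directory in the format expected by docker-library/official-images (see `generate_manifest` in tools/manifest.py). A sketch of a single entry; the tags and directory below are illustrative placeholders, and GitCommit is filled in from `git rev-parse HEAD`:

```
Maintainers: Apache Spark Developers (@ApacheSpark)
GitRepo: https://github.com/apache/spark-docker.git

Tags: 4.0.0-scala2.13-java17-python3-ubuntu, 4.0.0-python3, 4.0.0, python3, latest
Architectures: amd64, arm64v8
GitCommit: <current HEAD commit hash>
Directory: ./4.0.0/scala2.13-java17-python3-ubuntu
```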
50 | 51 | ## About images 52 | 53 | | | Apache Spark Image | Spark Docker Official Image | 54 | |---------------|--------------------------------------------------------|--------------------------------------------------------| 55 | | Name | apache/spark | spark | 56 | | Maintenance | Reviewed and published by the Apache Spark community | Reviewed, published, and maintained by the Docker community | 57 | | Update policy | Built and pushed once when a specific version is released | Actively rebuilt for updates and security fixes | 58 | | Link | https://hub.docker.com/r/apache/spark | https://hub.docker.com/_/spark | 59 | | Source | [apache/spark-docker](https://github.com/apache/spark-docker) | [apache/spark-docker](https://github.com/apache/spark-docker) and [docker-library/official-images](https://github.com/docker-library/official-images/blob/master/library/spark) | 60 | 61 | We recommend using the [Spark Docker Official Image](https://hub.docker.com/_/spark); the [Apache Spark Image](https://hub.docker.com/r/apache/spark) is provided in case of delays in the Docker community review process. 62 | 63 | ## About this repository 64 | 65 | This repository contains the Dockerfiles used to build the Apache Spark Docker Image. 66 | 67 | See more in [SPARK-40513: SPIP: Support Docker Official Image for Spark](https://issues.apache.org/jira/browse/SPARK-40513). 68 | -------------------------------------------------------------------------------- /add-dockerfiles.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # Usage: $0 [version] 21 | # Generate Dockerfiles for the specified Spark version. 22 | # 23 | # Examples: 24 | # - Add the default (3.5.0) dockerfiles: 25 | # $ ./add-dockerfiles.sh 26 | # - Add 3.3.1 dockerfiles: 27 | # $ ./add-dockerfiles.sh 3.3.1 28 | 29 | VERSION=${1:-"3.5.0"} 30 | 31 | if echo $VERSION | grep -Eq "^4."; then 32 | # 4.x default 33 | TAGS=" 34 | scala2.13-java17-python3-r-ubuntu 35 | scala2.13-java17-python3-ubuntu 36 | scala2.13-java17-r-ubuntu 37 | scala2.13-java17-ubuntu 38 | scala2.13-java21-python3-r-ubuntu 39 | scala2.13-java21-python3-ubuntu 40 | scala2.13-java21-r-ubuntu 41 | scala2.13-java21-ubuntu 42 | " 43 | elif echo $VERSION | grep -Eq "^3."; then 44 | # 3.x default 45 | TAGS=" 46 | scala2.12-java11-python3-r-ubuntu 47 | scala2.12-java11-python3-ubuntu 48 | scala2.12-java11-r-ubuntu 49 | scala2.12-java11-ubuntu 50 | " 51 | # java17 images were added in 3.5.0. We need to skip java17 for 3.3.x and 3.4.x 52 | if ! 
echo $VERSION | grep -Eq "^3.3|^3.4"; then 53 | TAGS+=" 54 | scala2.12-java17-python3-r-ubuntu 55 | scala2.12-java17-python3-ubuntu 56 | scala2.12-java17-r-ubuntu 57 | scala2.12-java17-ubuntu 58 | " 59 | fi 60 | fi 61 | 62 | for TAG in $TAGS; do 63 | OPTS="" 64 | if echo $TAG | grep -q "python"; then 65 | OPTS+=" --pyspark" 66 | fi 67 | 68 | if echo $TAG | grep -q "r-"; then 69 | OPTS+=" --sparkr" 70 | fi 71 | 72 | if echo $TAG | grep -q "scala2.12"; then 73 | OPTS+=" --scala-version 2.12" 74 | elif echo $TAG | grep -q "scala2.13"; then 75 | OPTS+=" --scala-version 2.13" 76 | fi 77 | 78 | if echo $TAG | grep -q "java21"; then 79 | OPTS+=" --java-version 21 --image eclipse-temurin:21-jammy" 80 | elif echo $TAG | grep -q "java17"; then 81 | OPTS+=" --java-version 17 --image eclipse-temurin:17-jammy" 82 | elif echo $TAG | grep -q "java11"; then 83 | OPTS+=" --java-version 11 --image eclipse-temurin:11-jre-focal" 84 | fi 85 | 86 | OPTS+=" --spark-version $VERSION" 87 | 88 | mkdir -p $VERSION/$TAG 89 | 90 | if [ "$TAG" == "scala2.12-java11-ubuntu" ] || [ "$TAG" == "scala2.12-java17-ubuntu" ] || [ "$TAG" == "scala2.13-java17-ubuntu" ] || [ "$TAG" == "scala2.13-java21-ubuntu" ]; then 91 | python3 tools/template.py $OPTS > $VERSION/$TAG/Dockerfile 92 | python3 tools/template.py $OPTS -f entrypoint.sh.template > $VERSION/$TAG/entrypoint.sh 93 | chmod a+x $VERSION/$TAG/entrypoint.sh 94 | else 95 | python3 tools/template.py $OPTS -f r-python.template > $VERSION/$TAG/Dockerfile 96 | fi 97 | 98 | done 99 | -------------------------------------------------------------------------------- /awesome-spark-docker.md: -------------------------------------------------------------------------------- 1 | A curated list of awesome Apache Spark Docker resources. 2 | 3 | - [jupyter/docker-stacks/pyspark-notebook](https://github.com/jupyter/docker-stacks/tree/master/pyspark-notebook) - PySpark with Jupyter Notebook. 4 | - [big-data-europe/docker-spark](https://github.com/big-data-europe/docker-spark) - Dockerfiles for Spark standalone clusters and Spark applications. 5 | - [openeuler/spark](https://github.com/openeuler-mirror/openeuler-docker-images/tree/master/spark) - A Dockerfile reference for dnf/yum-based operating systems. 6 | - [GoogleCloudPlatform/spark-on-k8s-operator](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator) - Kubernetes operator for managing the lifecycle of Apache Spark applications on Kubernetes. 7 | 8 | -------------------------------------------------------------------------------- /entrypoint.sh.template: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
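#
# Overview (a summary for readers of this template; the case statement at the
# bottom of the file is the authoritative logic):
#   entrypoint.sh driver ...  -> runs spark-submit in client deploy mode under tini
#   entrypoint.sh executor    -> launches the Kubernetes executor backend under tini
#   entrypoint.sh <command>   -> anything else is exec'd as-is (pass-through mode)
# When the container starts as root, gosu is used to step down to the spark user.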
17 | # 18 | # Prevent any errors from being silently ignored 19 | set -eo pipefail 20 | 21 | attempt_setup_fake_passwd_entry() { 22 | # Check whether there is a passwd entry for the container UID 23 | local myuid; myuid="$(id -u)" 24 | # If there is no passwd entry for the container UID, attempt to fake one 25 | # You can also refer to the https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523 26 | # It's to resolve OpenShift random UID case. 27 | # See also: https://github.com/docker-library/postgres/pull/448 28 | if ! getent passwd "$myuid" &> /dev/null; then 29 | local wrapper 30 | for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do 31 | if [ -s "$wrapper" ]; then 32 | NSS_WRAPPER_PASSWD="$(mktemp)" 33 | NSS_WRAPPER_GROUP="$(mktemp)" 34 | export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP 35 | local mygid; mygid="$(id -g)" 36 | printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD" 37 | printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP" 38 | break 39 | fi 40 | done 41 | fi 42 | } 43 | 44 | if [ -z "$JAVA_HOME" ]; then 45 | JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}') 46 | fi 47 | 48 | SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*" 49 | for v in "${!SPARK_JAVA_OPT_@}"; do 50 | SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" ) 51 | done 52 | 53 | if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then 54 | SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH" 55 | fi 56 | 57 | if ! [ -z "${PYSPARK_PYTHON+x}" ]; then 58 | export PYSPARK_PYTHON 59 | fi 60 | if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then 61 | export PYSPARK_DRIVER_PYTHON 62 | fi 63 | 64 | # If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor. 65 | # It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s. 66 | if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then 67 | export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)" 68 | fi 69 | 70 | if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then 71 | SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH"; 72 | fi 73 | 74 | if ! [ -z "${SPARK_CONF_DIR+x}" ]; then 75 | SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH"; 76 | elif ! 
[ -z "${SPARK_HOME+x}" ]; then 77 | SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH"; 78 | fi 79 | 80 | # SPARK-43540: add current working directory into executor classpath 81 | SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD" 82 | 83 | # Switch to spark if no USER specified (root by default) otherwise use USER directly 84 | switch_spark_if_root() { 85 | if [ $(id -u) -eq 0 ]; then 86 | echo gosu spark 87 | fi 88 | } 89 | 90 | case "$1" in 91 | driver) 92 | shift 1 93 | CMD=( 94 | "$SPARK_HOME/bin/spark-submit" 95 | --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" 96 | --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS" 97 | --deploy-mode client 98 | "$@" 99 | ) 100 | attempt_setup_fake_passwd_entry 101 | # Execute the container CMD under tini for better hygiene 102 | exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" 103 | ;; 104 | executor) 105 | shift 1 106 | CMD=( 107 | ${JAVA_HOME}/bin/java 108 | "${SPARK_EXECUTOR_JAVA_OPTS[@]}" 109 | -Xms"$SPARK_EXECUTOR_MEMORY" 110 | -Xmx"$SPARK_EXECUTOR_MEMORY" 111 | -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH" 112 | org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend 113 | --driver-url "$SPARK_DRIVER_URL" 114 | --executor-id "$SPARK_EXECUTOR_ID" 115 | --cores "$SPARK_EXECUTOR_CORES" 116 | --app-id "$SPARK_APPLICATION_ID" 117 | --hostname "$SPARK_EXECUTOR_POD_IP" 118 | --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID" 119 | --podName "$SPARK_EXECUTOR_POD_NAME" 120 | ) 121 | attempt_setup_fake_passwd_entry 122 | # Execute the container CMD under tini for better hygiene 123 | exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" 124 | ;; 125 | 126 | *) 127 | # Non-spark-on-k8s command provided, proceeding in pass-through mode... 128 | exec "$@" 129 | ;; 130 | esac 131 | -------------------------------------------------------------------------------- /r-python.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
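#
# Note for readers: this Jinja2 template is rendered by tools/template.py; the
# HAVE_PY and HAVE_R flags come from add-dockerfiles.sh's --pyspark/--sparkr
# options and select which runtimes are layered on top of the base spark image.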
16 | # 17 | FROM spark:{{ SPARK_VERSION }}-scala{{ SCALA_VERSION }}-java{{ JAVA_VERSION }}-ubuntu 18 | 19 | USER root 20 | 21 | RUN set -ex; \ 22 | apt-get update; \ 23 | {%- if HAVE_PY %} 24 | apt-get install -y python3 python3-pip; \ 25 | {%- endif %} 26 | {%- if HAVE_R %} 27 | apt-get install -y r-base r-base-dev; \ 28 | {%- endif %} 29 | rm -rf /var/lib/apt/lists/* 30 | {%- if HAVE_R %} 31 | 32 | ENV R_HOME=/usr/lib/R 33 | {%- endif %} 34 | 35 | USER spark 36 | -------------------------------------------------------------------------------- /testing/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | set -eo errexit 20 | 21 | SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) 22 | 23 | . "${SCRIPT_DIR}/testing.sh" 24 | 25 | echo "Test successfully finished" 26 | -------------------------------------------------------------------------------- /tools/ci_runner_cleaner/free_disk_space.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
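#
# Frees disk space on the CI runner by purging large preinstalled packages and
# toolchains that the image builds do not need. The paths and package names
# below are specific to the hosted runner image and may need updating over time.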
18 | # 19 | 20 | echo "==================================" 21 | echo "Free up disk space on CI system" 22 | echo "==================================" 23 | 24 | echo "Listing 100 largest packages" 25 | dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 26 | df -h 27 | 28 | echo "Removing large packages" 29 | sudo rm -rf /usr/share/dotnet/ 30 | sudo rm -rf /usr/share/php/ 31 | sudo rm -rf /usr/local/graalvm/ 32 | sudo rm -rf /usr/local/.ghcup/ 33 | sudo rm -rf /usr/local/share/powershell 34 | sudo rm -rf /usr/local/share/chromium 35 | sudo rm -rf /usr/local/lib/android 36 | sudo rm -rf /usr/local/lib/node_modules 37 | 38 | sudo rm -rf /opt/az 39 | sudo rm -rf /opt/hostedtoolcache/CodeQL 40 | sudo rm -rf /opt/hostedtoolcache/go 41 | sudo rm -rf /opt/hostedtoolcache/node 42 | 43 | sudo apt-get remove --purge -y '^aspnet.*' 44 | sudo apt-get remove --purge -y '^dotnet-.*' 45 | sudo apt-get remove --purge -y '^llvm-.*' 46 | sudo apt-get remove --purge -y 'php.*' 47 | sudo apt-get remove --purge -y '^mongodb-.*' 48 | sudo apt-get remove --purge -y snapd google-chrome-stable microsoft-edge-stable firefox 49 | sudo apt-get remove --purge -y azure-cli google-cloud-sdk mono-devel powershell libgl1-mesa-dri 50 | sudo apt-get autoremove --purge -y 51 | sudo apt-get clean 52 | 53 | df -h -------------------------------------------------------------------------------- /tools/ci_runner_cleaner/free_disk_space_container.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | echo "==================================" 21 | echo "Free up disk space on CI system" 22 | echo "==================================" 23 | 24 | echo "Listing 100 largest packages" 25 | dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 26 | df -h 27 | 28 | echo "Removing large packages" 29 | rm -rf /__t/CodeQL 30 | rm -rf /__t/go 31 | rm -rf /__t/node 32 | 33 | df -h -------------------------------------------------------------------------------- /tools/manifest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | from argparse import ArgumentParser 21 | import json 22 | import subprocess 23 | 24 | 25 | def run_cmd(cmd): 26 | if isinstance(cmd, list): 27 | return subprocess.check_output(cmd).decode("utf-8") 28 | else: 29 | return subprocess.check_output(cmd.split(" ")).decode("utf-8") 30 | 31 | 32 | def generate_manifest(versions): 33 | output = ( 34 | "Maintainers: Apache Spark Developers (@ApacheSpark)\n" 35 | "GitRepo: https://github.com/apache/spark-docker.git\n\n" 36 | ) 37 | git_commit = run_cmd("git rev-parse HEAD").replace("\n", "") 38 | content = ( 39 | "Tags: %s\n" 40 | "Architectures: amd64, arm64v8\n" 41 | "GitCommit: %s\n" 42 | "Directory: ./%s\n\n" 43 | ) 44 | for version in versions: 45 | tags = ", ".join(version["tags"]) 46 | path = version["path"] 47 | output += content % (tags, git_commit, path) 48 | return output 49 | 50 | 51 | def parse_opts(): 52 | parser = ArgumentParser(prog="manifest.py") 53 | 54 | parser.add_argument( 55 | dest="mode", 56 | choices=["tags", "manifest"], 57 | type=str, 58 | help="The print mode of script", 59 | ) 60 | 61 | parser.add_argument( 62 | "-p", 63 | "--path", 64 | type=str, 65 | help="The path to specific dockerfile", 66 | ) 67 | 68 | parser.add_argument( 69 | "-i", 70 | "--image", 71 | type=str, 72 | help="The complete image registry url (such as `apache/spark`)", 73 | ) 74 | 75 | parser.add_argument( 76 | "-f", 77 | "--file", 78 | type=str, 79 | default="versions.json", 80 | help="The version json of image meta.", 81 | ) 82 | 83 | args, unknown = parser.parse_known_args() 84 | if unknown: 85 | parser.error("Unsupported arguments: %s" % " ".join(unknown)) 86 | return args 87 | 88 | 89 | def main(): 90 | opts = parse_opts() 91 | filepath = opts.path 92 | image = opts.image 93 | mode = opts.mode 94 | version_file = opts.file 95 | 96 | if mode == "tags": 97 | tags = [] 98 | with open(version_file, "r") as f: 99 | versions = json.load(f).get("versions") 100 | # Filter the specific dockerfile 101 | versions = list(filter(lambda x: x.get("path") == filepath, versions)) 102 | # Get matched version's tags 103 | tags = versions[0]["tags"] if versions else [] 104 | print(",".join(["%s:%s" % (image, t) for t in tags])) 105 | elif mode == "manifest": 106 | with open(version_file, "r") as f: 107 | versions = json.load(f).get("versions") 108 | print(generate_manifest(versions)) 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /tools/requirements.txt: -------------------------------------------------------------------------------- 1 | jinja2 2 | -------------------------------------------------------------------------------- /tools/template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 
7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | from argparse import ArgumentParser 21 | 22 | from jinja2 import Environment, FileSystemLoader 23 | 24 | GPG_KEY_DICT = { 25 | # issuer "maxgekk@apache.org" 26 | "3.3.0": "80FB8EBE8EBA68504989703491B5DC815DBF10D3", 27 | # issuer "yumwang@apache.org" 28 | "3.3.1": "86727D43E73A415F67A0B1A14E68B3E6CD473653", 29 | # issuer "viirya@apache.org" 30 | "3.3.2": "C56349D886F2B01F8CAE794C653C2301FEA493EE", 31 | # issuer "yumwang@apache.org" 32 | "3.3.3": "F6468A4FF8377B4F1C07BC2AA077F928A0BF68D8", 33 | # issuer "xinrong@apache.org" 34 | "3.4.0": "CC68B3D16FE33A766705160BA7E57908C7A4E1B1", 35 | # issuer "dongjoon@apache.org" 36 | "3.4.1": "F28C9C925C188C35E345614DEDA00CE834F0FC5C", 37 | # issuer "dongjoon@apache.org" 38 | "3.4.2": "F28C9C925C188C35E345614DEDA00CE834F0FC5C", 39 | # issuer "dongjoon@apache.org" 40 | "3.4.3": "F28C9C925C188C35E345614DEDA00CE834F0FC5C", 41 | # issuer "dongjoon@apache.org" 42 | "3.4.4": "F28C9C925C188C35E345614DEDA00CE834F0FC5C", 43 | # issuer "liyuanjian@apache.org" 44 | "3.5.0": "FC3AE3A7EAA1BAC98770840E7E1ABCC53AAA2216", 45 | # issuer "kabhwan@apache.org" 46 | "3.5.1": "FD3E84942E5E6106235A1D25BD356A9F8740E4FF", 47 | # issuer "yao@apache.org" 48 | "3.5.2": "D76E23B9F11B5BF6864613C4F7051850A0AF904D", 49 | # issuer "haejoon@apache.org" 50 | "3.5.3": "0A2D660358B6F6F8071FD16F6606986CF5A8447C", 51 | # issuer "yangjie01@apache.org" 52 | "3.5.4": "19F745C40A0E550420BB2C522541488DA93FE4B4", 53 | # issuer "dongjoon@apache.org" 54 | "3.5.5": "F28C9C925C188C35E345614DEDA00CE834F0FC5C", 55 | # issuer "gurwls223@apache.org" 56 | "3.5.6": "0FE4571297AB84440673665669600C8338F65970", 57 | # issuer "wenchen@apache.org" 58 | "4.0.0-preview1": "4DC9676CEF9A83E98FCA02784D6620843CD87F5A", 59 | # issuer "dongjoon@apache.org" 60 | "4.0.0-preview2": "F28C9C925C188C35E345614DEDA00CE834F0FC5C", 61 | # issuer "wenchen@apache.org" 62 | "4.0.0": "4DC9676CEF9A83E98FCA02784D6620843CD87F5A" 63 | } 64 | 65 | 66 | def parse_opts(): 67 | parser = ArgumentParser(prog="template") 68 | 69 | parser.add_argument( 70 | "-f", 71 | "--template-file", 72 | help="The Dockerfile template file path.", 73 | default="Dockerfile.template", 74 | ) 75 | 76 | parser.add_argument( 77 | "-v", 78 | "--spark-version", 79 | help="The Spark version of Dockerfile.", 80 | default="3.3.0", 81 | ) 82 | 83 | parser.add_argument( 84 | "-j", 85 | "--java-version", 86 | help="Java version of Dockerfile.", 87 | default="11", 88 | ) 89 | 90 | parser.add_argument( 91 | "-s", 92 | "--scala-version", 93 | help="The Spark version of Dockerfile.", 94 | default="2.12", 95 | ) 96 | 97 | parser.add_argument( 98 | "-i", 99 | "--image", 100 | help="The base image tag of Dockerfile.", 101 | default="eclipse-temurin:11-jre-focal", 102 | ) 103 | 104 | parser.add_argument( 105 | "-p", 106 | "--pyspark", 107 | action="store_true", 108 | help="Have PySpark support or not.", 109 | ) 110 | 111 | parser.add_argument( 112 | "-r", 113 
| "--sparkr", 114 | action="store_true", 115 | help="Have SparkR support or not.", 116 | ) 117 | 118 | args, unknown = parser.parse_known_args() 119 | if unknown: 120 | parser.error("Unsupported arguments: %s" % " ".join(unknown)) 121 | return args 122 | 123 | 124 | def main(): 125 | opts = parse_opts() 126 | env = Environment(loader=FileSystemLoader("./")) 127 | template = env.get_template(opts.template_file) 128 | print( 129 | template.render( 130 | BASE_IMAGE=opts.image, 131 | HAVE_PY=opts.pyspark, 132 | HAVE_R=opts.sparkr, 133 | SPARK_VERSION=opts.spark_version, 134 | SPARK_GPG_KEY=GPG_KEY_DICT.get(opts.spark_version), 135 | JAVA_VERSION=opts.java_version, 136 | SCALA_VERSION=opts.scala_version, 137 | ) 138 | ) 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | --------------------------------------------------------------------------------