├── .gitignore ├── .travis.before.install.sh ├── .travis.release.images.sh ├── .travis.yml ├── Jenkinsfile ├── Makefile ├── Makefile.inc ├── README.md ├── change-yaml.sh ├── docs ├── functional-testing.md └── spark-version-update-process.md ├── hack ├── common.sh ├── compress.awk ├── lib │ ├── build │ │ ├── constants.sh │ │ ├── environment.sh │ │ └── rpm.sh │ ├── cleanup.sh │ ├── cmd.sh │ ├── init.sh │ ├── log │ │ ├── output.sh │ │ ├── stacktrace.sh │ │ └── system.sh │ ├── start.sh │ ├── test │ │ └── junit.sh │ └── util │ │ ├── docs.sh │ │ ├── ensure.sh │ │ ├── environment.sh │ │ ├── find.sh │ │ ├── golang.sh │ │ ├── misc.sh │ │ ├── text.sh │ │ └── trap.sh ├── test-cmd.sh ├── test-util.sh └── util.sh ├── image-inc.yaml ├── image.yaml ├── make-build-dir.sh ├── modules ├── common │ ├── added │ │ ├── conf │ │ │ ├── agent-config.yaml │ │ │ ├── agent.properties │ │ │ ├── log4j.properties │ │ │ ├── metrics.properties │ │ │ └── spark-defaults.conf │ │ └── scripts │ │ │ ├── entrypoint │ │ │ └── launch.sh │ ├── install │ └── module.yaml ├── metrics │ ├── added │ │ ├── agent-bond.jar │ │ └── jolokia-jvm-1.3.6-agent.jar │ ├── install │ └── module.yaml ├── s2i │ ├── added │ │ ├── assemble │ │ ├── s2i-env-vars │ │ └── usage │ ├── install │ └── module.yaml └── spark │ ├── added │ └── spark-entrypoint.sh │ ├── check_for_download │ ├── install │ └── module.yaml ├── openshift-spark-build-inc ├── Dockerfile └── modules │ ├── common │ ├── added │ │ ├── conf │ │ │ ├── agent-config.yaml │ │ │ ├── agent.properties │ │ │ ├── log4j.properties │ │ │ ├── metrics.properties │ │ │ └── spark-defaults.conf │ │ └── scripts │ │ │ ├── entrypoint │ │ │ └── launch.sh │ ├── install │ └── module.yaml │ ├── metrics │ ├── added │ │ ├── agent-bond.jar │ │ └── jolokia-jvm-1.3.6-agent.jar │ ├── install │ └── module.yaml │ └── s2i │ ├── added │ ├── assemble │ ├── s2i-env-vars │ └── usage │ ├── install │ └── module.yaml ├── openshift-spark-build ├── Dockerfile ├── modules │ ├── common │ │ ├── added │ │ │ ├── conf │ │ │ │ ├── agent-config.yaml │ │ │ │ ├── agent.properties │ │ │ │ ├── log4j.properties │ │ │ │ ├── metrics.properties │ │ │ │ └── spark-defaults.conf │ │ │ └── scripts │ │ │ │ ├── entrypoint │ │ │ │ └── launch.sh │ │ ├── install │ │ └── module.yaml │ ├── metrics │ │ ├── added │ │ │ ├── agent-bond.jar │ │ │ └── jolokia-jvm-1.3.6-agent.jar │ │ ├── install │ │ └── module.yaml │ ├── s2i │ │ ├── added │ │ │ ├── assemble │ │ │ ├── s2i-env-vars │ │ │ └── usage │ │ ├── install │ │ └── module.yaml │ └── spark │ │ ├── added │ │ └── spark-entrypoint.sh │ │ ├── check_for_download │ │ ├── install │ │ └── module.yaml └── spark-3.0.1-bin-hadoop3.2.tgz ├── spark-metrics-template.yaml ├── tag.sh ├── template.yaml ├── test ├── common.sh ├── completed │ ├── config-changes.sh │ ├── deploy.sh │ ├── deploy_jolokia.sh │ └── deploy_prometheus.sh ├── incomplete │ ├── app_fail.sh │ └── install_spark.sh ├── localcomplete.sh ├── prepare.sh ├── resources │ ├── config │ │ ├── log4j.properties │ │ └── spark-defaults.conf │ ├── test-configmap.yaml │ ├── test-spark-metrics-template.yaml │ └── test-template.yaml ├── run.sh └── sparkinputs.sh └── travis-check-pods.sh /.gitignore: -------------------------------------------------------------------------------- 1 | template.active 2 | target/ 3 | 4 | # Intellij 5 | .idea/ 6 | *.iml 7 | *.iws 8 | -------------------------------------------------------------------------------- /.travis.before.install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set 
-xe 4 | 5 | main() { 6 | if [[ "${TRAVIS_JOB_NAME}" != "Push container images" ]] || \ 7 | [[ "${TRAVIS_BRANCH}" = "master" && "${TRAVIS_PULL_REQUEST}" = "false" ]] || \ 8 | [[ "${TRAVIS_TAG}" =~ ^[0-9]+\.[0-9]+\.[0-9]+-[0-9]+$ ]]; then 9 | pwd 10 | bash --version 11 | sudo apt-get install --only-upgrade bash 12 | bash --version 13 | ./test/prepare.sh 14 | else 15 | echo "[Before install] Not doing the ''./test/prepare.sh', because the tag '${TRAVIS_TAG}' is not of form x.y.z-n or we are not building the master branch" 16 | fi 17 | } 18 | 19 | main 20 | -------------------------------------------------------------------------------- /.travis.release.images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | OWNER="${OWNER:-radanalyticsio}" 6 | IMAGES="${IMAGES:- 7 | openshift-spark 8 | openshift-spark-inc 9 | }" 10 | 11 | main() { 12 | if [[ "$TRAVIS_BRANCH" = "master" && "$TRAVIS_PULL_REQUEST" = "false" ]]; then 13 | echo "Squashing and pushing the :latest images to docker.io and quay.io" 14 | buildImages 15 | installDockerSquash 16 | loginDockerIo 17 | pushLatestImages "docker.io" 18 | loginQuayIo 19 | pushLatestImages "quay.io" 20 | elif [[ "${TRAVIS_TAG}" =~ ^[0-9]+\.[0-9]+\.[0-9]+-[0-9]+$ ]]; then 21 | echo "Squashing and pushing the '${TRAVIS_TAG}' images to docker.io and quay.io" 22 | buildImages 23 | installDockerSquash 24 | loginDockerIo 25 | pushReleaseImages "docker.io" 26 | loginQuayIo 27 | pushReleaseImages "quay.io" 28 | else 29 | echo "Not doing the docker push, because the tag '${TRAVIS_TAG}' is not of form x.y.z-n or we are not building the master branch" 30 | fi 31 | } 32 | 33 | buildImages() { 34 | BUILDER=docker make build 35 | BUILDER=docker make -f Makefile.inc build 36 | } 37 | 38 | loginDockerIo() { 39 | set +x 40 | docker login -u "$DOCKER_USERNAME" -p "$DOCKER_PASSWORD" 41 | set -x 42 | } 43 | 44 | loginQuayIo() { 45 | set +x 46 | docker login -u "$QUAY_USERNAME" -p "$QUAY_PASSWORD" quay.io 47 | set -x 48 | } 49 | 50 | installDockerSquash() { 51 | command -v docker-squash || pip install --user docker-squash 52 | } 53 | 54 | squashAndPush() { 55 | if [[ $# != 2 ]]; then 56 | echo "Usage: squashAndPush input_image output_image" && exit 57 | fi 58 | set +e 59 | local _in=$1 60 | local _out=$2 61 | 62 | local _layers_total=$(docker history -q $_in | wc -l) 63 | local _layers_to_keep=4 64 | 65 | if [[ ! "$_layers_total" =~ ^[0-9]+$ ]] || [[ "$_layers_total" -le "$_layers_to_keep" ]] ; then 66 | echo "error: _layers_total ('$_layers_total') is not a number or lower than or equal to $_layers_to_keep" >&2; return 67 | fi 68 | local _last_n=$[_layers_total - _layers_to_keep] 69 | 70 | echo "Squashing $_out (last $_last_n layers).." 71 | docker-squash -f $_last_n -t $_out $_in 72 | docker push $_out 73 | set -e 74 | } 75 | 76 | pushLatestImages() { 77 | if [[ $# != 1 ]]; then 78 | echo "Usage: pushLatestImages image_repo" && exit 79 | fi 80 | REPO="$1" 81 | 82 | for image in $IMAGES ; do 83 | squashAndPush $image "${REPO}/${OWNER}/${image}:latest" 84 | done 85 | } 86 | 87 | pushReleaseImages() { 88 | if [[ $# != 1 ]]; then 89 | echo "Usage: pushReleaseImages image_repo" && exit 90 | fi 91 | REPO="$1" 92 | 93 | for image in $IMAGES ; do 94 | local _fully_qualified_image="${REPO}/${OWNER}/${image}:${TRAVIS_TAG}" 95 | echo "Squashing $_fully_qualified_image.." 
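# squashAndPush (defined above) squashes the image layers with docker-squash and pushes the
# fully qualified release tag; the x.y and :latest tags applied below then float to this newest release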
96 | 97 | squashAndPush $image $_fully_qualified_image 98 | 99 | # tag and push "x.y" image which acts as a "latest" for all major.minor.Z versions 100 | local _x_y_latest=`echo ${TRAVIS_TAG} | sed -r 's;([[:digit:]]+\.[[:digit:]]+).*;\1;'` 101 | docker tag $_fully_qualified_image ${REPO}/${OWNER}/${image}:${_x_y_latest} 102 | docker push ${REPO}/${OWNER}/${image}:${_x_y_latest} 103 | 104 | # tag and push also :latest image 105 | docker tag $_fully_qualified_image ${REPO}/${OWNER}/${image}:latest 106 | docker push ${REPO}/${OWNER}/${image}:latest 107 | done 108 | 109 | docker logout 110 | } 111 | 112 | main 113 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | language: go 4 | ## home folder is /home/travis/gopath/src/github.com/radanalyticsio/oshinko-cli 5 | services: 6 | - docker 7 | stages: 8 | - Openshift tests 9 | - deploy 10 | 11 | before_install: 12 | - ./.travis.before.install.sh 13 | 14 | env: 15 | global: OPENSHIFT_VERSION="v3.10" 16 | 17 | jobs: 18 | include: 19 | - stage: Openshift tests 20 | name: openshift-spark 21 | script: BUILDER=docker make test-e2e 22 | 23 | - name: openshift-spark-inc 24 | script: BUILDER=docker make -f Makefile.inc test-e2e 25 | 26 | - name: openshift-spark-comp 27 | script: BUILDER=docker make -f Makefile.inc test-e2e-completed 28 | 29 | - stage: deploy 30 | name: "Push container images" 31 | script: ./.travis.release.images.sh 32 | 33 | notifications: 34 | email: 35 | on_success: never 36 | on_failure: never 37 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env groovy 2 | 3 | // Used Jenkins plugins: 4 | // * Pipeline GitHub Notify Step Plugin 5 | // * Disable GitHub Multibranch Status Plugin 6 | // 7 | // $OCP_HOSTNAME -- hostname of running Openshift cluster 8 | // $OCP_USER -- Openshift user 9 | // $OCP_PASSWORD -- Openshift user's password 10 | 11 | node('radanalytics-test') { 12 | withEnv(["SPARK_TEST_EXTERNAL_REGISTRY=$EXTERNAL_DOCKER_REGISTRY", "SPARK_TEST_EXTERNAL_USER=$EXTERNAL_DOCKER_REGISTRY_USER", "SPARK_TEST_EXTERNAL_PASSWORD=$EXTERNAL_DOCKER_REGISTRY_PASSWORD", "KUBECONFIG=$WORKSPACE/client/kubeconfig", "PATH+OC_PATH=$WORKSPACE/client"]) { 13 | 14 | // generate build url 15 | def buildUrl = sh(script: 'curl https://url.corp.redhat.com/new?$BUILD_URL', returnStdout: true) 16 | 17 | stage('Test') { 18 | 19 | try { 20 | githubNotify(context: 'jenkins-ci/openshift-spark', description: 'This change is being tested', status: 'PENDING', targetUrl: buildUrl) 21 | } catch (err) { 22 | echo("Wasn't able to notify Github: ${err}") 23 | } 24 | 25 | try { 26 | // wipeout workspace 27 | deleteDir() 28 | 29 | dir('openshift-spark') { 30 | checkout scm 31 | } 32 | 33 | // download oc client 34 | dir('client') { 35 | sh('curl -LO https://github.com/openshift/origin/releases/download/v3.7.0/openshift-origin-client-tools-v3.7.0-7ed6862-linux-64bit.tar.gz') 36 | sh('curl -LO https://github.com/openshift/origin/releases/download/v3.7.0/openshift-origin-server-v3.7.0-7ed6862-linux-64bit.tar.gz') 37 | sh('tar -xzf openshift-origin-client-tools-v3.7.0-7ed6862-linux-64bit.tar.gz') 38 | sh('tar -xzf openshift-origin-server-v3.7.0-7ed6862-linux-64bit.tar.gz') 39 | sh('cp openshift-origin-client-tools-v3.7.0-7ed6862-linux-64bit/oc .') 40 | sh('cp 
openshift-origin-server-v3.7.0-7ed6862-linux-64bit/* .') 41 | } 42 | 43 | // login to openshift instance 44 | sh('oc login https://$OCP_HOSTNAME:8443 -u $OCP_USER -p $OCP_PASSWORD --insecure-skip-tls-verify=true') 45 | // let's start on a specific project, to prevent start on a random project which could be deleted in the meantime 46 | sh('oc project testsuite') 47 | 48 | // test 49 | dir('openshift-spark') { 50 | sh('make test-e2e | tee -a test.log && exit ${PIPESTATUS[0]}') 51 | } 52 | } catch (err) { 53 | try { 54 | githubNotify(context: 'jenkins-ci/openshift-spark', description: 'There are test failures', status: 'ERROR', targetUrl: buildUrl) 55 | } catch (errNotify) { 56 | echo("Wasn't able to notify Github: ${errNotify}") 57 | } 58 | throw err 59 | } finally { 60 | dir('openshift-spark') { 61 | archiveArtifacts(allowEmptyArchive: true, artifacts: 'test.log') 62 | } 63 | } 64 | 65 | try { 66 | githubNotify(context: 'jenkins-ci/openshift-spark', description: 'This change looks good', status: 'SUCCESS', targetUrl: buildUrl) 67 | } catch (err) { 68 | echo("Wasn't able to notify Github: ${err}") 69 | } 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LOCAL_IMAGE ?= openshift-spark 2 | SPARK_IMAGE=radanalyticsio/openshift-spark 3 | DOCKERFILE_CONTEXT=openshift-spark-build 4 | BUILDER ?= podman 5 | 6 | # If you're pushing to an integrated registry 7 | # in Openshift, SPARK_IMAGE will look something like this 8 | 9 | # SPARK_IMAGE=172.30.242.71:5000/myproject/openshift-spark 10 | 11 | OPENSHIFT_SPARK_TEST_IMAGE ?= spark-testimage 12 | export OPENSHIFT_SPARK_TEST_IMAGE 13 | 14 | .PHONY: build clean push create destroy test-e2e clean-target clean-context zero-tarballs 15 | 16 | build: $(DOCKERFILE_CONTEXT) 17 | $(BUILDER) build -t $(LOCAL_IMAGE) $(DOCKERFILE_CONTEXT) 18 | 19 | clean: clean-context 20 | -$(BUILDER) rmi $(LOCAL_IMAGE) 21 | 22 | push: build 23 | $(BUILDER) tag $(LOCAL_IMAGE) $(SPARK_IMAGE) 24 | $(BUILDER) push $(SPARK_IMAGE) 25 | 26 | create: push template.yaml 27 | oc process -f template.yaml -v SPARK_IMAGE=$(SPARK_IMAGE) > template.active 28 | oc create -f template.active 29 | 30 | destroy: template.active 31 | oc delete -f template.active 32 | rm template.active 33 | 34 | clean-context: 35 | -rm -rf $(DOCKERFILE_CONTEXT)/* 36 | 37 | clean-target: 38 | -rm -rf target 39 | 40 | context: $(DOCKERFILE_CONTEXT) 41 | 42 | $(DOCKERFILE_CONTEXT): $(DOCKERFILE_CONTEXT)/Dockerfile \ 43 | $(DOCKERFILE_CONTEXT)/modules 44 | 45 | $(DOCKERFILE_CONTEXT)/Dockerfile $(DOCKERFILE_CONTEXT)/modules: 46 | cekit --descriptor image.yaml build --dry-run $(BUILDER) 47 | cp -R target/image/* $(DOCKERFILE_CONTEXT) 48 | 49 | zero-tarballs: 50 | find ./$(DOCKERFILE_CONTEXT) -name "*.tgz" -type f -exec truncate -s 0 {} \; 51 | find ./$(DOCKERFILE_CONTEXT) -name "*.tar.gz" -type f -exec truncate -s 0 {} \; 52 | 53 | test-e2e: 54 | LOCAL_IMAGE=$(OPENSHIFT_SPARK_TEST_IMAGE) make build 55 | test/run.sh completed/ 56 | -------------------------------------------------------------------------------- /Makefile.inc: -------------------------------------------------------------------------------- 1 | LOCAL_IMAGE ?= openshift-spark-inc 2 | SPARK_IMAGE=radanalyticsio/openshift-spark-inc 3 | BUILDER ?= podman 4 | 5 | DOCKERFILE_CONTEXT=openshift-spark-build-inc 6 | 7 | SPARK_TEST_IMAGE ?= spark-testimage-inc 8 | 9 | export SPARK_TEST_IMAGE 10 | 11 | .PHONY: build 
clean push create destroy test-e2e test-e2e-completed 12 | 13 | build: $(DOCKERFILE_CONTEXT) 14 | $(BUILDER) build -t $(LOCAL_IMAGE) $(DOCKERFILE_CONTEXT) 15 | 16 | push: build 17 | $(BUILDER) tag $(LOCAL_IMAGE) $(SPARK_IMAGE) 18 | $(BUILDER) push $(SPARK_IMAGE) 19 | 20 | clean: clean-context 21 | -$(BUILDER) rmi $(LOCAL_IMAGE) 22 | 23 | clean-target: 24 | -rm -rf target 25 | 26 | clean-context: 27 | -rm -rf $(DOCKERFILE_CONTEXT)/* 28 | 29 | context: $(DOCKERFILE_CONTEXT) 30 | 31 | $(DOCKERFILE_CONTEXT): $(DOCKERFILE_CONTEXT)/Dockerfile $(DOCKERFILE_CONTEXT)/modules 32 | 33 | $(DOCKERFILE_CONTEXT)/Dockerfile $(DOCKERFILE_CONTEXT)/modules: 34 | -mkdir -p $(DOCKERFILE_CONTEXT) 35 | cekit --descriptor image-inc.yaml build --dry-run $(BUILDER) 36 | cp -R target/image/* $(DOCKERFILE_CONTEXT) 37 | -rm $(DOCKERFILE_CONTEXT)/spark*.tgz 38 | 39 | zero-tarballs: 40 | find ./$(DOCKERFILE_CONTEXT) -name "*.tgz" -type f -exec truncate -s 0 {} \; 41 | find ./$(DOCKERFILE_CONTEXT) -name "*.tar.gz" -type f -exec truncate -s 0 {} \; 42 | 43 | test-e2e: 44 | test/sparkinputs.sh 45 | LOCAL_IMAGE=$(SPARK_TEST_IMAGE) make -f Makefile.inc build 46 | SPARK_TEST_IMAGE=$(SPARK_TEST_IMAGE) test/run.sh incomplete/ 47 | 48 | test-e2e-completed: 49 | test/sparkinputs.sh 50 | LOCAL_IMAGE=$(SPARK_TEST_IMAGE) make -f Makefile.inc build 51 | test/localcomplete.sh $(SPARK_TEST_IMAGE) spark-complete 52 | SPARK_TEST_IMAGE=spark-complete test/run.sh completed/ 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build status](https://travis-ci.org/radanalyticsio/openshift-spark.svg?branch=master)](https://travis-ci.org/radanalyticsio/openshift-spark) 2 | [![Docker build](https://img.shields.io/docker/automated/radanalyticsio/openshift-spark.svg)](https://hub.docker.com/r/radanalyticsio/openshift-spark) 3 | [![Layers info](https://images.microbadger.com/badges/image/radanalyticsio/openshift-spark.svg)](https://microbadger.com/images/radanalyticsio/openshift-spark) 4 | 5 | # Apache Spark images for OpenShift 6 | 7 | This repository contains several files for building 8 | [Apache Spark](https://spark.apache.org) focused container images, targeted 9 | for usage on [OpenShift Origin](https://openshift.org). 10 | 11 | By default, it will build the following images into your local Docker 12 | registry: 13 | 14 | * `openshift-spark`, Apache Spark, Python 3.6 15 | 16 | For Spark versions, please see the `image.yaml` file. 17 | 18 | # Instructions 19 | 20 | ## Build 21 | 22 | ### Prerequisites 23 | 24 | * `cekit` version 3.7.0 from the [cekit project](https://github.com/cekit/cekit) 25 | 26 | ### Procedure 27 | 28 | Create all images and save them in the local Docker registry. 29 | 30 | make 31 | 32 | ## Push 33 | 34 | Tag and push the images to the designated reference. 35 | 36 | make push SPARK_IMAGE=[REGISTRY_HOST[:REGISTRY_PORT]/]NAME[:TAG] 37 | 38 | ## Customization 39 | 40 | There are several ways to customize the construction and build process. This 41 | project uses the [GNU Make tool](https://www.gnu.org/software/make/) for 42 | the build workflow, see the `Makefile` for more information. For container 43 | specification and construction, the 44 | [Container Evolution Kit `cekit`](https://github.com/cekit/cekit) is 45 | used as the primary point of investigation, see the `image.yaml` file for 46 | more information. 
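For example, assuming the variable defaults defined in the `Makefile`, the container build tool and the local image name can be overridden on the `make` command line (the image name shown here is only illustrative):

    make build BUILDER=docker LOCAL_IMAGE=my-openshift-spark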
47 | 48 | # Partial images without an Apache Spark distribution installed 49 | 50 | This repository also supports building 'incomplete' versions of 51 | the images which contain tooling for OpenShift but lack an actual 52 | Spark distribution. An s2i workflow can be used with these partial 53 | images to install a Spark distribution of a user's choosing. 54 | This gives users an alternative to checking out the repository 55 | and modifying build files if they want to run a custom 56 | Spark distribution. By default, the partial images built will be: 57 | 58 | * `openshift-spark-inc`, Apache Spark, Python 3.6 59 | 60 | ## Build 61 | 62 | To build the partial images, use make with Makefile.inc. 63 | 64 | make -f Makefile.inc 65 | 66 | ## Push 67 | 68 | Tag and push the images to the designated reference. 69 | 70 | make -f Makefile.inc push SPARK_IMAGE=[REGISTRY_HOST[:REGISTRY_PORT]/]NAME[:TAG] 71 | 72 | ## Image Completion 73 | 74 | To produce a final image, a source-to-image build must be performed which takes 75 | a Spark distribution as input. This can be done in OpenShift or locally using 76 | the [s2i tool](https://github.com/openshift/source-to-image) if it's installed. 77 | The final images created can be used just like the `openshift-spark` image 78 | described above. 79 | 80 | ### Build inputs 81 | 82 | The OpenShift method can take either local files or a URL as build input. 83 | For the s2i method, local files are required. Here is an example which 84 | downloads an Apache Spark distribution to a local 'build-input' directory 85 | (including the sha512 file is optional). 86 | 87 | $ mkdir build-input 88 | $ wget https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop3.2.tgz -O build-input/spark-3.0.0-bin-hadoop3.2.tgz 89 | $ wget https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop3.2.tgz.sha512 -O build-input/spark-3.0.0-bin-hadoop3.2.tgz.sha512 90 | 91 | Optionally, your `build-input` directory may contain a `modify-spark` directory. The structure of this directory should be parallel to the structure 92 | of the top-level directory in the Spark distribution tarball. During the installation, the contents of this directory will be copied to the Spark 93 | installation using `rsync`, allowing you to add or overwrite files. To add `my.jar` to Spark, for example, put it in `build-input/modify-spark/jars/my.jar`. 94 | 95 | ### Running the image completion 96 | 97 | To complete the image using the [s2i tool](https://github.com/openshift/source-to-image): 98 | 99 | $ s2i build build-input radanalyticsio/openshift-spark-inc openshift-spark 100 | 101 | To complete the image using OpenShift, for example: 102 | 103 | $ oc new-build --name=openshift-spark --docker-image=radanalyticsio/openshift-spark-inc --binary 104 | $ oc start-build openshift-spark --from-file=https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop3.2.tgz 105 | 106 | Note that the value of `--from-file` could also be the `build-input` directory from the s2i example above. 107 | 108 | This will write the completed image to an imagestream called `openshift-spark` in the current project. 109 | 110 | # A 'usage' command for all images 111 | 112 | Note that all of the images described here will respond to a 'usage' command for reference. 
For example 113 | 114 | $ docker run --rm openshift-spark:latest usage 115 | -------------------------------------------------------------------------------- /change-yaml.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage() { 4 | echo 5 | 6 | echo "Changes the image.*.yaml file and adds it to the current commit (git add)" 7 | echo 8 | echo "Usage: change-yaml.sh [options] SPARK_VERSION" 9 | echo 10 | echo "required arguments" 11 | echo 12 | echo " SPARK_VERSION The spark version number, like 3.0.0" 13 | echo 14 | echo "optional arguments:" 15 | echo 16 | echo " -h Show this message" 17 | } 18 | 19 | # Set the hadoop version 20 | HVER=3.2 21 | 22 | while getopts h opt; do 23 | case $opt in 24 | h) 25 | usage 26 | exit 0 27 | ;; 28 | \?) 29 | echo "Invalid option: -$OPTARG" >&2 30 | exit 1 31 | ;; 32 | esac 33 | done 34 | 35 | shift "$((OPTIND-1))" 36 | 37 | if [ "$#" -lt 1 ]; then 38 | echo No spark version specified 39 | usage 40 | exit 1 41 | fi 42 | 43 | SPARK=$1 44 | 45 | # Extract the current spark version from the image.yaml file 46 | # Works by parsing the line following "name: sparkversion" 47 | VER=$(sed -n '\@name: sparkversion@!b;n;p' image.yaml | tr -d '[:space:]' | cut -d':' -f2) 48 | if [ "$VER" == "$SPARK" ]; then 49 | echo "Nothing to do, spark version in image.yaml is already $SPARK" 50 | exit 0 51 | fi 52 | 53 | # Change spark distro and download urls 54 | if [ ! -z ${SPARK+x} ]; then 55 | 56 | # TODO remove this download when sha512 support lands in upstream cekit (elmiko) 57 | if [ -f "/tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz" ]; then 58 | echo 59 | echo Using existing "/tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz", if this is not what you want delete it and run again 60 | echo 61 | else 62 | wget https://archive.apache.org/dist/spark/spark-${SPARK}/spark-${SPARK}-bin-hadoop${HVER}.tgz -O /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz 63 | if [ "$?" -ne 0 ]; then 64 | echo "Failed to download the specified version Spark archive" 65 | exit 1 66 | fi 67 | fi 68 | 69 | wget https://archive.apache.org/dist/spark/spark-${SPARK}/spark-${SPARK}-bin-hadoop${HVER}.tgz.sha512 -O /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz.sha512 70 | if [ "$?" 
-ne 0 ]; then 71 | echo "Failed to download the sha512 sum for the specified Spark version" 72 | exit 1 73 | fi 74 | 75 | # TODO remove this checksum calculation when sha512 support lands in upstream cekit (elmiko) 76 | calcsum=$(sha512sum /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz | cut -d" " -f1) 77 | sum=$(cat /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz.sha512 | tr -d [:space:] | cut -d: -f2 | tr [:upper:] [:lower:]) 78 | if [ "$calcsum" != "$sum" ]; then 79 | echo "Failed to confirm authenticity of Spark archive, checksum mismatch" 80 | echo "sha512sum : ${calcsum}" 81 | echo ".sha512 file: ${sum}" 82 | exit 1 83 | fi 84 | 85 | # Fix the url references 86 | sed -i "s@https://archive\.apache\.org/dist/spark/spark-.*/spark-.*-bin-hadoop.*\.tgz@https://archive\.apache\.org/dist/spark/spark-${SPARK}/spark-${SPARK}-bin-hadoop${HVER}\.tgz@" modules/spark/module.yaml 87 | 88 | # TODO replace this with sha512 when it lands in upstream cekit (elmiko) 89 | # Fix the md5 sum references on the line following the url 90 | calcsum=$(md5sum /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz | cut -d" " -f1) 91 | sed -i '\@url: https://archive.apache.org/dist/spark/@!b;n;s/md5.*/md5: '$calcsum'/' modules/spark/module.yaml 92 | 93 | # Fix the spark version label 94 | sed -i '\@name: sparkversion@!b;n;s/value.*/value: '$SPARK'/' image.yaml 95 | 96 | # Fix the image version value (do this for incomplete as well) 97 | V=$(echo $SPARK | cut -d'.' -f1,2) 98 | sed -i 's@^version:.*-latest$@version: '$V'-latest@' image*.yaml 99 | fi 100 | 101 | git add image.yaml 102 | -------------------------------------------------------------------------------- /docs/functional-testing.md: -------------------------------------------------------------------------------- 1 | # Functional testing 2 | 3 | This repository contains a set of end-to-end functional tests. These tests 4 | will create the images, deploy them, and run a few basic connectivity and 5 | application suites. 6 | 7 | These tests will run automatically on all proposed changes to the project 8 | repository, but it is often useful to run them locally to diagnose changes or 9 | hunt for bugs. Although the tests are automated, running them locally requires 10 | a very specific setup. These instructions will guide you through the process. 11 | 12 | ## Prerequisites 13 | 14 | * Access to an OpenShift cluster available. You will need to have basic access 15 | to a cluster with the ability to create new projects and objects within 16 | those projects. We recommend using a local deployment methodology for these 17 | tests, you can find more information about deploying OpenShift in 18 | [this upstream documentation](https://docs.okd.io/latest/getting_started/administrators.html). 19 | * Access to the `docker` tooling on the OpenShift cluster instance. The test 20 | tooling will create and push the images to a local container registry using 21 | `docker`. The test suite will need to build and push images, ensure that 22 | you have this access. 23 | * GNU `make` available. The tests are run through the `Makefile`, you will 24 | need this command to start the entire process. 25 | * Go language tooling available. As the tests will attempt to build certain 26 | Go specific applications, you will need to have the Go tooling installed on 27 | the machine where the tests will run. 28 | 29 | ## Procedure 30 | 31 | 1. Download the source code. You will need to clone this repository onto the 32 | host where the tests will run. 33 | 1. Login to OpenShift and create a new project. 
The test scripts will attempt 34 | to determine your project namespace; occasionally it is possible to have a 35 | login with no associated project. To avoid errors, create a project with 36 | any name or switch to a previously used project; the test suite will create 37 | a new project for its work. 38 | 1. Start the tests. Change directory to the root of the repository clone and 39 | run the make command; this will start the tests and you will see the output 40 | in your terminal. This command will run all the tests: 41 | ``` 42 | make test-e2e 43 | ``` 44 | 45 | ## Additional resources 46 | 47 | * [Makefile](/Makefile). This is where all the action starts; see the entry 48 | for the `test-e2e` target. 49 | * [test/run.sh](/test/run.sh). This script file is the primary entrypoint for 50 | all the test suites; examine this file to understand how the 51 | tests are structured and executed. 52 | -------------------------------------------------------------------------------- /docs/spark-version-update-process.md: -------------------------------------------------------------------------------- 1 | # Updating the base image Spark version 2 | 3 | This document describes the general workflow for updating the Apache Spark 4 | version present in the base image. This guide follows the process for 5 | installing the default binary archives as distributed by the 6 | [Spark project](https://spark.apache.org). 7 | 8 | ## prerequisites 9 | 10 | * shell access 11 | * an editor available 12 | * access to the `docker` command line tool and a registry (for testing) 13 | * [cekit](https://cekit.readthedocs.io/en/latest/) available 14 | 15 | ## procedure 16 | 17 | ### update the version numbers 18 | 19 | 1. update version and download link in `image.yaml` 20 | 1. update version in `image-inc.yaml` (to keep consistent versioning) 21 | 22 | There is a script named `change-yaml.sh` that will automate this process; 23 | invoke it by typing the script name followed by the desired version. For 24 | example, if you were creating an update for version `3.0.0` of Spark, you 25 | would type the following: 26 | 27 | ``` 28 | ./change-yaml.sh 3.0.0 29 | ``` 30 | 31 | ### rebuild generated files 32 | 33 | 1. remove the generated cekit files for the previous version. 34 | ``` 35 | make clean-context 36 | make -f Makefile.inc clean-context 37 | ``` 38 | 1. generate the new cekit files. these will be the artifacts for image 39 | creation. 40 | ``` 41 | make context 42 | make -f Makefile.inc context 43 | ``` 44 | 1. zero the archive files. as these files are currently checked in to the 45 | repository it is important to zero out the archive files. they will be 46 | re-downloaded during the image construction phase. 47 | ``` 48 | make zero-tarballs 49 | ``` 50 | 51 | This process is also captured in a script file named `make-build-dir.sh`, which 52 | automates the process of cleaning and then regenerating the cekit files 53 | and Spark binaries. The script requires no parameters and it will attempt to 54 | add the updated files to the current git staging process. 55 | 56 | At this point the files are ready for testing. You can create new images from 57 | the files available in the directory. You will want to check these files in 58 | to your working branch before testing. 
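As a quick sanity check before committing (using `3.0.0` as an example version; exactly which generated files mention the version depends on the cekit templates), you can confirm that the regenerated build contexts reference the new archive:

```
grep -R "3\.0\.0" openshift-spark-build openshift-spark-build-inc
```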
59 | 60 | ## Build and test the images 61 | 62 | Build the images with the following command: 63 | 64 | ``` 65 | make build 66 | make -f Makefile.inc build 67 | ``` 68 | 69 | This will run an image build against the generated cekit files and store 70 | the image in the registry associated with your docker installation 71 | (usually localhost). 72 | 73 | The images are now ready for testing. 74 | -------------------------------------------------------------------------------- /hack/compress.awk: -------------------------------------------------------------------------------- 1 | # Helper functions 2 | function trim(s) { 3 | gsub(/^[ \t\r\n]+|[ \t\r\n]+$/, "", s); 4 | return s; 5 | } 6 | 7 | function printRecordAndCount(record, count) { 8 | print record; 9 | if (count > 1) { 10 | printf("... repeated %d times\n", count) 11 | } 12 | } 13 | 14 | BEGIN { 15 | # Before processing, set the record separator to the ASCII record separator character \x1e 16 | RS = "\x1e"; 17 | } 18 | 19 | # This action is executed for each record 20 | { 21 | # Build our current var from the trimmed record 22 | current = trim($0); 23 | 24 | # Bump the count of times we have seen it 25 | seen[current]++; 26 | 27 | # Print the previous record and its count (if it is not identical to the current record) 28 | if (previous && previous != current) { 29 | printRecordAndCount(previous, seen[previous]); 30 | } 31 | 32 | # Store the current record as the previous record 33 | previous = current; 34 | } 35 | 36 | END { 37 | # After processing, print the last record and count if it is non-empty 38 | if (previous) { 39 | printRecordAndCount(previous, seen[previous]); 40 | } 41 | } -------------------------------------------------------------------------------- /hack/lib/build/constants.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script provides constants for the Golang binary build process 4 | 5 | readonly OS_BUILD_ENV_GOLANG="${OS_BUILD_ENV_GOLANG:-1.7}" 6 | readonly OS_BUILD_ENV_IMAGE="${OS_BUILD_ENV_IMAGE:-openshift/origin-release:golang-${OS_BUILD_ENV_GOLANG}}" 7 | 8 | readonly OS_OUTPUT_SUBPATH="${OS_OUTPUT_SUBPATH:-_output/local}" 9 | readonly OS_OUTPUT="${OS_ROOT}/${OS_OUTPUT_SUBPATH}" 10 | readonly OS_LOCAL_RELEASEPATH="${OS_OUTPUT}/releases" 11 | readonly OS_OUTPUT_BINPATH="${OS_OUTPUT}/bin" 12 | readonly OS_OUTPUT_PKGDIR="${OS_OUTPUT}/pkgdir" 13 | 14 | readonly OS_GO_PACKAGE=github.com/openshift/origin 15 | 16 | readonly OS_SDN_COMPILE_TARGETS_LINUX=( 17 | pkg/sdn/plugin/sdn-cni-plugin 18 | vendor/github.com/containernetworking/cni/plugins/ipam/host-local 19 | vendor/github.com/containernetworking/cni/plugins/main/loopback 20 | ) 21 | readonly OS_IMAGE_COMPILE_TARGETS_LINUX=( 22 | images/pod 23 | cmd/dockerregistry 24 | cmd/gitserver 25 | "${OS_SDN_COMPILE_TARGETS_LINUX[@]}" 26 | ) 27 | readonly OS_SCRATCH_IMAGE_COMPILE_TARGETS_LINUX=( 28 | examples/hello-openshift 29 | examples/deployment 30 | ) 31 | readonly OS_IMAGE_COMPILE_BINARIES=("${OS_SCRATCH_IMAGE_COMPILE_TARGETS_LINUX[@]##*/}" "${OS_IMAGE_COMPILE_TARGETS_LINUX[@]##*/}") 32 | 33 | readonly OS_CROSS_COMPILE_TARGETS=( 34 | cmd/openshift 35 | cmd/oc 36 | ) 37 | readonly OS_CROSS_COMPILE_BINARIES=("${OS_CROSS_COMPILE_TARGETS[@]##*/}") 38 | 39 | readonly OS_TEST_TARGETS=( 40 | test/extended/extended.test 41 | ) 42 | 43 | #If you update this list, be sure to get the images/origin/Dockerfile 44 | readonly OPENSHIFT_BINARY_SYMLINKS=( 45 | openshift-router 46 | openshift-deploy 47 | openshift-recycle 
48 | openshift-sti-build 49 | openshift-docker-build 50 | origin 51 | osc 52 | oadm 53 | osadm 54 | kubectl 55 | kubernetes 56 | kubelet 57 | kube-proxy 58 | kube-apiserver 59 | kube-controller-manager 60 | kube-scheduler 61 | ) 62 | readonly OPENSHIFT_BINARY_COPY=( 63 | oadm 64 | kubelet 65 | kube-proxy 66 | kube-apiserver 67 | kube-controller-manager 68 | kube-scheduler 69 | ) 70 | readonly OC_BINARY_COPY=( 71 | kubectl 72 | ) 73 | readonly OS_BINARY_RELEASE_CLIENT_WINDOWS=( 74 | oc.exe 75 | README.md 76 | ./LICENSE 77 | ) 78 | readonly OS_BINARY_RELEASE_CLIENT_MAC=( 79 | oc 80 | README.md 81 | ./LICENSE 82 | ) 83 | readonly OS_BINARY_RELEASE_CLIENT_LINUX=( 84 | ./oc 85 | ./README.md 86 | ./LICENSE 87 | ) 88 | readonly OS_BINARY_RELEASE_SERVER_LINUX=( 89 | './*' 90 | ) 91 | readonly OS_BINARY_RELEASE_CLIENT_EXTRA=( 92 | ${OS_ROOT}/README.md 93 | ${OS_ROOT}/LICENSE 94 | ) -------------------------------------------------------------------------------- /hack/lib/build/rpm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This library holds utilities for building RPMs from Origin. 4 | 5 | # os::build::rpm::get_nvra_vars determines the NVRA of the RPMs 6 | # that would be built from the current git state. 7 | # 8 | # Globals: 9 | # - OS_GIT_VERSION 10 | # Arguments: 11 | # - None 12 | # Exports: 13 | # - OS_RPM_NAME 14 | # - OS_RPM_VERSION 15 | # - OS_RPM_RELEASE 16 | # - OS_RPM_ARCHITECTURE 17 | function os::build::rpm::get_nvra_vars() { 18 | # the package name can be overwritten but is normally 'origin' 19 | OS_RPM_NAME="${OS_RPM_NAME:-"origin"}" 20 | OS_RPM_ARCHITECTURE="$(uname -i)" 21 | 22 | # we can extract the package version from the build version 23 | os::build::get_version_vars 24 | if [[ "${OS_GIT_VERSION}" =~ ^v([0-9](\.[0-9]+)*)(.*) ]]; then 25 | OS_RPM_VERSION="${BASH_REMATCH[1]}" 26 | metadata="${BASH_REMATCH[3]}" 27 | else 28 | os::log::fatal "Malformed \$OS_GIT_VERSION: ${OS_GIT_VERSION}" 29 | fi 30 | 31 | # we can generate the package release from the git version metadata 32 | # OS_GIT_VERSION will always have metadata, but either contain 33 | # pre-release information _and_ build metadata, or only the latter 34 | # ex. 
35 | # -alpha.0+shasums-123-dirty 36 | # -alpha.0+shasums-123 37 | # +shasums-123-dirty 38 | # +shasums-123 39 | if [[ "${metadata:0:1}" == "+" ]]; then 40 | # we only have build metadata, but need to massage it so 41 | # we can generate a valid RPM release from it 42 | if [[ "${metadata}" =~ ^\+([a-z0-9]{7})-([0-9]+)(-dirty)?$ ]]; then 43 | build_sha="${BASH_REMATCH[1]}" 44 | build_num="${BASH_REMATCH[2]}" 45 | else 46 | os::log::fatal "Malformed git version metadata: ${metadata}" 47 | fi 48 | OS_RPM_RELEASE="1.${build_num}.${build_sha}" 49 | elif [[ "${metadata:0:1}" == "-" ]]; then 50 | # we have both build metadata and pre-release info 51 | if [[ "${metadata}" =~ ^-([^\+]+)\+([a-z0-9]{7})-([0-9]+)(-dirty)?$ ]]; then 52 | pre_release="${BASH_REMATCH[1]}" 53 | build_sha="${BASH_REMATCH[2]}" 54 | build_num="${BASH_REMATCH[3]}" 55 | else 56 | os::log::fatal "Malformed git version metadata: ${metadata}" 57 | fi 58 | OS_RPM_RELEASE="0.${pre_release}.${build_num}.${build_sha}" 59 | else 60 | os::log::fatal "Malformed git version metadata: ${metadata}" 61 | fi 62 | 63 | export OS_RPM_NAME OS_RPM_VERSION OS_RPM_RELEASE OS_RPM_ARCHITECTURE 64 | } 65 | 66 | 67 | # os::build::rpm::format_nvra formats the rpm NVRA vars generated by 68 | # os::build::rpm::get_nvra_vars and will generate them if necessary 69 | # 70 | # Globals: 71 | # - OS_RPM_NAME 72 | # - OS_RPM_VERSION 73 | # - OS_RPM_RELEASE 74 | # - OS_RPM_ARCHITECTURE 75 | # Arguments: 76 | # None 77 | # Returns: 78 | # None 79 | function os::build::rpm::format_nvra() { 80 | if [[ -z "${OS_RPM_NAME:-}" || -z "${OS_RPM_VERSION:-}" || -z "${OS_RPM_RELEASE:-}" ]]; then 81 | os::build::rpm::get_nvra_vars 82 | fi 83 | 84 | echo "${OS_RPM_NAME}-${OS_RPM_VERSION}-${OS_RPM_RELEASE}.${OS_RPM_ARCHITECTURE}" 85 | } 86 | -------------------------------------------------------------------------------- /hack/lib/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This library holds functions that are used to clean up local 4 | # system state after other scripts have run. 5 | 6 | # os::cleanup::dump_etcd dumps the full contents of etcd to a file. 7 | # 8 | # Globals: 9 | # ARTIFACT_DIR 10 | # Arguments: 11 | # None 12 | # Returns: 13 | # None 14 | function os::cleanup::dump_etcd() { 15 | os::log::info "Dumping etcd contents to ${ARTIFACT_DIR}/etcd_dump.json" 16 | os::util::curl_etcd "/v2/keys/?recursive=true" > "${ARTIFACT_DIR}/etcd_dump.json" 17 | } -------------------------------------------------------------------------------- /hack/lib/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is meant to be the entrypoint for OpenShift Bash scripts to import all of the support 4 | # libraries at once in order to make Bash script preambles as minimal as possible. This script recur- 5 | # sively `source`s *.sh files in this directory tree. As such, no files should be `source`ed outside 6 | # of this script to ensure that we do not attempt to overwrite read-only variables. 
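# strict mode for everything sourced below: exit on errors, treat unset variables as errors, and fail a pipeline if any stage in it fails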
7 | 8 | set -o errexit 9 | set -o nounset 10 | set -o pipefail 11 | 12 | # os::util::absolute_path returns the absolute path to the directory provided 13 | function os::util::absolute_path() { 14 | local relative_path="$1" 15 | local absolute_path 16 | 17 | pushd "${relative_path}" >/dev/null 18 | relative_path="$( pwd )" 19 | if [[ -h "${relative_path}" ]]; then 20 | absolute_path="$( readlink "${relative_path}" )" 21 | else 22 | absolute_path="${relative_path}" 23 | fi 24 | popd >/dev/null 25 | 26 | echo "${absolute_path}" 27 | } 28 | readonly -f os::util::absolute_path 29 | 30 | # find the absolute path to the root of the Origin source tree 31 | init_source="$( dirname "${BASH_SOURCE}" )/../.." 32 | OS_ROOT="$( os::util::absolute_path "${init_source}" )" 33 | export OS_ROOT 34 | cd "${OS_ROOT}" 35 | 36 | library_files=( $( find "${OS_ROOT}/hack/lib" -type f -name '*.sh' -not -path '*/hack/lib/init.sh' ) ) 37 | echo $library_files 38 | # TODO(skuzmets): Move the contents of the following files into respective library files. 39 | library_files+=( "${OS_ROOT}/hack/common.sh" ) 40 | library_files+=( "${OS_ROOT}/hack/util.sh" ) 41 | 42 | for library_file in "${library_files[@]}"; do 43 | source "${library_file}" 44 | done 45 | 46 | unset library_files library_file init_source 47 | 48 | # all of our Bash scripts need to have the stacktrace 49 | # handler installed to deal with errors 50 | os::log::stacktrace::install 51 | 52 | # All of our Bash scripts need to have access to the 53 | # binaries that we build so we don't have to find 54 | # them before every invocation. 55 | os::util::environment::update_path_var -------------------------------------------------------------------------------- /hack/lib/log/output.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This file contains functions used for writing log messages 4 | # to stdout and stderr from scripts while they run. 5 | 6 | # os::log::info writes the message to stdout. 7 | # 8 | # Arguments: 9 | # - all: message to write 10 | function os::log::info() { 11 | os::log::internal::prefix_lines "[INFO]" "$*" 12 | } 13 | readonly -f os::log::info 14 | 15 | # os::log::warn writes the message to stderr. 16 | # A warning indicates something went wrong but 17 | # not so wrong that we cannot recover. 18 | # 19 | # Arguments: 20 | # - all: message to write 21 | function os::log::warn() { 22 | os::text::print_yellow "$( os::log::internal::prefix_lines "[WARNING]" "$*" )" 1>&2 23 | } 24 | readonly -f os::log::warn 25 | 26 | # os::log::error writes the message to stderr. 27 | # An error indicates that something went wrong 28 | # and we will most likely fail after this. 29 | # 30 | # Arguments: 31 | # - all: message to write 32 | function os::log::error() { 33 | os::text::print_red "$( os::log::internal::prefix_lines "[ERROR]" "$*" )" 1>&2 34 | } 35 | readonly -f os::log::error 36 | 37 | # os::log::fatal writes the message to stderr and 38 | # returns a non-zero code to force a process exit. 39 | # A fatal error indicates that there is no chance 40 | # of recovery. 41 | # 42 | # Arguments: 43 | # - all: message to write 44 | function os::log::fatal() { 45 | os::text::print_red "$( os::log::internal::prefix_lines "[FATAL]" "$*" )" 1>&2 46 | exit 1 47 | } 48 | readonly -f os::log::fatal 49 | 50 | # os::log::debug writes the message to stderr if 51 | # the ${OS_DEBUG} variable is set. 
52 | # 53 | # Arguments: 54 | # - all: message to write 55 | function os::log::debug() { 56 | if [[ -n "${OS_DEBUG:-}" ]]; then 57 | os::text::print_blue "$( os::log::internal::prefix_lines "[DEBUG]" "$*" )" 1>&2 58 | fi 59 | } 60 | readonly -f os::log::debug 61 | 62 | # os::log::internal::prefix_lines prints out the 63 | # original content with the given prefix at the 64 | # start of every line. 65 | # 66 | # Arguments: 67 | # - 1: prefix for lines 68 | # - 2: content to prefix 69 | function os::log::internal::prefix_lines() { 70 | local prefix="$1" 71 | local content="$2" 72 | 73 | local old_ifs="${IFS}" 74 | IFS=$'\n' 75 | for line in ${content}; do 76 | echo "${prefix} ${line}" 77 | done 78 | IFS="${old_ifs}" 79 | } -------------------------------------------------------------------------------- /hack/lib/log/stacktrace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library contains an implementation of a stack trace for Bash scripts. 4 | 5 | # os::log::stacktrace::install installs the stacktrace as a handler for the ERR signal if one 6 | # has not already been installed and sets `set -o errtrace` in order to propagate the handler 7 | # If the ERR trap is not initialized, installing this plugin will initialize it. 8 | # 9 | # Globals: 10 | # None 11 | # Arguments: 12 | # None 13 | # Returns: 14 | # - export OS_USE_STACKTRACE 15 | function os::log::stacktrace::install() { 16 | # setting 'errtrace' propagates our ERR handler to functions, expansions and subshells 17 | set -o errtrace 18 | 19 | # OS_USE_STACKTRACE is read by os::util::trap at runtime to request a stacktrace 20 | export OS_USE_STACKTRACE=true 21 | 22 | os::util::trap::init_err 23 | } 24 | readonly -f os::log::stacktrace::install 25 | 26 | # os::log::stacktrace::print prints the stacktrace and exits with the return code from the script that 27 | # called for a stack trace. This function will always return 0 if it is not handling the signal, and if it 28 | # is handling the signal, this function will always `exit`, not return, the return code it receives as 29 | # its first argument. 30 | # 31 | # Globals: 32 | # - BASH_SOURCE 33 | # - BASH_LINENO 34 | # - FUNCNAME 35 | # Arguments: 36 | # - 1: the return code of the command in the script that generated the ERR signal 37 | # - 2: the last command that ran before handlers were invoked 38 | # - 3: whether or not `set -o errexit` was set in the script that generated the ERR signal 39 | # Returns: 40 | # None 41 | function os::log::stacktrace::print() { 42 | local return_code=$1 43 | local last_command=$2 44 | local errexit_set=${3:-} 45 | 46 | if [[ "${return_code}" = "0" ]]; then 47 | # we're not supposed to respond when no error has occurred 48 | return 0 49 | fi 50 | 51 | if [[ -z "${errexit_set}" ]]; then 52 | # if errexit wasn't set in the shell when the ERR signal was issued, then we can ignore the signal 53 | # as this is not cause for failure 54 | return 0 55 | fi 56 | 57 | # iterate backwards through the stack until we leave library files, so we can be sure we start logging 58 | # actual script code and not this handler's call 59 | local stack_begin_index 60 | for (( stack_begin_index = 0; stack_begin_index < ${#BASH_SOURCE[@]}; stack_begin_index++ )); do 61 | if [[ ! 
"${BASH_SOURCE[${stack_begin_index}]}" =~ hack/lib/(log/stacktrace|util/trap)\.sh ]]; then 62 | break 63 | fi 64 | done 65 | 66 | local preamble_finished 67 | local stack_index=1 68 | local i 69 | for (( i = stack_begin_index; i < ${#BASH_SOURCE[@]}; i++ )); do 70 | local bash_source 71 | bash_source="$( os::util::repository_relative_path "${BASH_SOURCE[$i]}" )" 72 | if [[ -z "${preamble_finished:-}" ]]; then 73 | preamble_finished=true 74 | os::log::error "PID ${BASHPID:-$$}: ${bash_source}:${BASH_LINENO[$i-1]}: \`${last_command}\` exited with status ${return_code}." >&2 75 | os::log::info $'\t\t'"Stack Trace: " >&2 76 | os::log::info $'\t\t'" ${stack_index}: ${bash_source}:${BASH_LINENO[$i-1]}: \`${last_command}\`" >&2 77 | else 78 | os::log::info $'\t\t'" ${stack_index}: ${bash_source}:${BASH_LINENO[$i-1]}: ${FUNCNAME[$i-1]}" >&2 79 | fi 80 | stack_index=$(( stack_index + 1 )) 81 | done 82 | 83 | # we know we're the privileged handler in this chain, so we can safely exit the shell without 84 | # starving another handler of the privilege of reacting to this signal 85 | os::log::info " Exiting with code ${return_code}." >&2 86 | exit "${return_code}" 87 | } 88 | readonly -f os::log::stacktrace::print 89 | -------------------------------------------------------------------------------- /hack/lib/util/docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library holds utility functions related to the generation 4 | # of manpages and docs. 5 | 6 | 7 | function generate_manual_pages() { 8 | local dest="$1" 9 | local cmdName="$2" 10 | local filestore=".files_generated_${cmdName}" 11 | local skipprefix="${3:-}" 12 | 13 | os::util::environment::setup_tmpdir_vars generate/manuals 14 | 15 | # We do this in a tmpdir in case the dest has other non-autogenned files 16 | # We don't want to include them in the list of gen'd files 17 | local tmpdir="${BASETMPDIR}/gen_man" 18 | mkdir -p "${tmpdir}" 19 | # generate the new files 20 | genman "${tmpdir}" "${cmdName}" 21 | # create the list of generated files 22 | ls "${tmpdir}" | LC_ALL=C sort > "${tmpdir}/${filestore}" 23 | 24 | # remove all old generated file from the destination 25 | while read file; do 26 | if [[ -e "${tmpdir}/${file}" && -n "${skipprefix}" ]]; then 27 | local original generated 28 | original=$(grep -v "^${skipprefix}" "${dest}/${file}") || : 29 | generated=$(grep -v "^${skipprefix}" "${tmpdir}/${file}") || : 30 | if [[ "${original}" == "${generated}" ]]; then 31 | # overwrite generated with original. 
32 | mv "${dest}/${file}" "${tmpdir}/${file}" 33 | fi 34 | else 35 | rm "${dest}/${file}" || true 36 | fi 37 | done <"${dest}/${filestore}" 38 | 39 | # put the new generated file into the destination 40 | find "${tmpdir}" -exec rsync -pt {} "${dest}" \; >/dev/null 41 | #cleanup 42 | rm -rf "${tmpdir}" 43 | 44 | echo "Assets generated in ${dest}" 45 | } 46 | readonly -f generate_manual_pages 47 | 48 | function generate_documentation() { 49 | local dest="$1" 50 | local skipprefix="${1:-}" 51 | 52 | os::util::environment::setup_tmpdir_vars generate/docs 53 | 54 | # We do this in a tmpdir in case the dest has other non-autogenned files 55 | # We don't want to include them in the list of gen'd files 56 | local tmpdir="${BASETMPDIR}/gen_doc" 57 | mkdir -p "${tmpdir}" 58 | # generate the new files 59 | gendocs "${tmpdir}" 60 | # create the list of generated files 61 | ls "${tmpdir}" | LC_ALL=C sort > "${tmpdir}/.files_generated" 62 | 63 | # remove all old generated file from the destination 64 | while read file; do 65 | if [[ -e "${tmpdir}/${file}" && -n "${skipprefix}" ]]; then 66 | local original generated 67 | original=$(grep -v "^${skipprefix}" "${dest}/${file}") || : 68 | generated=$(grep -v "^${skipprefix}" "${tmpdir}/${file}") || : 69 | if [[ "${original}" == "${generated}" ]]; then 70 | # overwrite generated with original. 71 | mv "${dest}/${file}" "${tmpdir}/${file}" 72 | fi 73 | else 74 | rm "${dest}/${file}" || true 75 | fi 76 | done <"${dest}/.files_generated" 77 | 78 | # put the new generated file into the destination 79 | find "${tmpdir}" -exec rsync -pt {} "${dest}" \; >/dev/null 80 | #cleanup 81 | rm -rf "${tmpdir}" 82 | 83 | echo "Assets generated in ${dest}" 84 | } 85 | readonly -f generate_documentation 86 | 87 | # os::util::gen-docs generates docs and manpages for the all the binaries 88 | # created for Origin. 89 | function os::util::gen-docs() { 90 | os::util::ensure::built_binary_exists 'gendocs' 91 | os::util::ensure::built_binary_exists 'genman' 92 | 93 | OUTPUT_DIR_REL=${1:-""} 94 | OUTPUT_DIR="${OS_ROOT}/${OUTPUT_DIR_REL}/docs/generated" 95 | MAN_OUTPUT_DIR="${OS_ROOT}/${OUTPUT_DIR_REL}/docs/man/man1" 96 | 97 | mkdir -p "${OUTPUT_DIR}" 98 | mkdir -p "${MAN_OUTPUT_DIR}" 99 | 100 | generate_documentation "${OUTPUT_DIR}" 101 | generate_manual_pages "${MAN_OUTPUT_DIR}" "oc" 102 | generate_manual_pages "${MAN_OUTPUT_DIR}" "openshift" 103 | generate_manual_pages "${MAN_OUTPUT_DIR}" "oadm" 104 | } 105 | readonly -f os::util::gen-docs 106 | 107 | # os::util::set-man-placeholder puts a placeholder for every generated manpage. 108 | function os::util::set-man-placeholder() { 109 | MAN_OUTPUT_DIR="$1" 110 | declare -A generated_files=( ["${1}/.files_generated_oadm"]="${1}/.files_generated_oadm" 111 | ["${1}/.files_generated_oc"]="${1}/.files_generated_oc" 112 | ["${1}/.files_generated_openshift"]="${1}/.files_generated_openshift" ) 113 | 114 | # remove all of the old manpages; we don't want to check them in. 115 | for generated_file in "${generated_files[@]}"; do 116 | while read file; do 117 | generated_man="$MAN_OUTPUT_DIR/${file}" 118 | if [[ "${generated_man}" != "${generated_files[$generated_file]}" ]]; then 119 | cp "${OS_ROOT}/hack/autogenerated_placeholder.txt" "${generated_man}" 120 | fi 121 | done <"${generated_file}" 122 | done 123 | } 124 | readonly -f os::util::set-man-placeholder 125 | 126 | # os::util::set-docs-placeholder puts a placeholder for every generated doc. 
127 | function os::util::set-docs-placeholder() { 128 | OUTPUT_DIR="$1" 129 | local generated_file="${OUTPUT_DIR}/.files_generated" 130 | 131 | # remove all of the old docs; we don't want to check them in. 132 | while read file; do 133 | generated_doc="$OUTPUT_DIR/${file}" 134 | if [[ "${generated_doc}" != "${generated_file}" ]]; then 135 | cp "${OS_ROOT}/hack/autogenerated_placeholder.txt" "${generated_doc}" 136 | fi 137 | done <"${generated_file}" 138 | } 139 | readonly -f os::util::set-docs-placeholder 140 | -------------------------------------------------------------------------------- /hack/lib/util/ensure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script contains helper functions for ensuring that dependencies 4 | # exist on a host system that are required to run Origin scripts. 5 | 6 | # os::util::ensure::system_binary_exists ensures that the 7 | # given binary exists on the system in the $PATH. 8 | # 9 | # Globals: 10 | # None 11 | # Arguments: 12 | # - 1: binary to search for 13 | # Returns: 14 | # None 15 | function os::util::ensure::system_binary_exists() { 16 | local binary="$1" 17 | 18 | if ! os::util::find::system_binary "${binary}" >/dev/null 2>&1; then 19 | os::log::fatal "Required \`${binary}\` binary was not found in \$PATH." 20 | fi 21 | } 22 | readonly -f os::util::ensure::system_binary_exists 23 | 24 | # os::util::ensure::built_binary_exists ensures that the 25 | # given binary exists on the system in the local output 26 | # directory for the current platform. If it doesn't, we 27 | # will attempt to build it if we can determine the correct 28 | # hack/build-go.sh target for the binary. 29 | # 30 | # This function will attempt to determine the correct 31 | # hack/build-go.sh target for the binary, but may not 32 | # be able to do so if the target doesn't live under 33 | # cmd/ or tools/. In that case, one should be given. 34 | # 35 | # Globals: 36 | # - OS_ROOT 37 | # Arguments: 38 | # - 1: binary to search for 39 | # - 2: optional build target for this binary 40 | # Returns: 41 | # None 42 | function os::util::ensure::built_binary_exists() { 43 | local binary="$1" 44 | local target="${2:-}" 45 | 46 | if ! os::util::find::built_binary "${binary}" >/dev/null 2>&1; then 47 | if [[ -z "${target}" ]]; then 48 | if [[ -d "${OS_ROOT}/cmd/${binary}" ]]; then 49 | target="cmd/${binary}" 50 | elif [[ -d "${OS_ROOT}/tools/${binary}" ]]; then 51 | target="tools/${binary}" 52 | elif [[ -d "${OS_ROOT}/tools/rebasehelpers/${binary}" ]]; then 53 | target="tools/rebasehelpers/${binary}" 54 | fi 55 | fi 56 | 57 | if [[ -n "${target}" ]]; then 58 | os::log::warn "No compiled \`${binary}\` binary was found. Attempting to build one using: 59 | $ hack/build-go.sh ${target}" 60 | "${OS_ROOT}/hack/build-go.sh" "${target}" 61 | else 62 | os::log::fatal "No compiled \`${binary}\` binary was found and no build target could be determined. 63 | Provide the binary and try running $0 again." 64 | fi 65 | fi 66 | } 67 | readonly -f os::util::ensure::built_binary_exists 68 | 69 | # os::util::ensure::gopath_binary_exists ensures that the 70 | # given binary exists on the system in $GOPATH. 71 | # 72 | # Globals: 73 | # - GOPATH 74 | # Arguments: 75 | # - 1: binary to search for 76 | # Returns: 77 | # None 78 | function os::util::ensure::gopath_binary_exists() { 79 | local binary="$1" 80 | 81 | if ! 
os::util::find::gopath_binary "${binary}" >/dev/null 2>&1; then 82 | os::log::fatal "Required \`${binary}\` binary was not found in \$GOPATH." 83 | fi 84 | } 85 | readonly -f os::util::ensure::gopath_binary_exists 86 | 87 | # os::util::ensure::iptables_privileges_exist tests if the 88 | # testing machine has iptables available and in PATH. Also 89 | # tests that the user can list iptables rules, trying with 90 | # `sudo` if it fails without. 91 | # 92 | # Globals: 93 | # None 94 | # Arguments: 95 | # None 96 | # Returns: 97 | # None 98 | function os::util::ensure::iptables_privileges_exist() { 99 | os::util::ensure::system_binary_exists 'iptables' 100 | 101 | if ! iptables --list >/dev/null 2>&1 && ! sudo iptables --list >/dev/null 2>&1; then 102 | os::log::fatal "You do not have \`iptables\` or \`sudo\` privileges. Kubernetes services will not work 103 | without \`iptables\` access. See https://github.com/kubernetes/kubernetes/issues/1859." 104 | fi 105 | } 106 | readonly -f os::util::ensure::iptables_privileges_exist -------------------------------------------------------------------------------- /hack/lib/util/find.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script contains helper functions for finding components 4 | # in the Origin repository or on the host machine running scripts. 5 | 6 | # os::util::find::system_binary determines the absolute path to a 7 | # system binary, if it exists. 8 | # 9 | # Globals: 10 | # None 11 | # Arguments: 12 | # - 1: binary name 13 | # Returns: 14 | # - location of the binary 15 | function os::util::find::system_binary() { 16 | local binary_name="$1" 17 | 18 | command -v "${binary_name}" 19 | } 20 | readonly -f os::util::find::system_binary 21 | 22 | # os::util::find::built_binary determines the absolute path to a 23 | # built binary for the current platform, if it exists. 24 | # 25 | # Globals: 26 | # - OS_OUTPUT_BINPATH 27 | # Arguments: 28 | # - 1: binary name 29 | # Returns: 30 | # - location of the binary 31 | function os::util::find::built_binary() { 32 | local binary_name="$1" 33 | 34 | local binary_path; binary_path="${OS_OUTPUT_BINPATH}/$( os::build::host_platform )/${binary_name}" 35 | # we need to check that the path leads to a file 36 | # as directories also have the executable bit set 37 | if [[ -f "${binary_path}" && -x "${binary_path}" ]]; then 38 | echo "${binary_path}" 39 | return 0 40 | else 41 | return 1 42 | fi 43 | } 44 | readonly -f os::util::find::built_binary 45 | 46 | # os::util::find::gopath_binary determines the absolute path to a 47 | # binary installed through the go toolchain, if it exists. 
48 | # 49 | # Globals: 50 | # - GOPATH 51 | # Arguments: 52 | # - 1: binary name 53 | # Returns: 54 | # - location of the binary 55 | function os::util::find::gopath_binary() { 56 | local binary_name="$1" 57 | 58 | local old_ifs="${IFS}" 59 | IFS=":" 60 | for part in ${GOPATH}; do 61 | local binary_path="${part}/bin/${binary_name}" 62 | # we need to check that the path leads to a file 63 | # as directories also have the executable bit set 64 | if [[ -f "${binary_path}" && -x "${binary_path}" ]]; then 65 | echo "${binary_path}" 66 | IFS="${old_ifs}" 67 | return 0 68 | fi 69 | done 70 | IFS="${old_ifs}" 71 | return 1 72 | } 73 | readonly -f os::util::find::gopath_binary -------------------------------------------------------------------------------- /hack/lib/util/golang.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library holds golang related utility functions. 4 | 5 | # os::golang::verify_go_version ensure the go tool exists and is a viable version. 6 | function os::golang::verify_go_version() { 7 | os::util::ensure::system_binary_exists 'go' 8 | 9 | local go_version 10 | go_version=($(go version)) 11 | if [[ "${go_version[2]}" != go1.7* ]]; then 12 | os::log::info "Detected go version: ${go_version[*]}." 13 | if [[ -z "${PERMISSIVE_GO:-}" ]]; then 14 | os::log::error "Please install Go version 1.7 or use PERMISSIVE_GO=y to bypass this check." 15 | return 1 16 | else 17 | os::log::warn "Detected golang version doesn't match preferred Go version for Origin." 18 | os::log::warn "This version mismatch could lead to differences in execution between this run and the Origin CI systems." 19 | return 0 20 | fi 21 | fi 22 | } 23 | readonly -f os::golang::verify_go_version 24 | -------------------------------------------------------------------------------- /hack/lib/util/misc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library holds miscellaneous utility functions. If there begin to be groups of functions in this 4 | # file that share intent or are thematically similar, they should be split into their own files. 5 | 6 | # os::util::describe_return_code describes an exit code 7 | # 8 | # Globals: 9 | # - OS_SCRIPT_START_TIME 10 | # Arguments: 11 | # - 1: exit code to describe 12 | # Returns: 13 | # None 14 | function os::util::describe_return_code() { 15 | local return_code=$1 16 | 17 | if [[ "${return_code}" = "0" ]]; then 18 | echo -n "[INFO] $0 succeeded " 19 | else 20 | echo -n "[ERROR] $0 failed " 21 | fi 22 | 23 | if [[ -n "${OS_SCRIPT_START_TIME:-}" ]]; then 24 | local end_time 25 | end_time="$(date +%s)" 26 | local elapsed_time 27 | elapsed_time="$(( end_time - OS_SCRIPT_START_TIME ))" 28 | local formatted_time 29 | formatted_time="$( os::util::format_seconds "${elapsed_time}" )" 30 | echo "after ${formatted_time}" 31 | else 32 | echo 33 | fi 34 | } 35 | readonly -f os::util::describe_return_code 36 | 37 | # os::util::install_describe_return_code installs the return code describer for the EXIT trap 38 | # If the EXIT trap is not initialized, installing this plugin will initialize it. 
39 | # 40 | # Globals: 41 | # None 42 | # Arguments: 43 | # None 44 | # Returns: 45 | # - export OS_DESCRIBE_RETURN_CODE 46 | # - export OS_SCRIPT_START_TIME 47 | function os::util::install_describe_return_code() { 48 | export OS_DESCRIBE_RETURN_CODE="true" 49 | OS_SCRIPT_START_TIME="$( date +%s )"; export OS_SCRIPT_START_TIME 50 | os::util::trap::init_exit 51 | } 52 | readonly -f os::util::install_describe_return_code 53 | 54 | # OS_ORIGINAL_WD is the original working directory the script sourcing this utility file was called 55 | # from. This is an important directory as if $0 is a relative path, we cannot use the following path 56 | # utility without knowing from where $0 is relative. 57 | if [[ -z "${OS_ORIGINAL_WD:-}" ]]; then 58 | # since this could be sourced in a context where the utilities are already loaded, 59 | # we want to ensure that this is re-entrant, so we only set $OS_ORIGINAL_WD if it 60 | # is not set already 61 | OS_ORIGINAL_WD="$( pwd )" 62 | readonly OS_ORIGINAL_WD 63 | export OS_ORIGINAL_WD 64 | fi 65 | 66 | # os::util::repository_relative_path returns the relative path from the $OS_ROOT directory to the 67 | # given file, if the file is inside of the $OS_ROOT directory. If the file is outside of $OS_ROOT, 68 | # this function will return the absolute path to the file 69 | # 70 | # Globals: 71 | # - OS_ROOT 72 | # Arguments: 73 | # - 1: the path to relativize 74 | # Returns: 75 | # None 76 | function os::util::repository_relative_path() { 77 | local filename=$1 78 | local directory; directory="$( dirname "${filename}" )" 79 | filename="$( basename "${filename}" )" 80 | 81 | if [[ "${directory}" != "${OS_ROOT}"* ]]; then 82 | pushd "${OS_ORIGINAL_WD}" >/dev/null 2>&1 83 | directory="$( os::util::absolute_path "${directory}" )" 84 | popd >/dev/null 2>&1 85 | fi 86 | 87 | directory="${directory##*${OS_ROOT}/}" 88 | 89 | echo "${directory}/${filename}" 90 | } 91 | readonly -f os::util::repository_relative_path 92 | 93 | # os::util::format_seconds formats a duration of time in seconds to print in HHh MMm SSs 94 | # 95 | # Globals: 96 | # None 97 | # Arguments: 98 | # - 1: time in seconds to format 99 | # Return: 100 | # None 101 | function os::util::format_seconds() { 102 | local raw_seconds=$1 103 | 104 | local hours minutes seconds 105 | (( hours=raw_seconds/3600 )) 106 | (( minutes=(raw_seconds%3600)/60 )) 107 | (( seconds=raw_seconds%60 )) 108 | 109 | printf '%02dh %02dm %02ds' "${hours}" "${minutes}" "${seconds}" 110 | } 111 | readonly -f os::util::format_seconds 112 | 113 | # os::util::sed attempts to make our Bash scripts agnostic to the platform 114 | # on which they run `sed` by glossing over a discrepancy in flag use in GNU. 115 | # 116 | # Globals: 117 | # None 118 | # Arguments: 119 | # - all: arguments to pass to `sed -i` 120 | # Return: 121 | # None 122 | function os::util::sed() { 123 | if LANG=C sed --help 2>&1 | grep -q "GNU sed"; then 124 | sed -i'' "$@" 125 | else 126 | sed -i '' "$@" 127 | fi 128 | } 129 | readonly -f os::util::sed 130 | 131 | # os::util::base64decode attempts to make our Bash scripts agnostic to the platform 132 | # on which they run `base64decode` by glossing over a discrepancy in flag use in GNU. 
133 | # 134 | # Globals: 135 | # None 136 | # Arguments: 137 | # - all: arguments to pass to `base64decode` 138 | # Return: 139 | # None 140 | function os::util::base64decode() { 141 | if [[ "$(go env GOHOSTOS)" == "darwin" ]]; then 142 | base64 -D "$@" 143 | else 144 | base64 -d "$@" 145 | fi 146 | } 147 | readonly -f os::util::base64decode 148 | 149 | # os::util::curl_etcd sends a request to the backing etcd store for the master. 150 | # We use the administrative client cert and key for access and re-encode them 151 | # as necessary for OSX clients. 152 | # 153 | # Globals: 154 | # MASTER_CONFIG_DIR 155 | # API_SCHEME 156 | # API_HOST 157 | # ETCD_PORT 158 | # Arguments: 159 | # - 1: etcd-relative URL to curl, with leading slash 160 | # Returns: 161 | # None 162 | function os::util::curl_etcd() { 163 | local url="$1" 164 | local full_url="${API_SCHEME}://${API_HOST}:${ETCD_PORT}${url}" 165 | 166 | local etcd_client_cert="${MASTER_CONFIG_DIR}/master.etcd-client.crt" 167 | local etcd_client_key="${MASTER_CONFIG_DIR}/master.etcd-client.key" 168 | local ca_bundle="${MASTER_CONFIG_DIR}/ca-bundle.crt" 169 | 170 | if curl -V | grep -q 'SecureTransport'; then 171 | # on newer OSX `curl` implementations, SSL is not used and client certs 172 | # and keys are expected to be encoded in P12 format instead of PEM format, 173 | # so we need to convert the secrets that the server wrote if we haven't 174 | # already done so 175 | local etcd_client_cert_p12="${MASTER_CONFIG_DIR}/master.etcd-client.crt.p12" 176 | local etcd_client_cert_p12_password="${CURL_CERT_P12_PASSWORD:-'password'}" 177 | if [[ ! -f "${etcd_client_cert_p12}" ]]; then 178 | openssl pkcs12 -export \ 179 | -in "${etcd_client_cert}" \ 180 | -inkey "${etcd_client_key}" \ 181 | -out "${etcd_client_cert_p12}" \ 182 | -password "pass:${etcd_client_cert_p12_password}" 183 | fi 184 | 185 | curl --fail --silent --cacert "${ca_bundle}" \ 186 | --cert "${etcd_client_cert_p12}:${etcd_client_cert_p12_password}" "${full_url}" 187 | else 188 | curl --fail --silent --cacert "${ca_bundle}" \ 189 | --cert "${etcd_client_cert}" --key "${etcd_client_key}" "${full_url}" 190 | fi 191 | } 192 | 193 | # os::util::host_platform determines what the host OS and architecture 194 | # are, as Golang sees it. The go tool chain does some slightly different 195 | # things when the target platform matches the host platform. 196 | # 197 | # Globals: 198 | # None 199 | # Arguments: 200 | # None 201 | # Returns: 202 | # None 203 | function os::util::host_platform() { 204 | echo "$(go env GOHOSTOS)/$(go env GOHOSTARCH)" 205 | } 206 | readonly -f os::util::host_platform -------------------------------------------------------------------------------- /hack/lib/util/text.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This file contains helpful aliases for manipulating the output text to the terminal as 4 | # well as functions for one-command augmented printing. 
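# Example usage (illustrative, using the helpers defined below): either call a one-shot printer,
#   os::text::print_red_bold "[ERROR] build failed"
# or bracket output with the primitives directly,
#   os::text::green; echo "tests passed"; os::text::reset
# Color codes are only emitted when output is a TTY (see os::text::internal::is_tty below).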
5 | 6 | # os::text::reset resets the terminal output to default if it is called in a TTY 7 | function os::text::reset() { 8 | if os::text::internal::is_tty; then 9 | tput sgr0 10 | fi 11 | } 12 | readonly -f os::text::reset 13 | 14 | # os::text::bold sets the terminal output to bold text if it is called in a TTY 15 | function os::text::bold() { 16 | if os::text::internal::is_tty; then 17 | tput bold 18 | fi 19 | } 20 | readonly -f os::text::bold 21 | 22 | # os::text::red sets the terminal output to red text if it is called in a TTY 23 | function os::text::red() { 24 | if os::text::internal::is_tty; then 25 | tput setaf 1 26 | fi 27 | } 28 | readonly -f os::text::red 29 | 30 | # os::text::green sets the terminal output to green text if it is called in a TTY 31 | function os::text::green() { 32 | if os::text::internal::is_tty; then 33 | tput setaf 2 34 | fi 35 | } 36 | readonly -f os::text::green 37 | 38 | # os::text::blue sets the terminal output to blue text if it is called in a TTY 39 | function os::text::blue() { 40 | if os::text::internal::is_tty; then 41 | tput setaf 4 42 | fi 43 | } 44 | readonly -f os::text::blue 45 | 46 | # os::text::yellow sets the terminal output to yellow text if it is called in a TTY 47 | function os::text::yellow() { 48 | if os::text::internal::is_tty; then 49 | tput setaf 11 50 | fi 51 | } 52 | readonly -f os::text::yellow 53 | 54 | # os::text::clear_last_line clears the text from the last line of output to the 55 | # terminal and leaves the cursor on that line to allow for overwriting that text 56 | # if it is called in a TTY 57 | function os::text::clear_last_line() { 58 | if os::text::internal::is_tty; then 59 | tput cuu 1 60 | tput el 61 | fi 62 | } 63 | readonly -f os::text::clear_last_line 64 | 65 | # os::text::clear_string attempts to clear the entirety of a string from the terminal. 66 | # If the string contains literal tabs or other characters that take up more than one 67 | # character space in output, or if the window size is changed before this function 68 | # is called, it will not function correctly. 
69 | # No action is taken if this is called outside of a TTY 70 | function os::text::clear_string() { 71 | local -r string="$1" 72 | if os::text::internal::is_tty; then 73 | echo "${string}" | while read line; do 74 | # num_lines is the number of terminal lines this one line of output 75 | # would have taken up with the current terminal width in columns 76 | local num_lines=$(( ${#line} / $( tput cols ) )) 77 | for (( i = 0; i <= num_lines; i++ )); do 78 | os::text::clear_last_line 79 | done 80 | done 81 | fi 82 | } 83 | 84 | # os::text::internal::is_tty determines if we are outputting to a TTY 85 | function os::text::internal::is_tty() { 86 | [[ -t 1 && -n "${TERM:-}" ]] 87 | } 88 | readonly -f os::text::internal::is_tty 89 | 90 | # os::text::print_bold prints all input in bold text 91 | function os::text::print_bold() { 92 | os::text::bold 93 | echo "${*}" 94 | os::text::reset 95 | } 96 | readonly -f os::text::print_bold 97 | 98 | # os::text::print_red prints all input in red text 99 | function os::text::print_red() { 100 | os::text::red 101 | echo "${*}" 102 | os::text::reset 103 | } 104 | readonly -f os::text::print_red 105 | 106 | # os::text::print_red_bold prints all input in bold red text 107 | function os::text::print_red_bold() { 108 | os::text::red 109 | os::text::bold 110 | echo "${*}" 111 | os::text::reset 112 | } 113 | readonly -f os::text::print_red_bold 114 | 115 | # os::text::print_green prints all input in green text 116 | function os::text::print_green() { 117 | os::text::green 118 | echo "${*}" 119 | os::text::reset 120 | } 121 | readonly -f os::text::print_green 122 | 123 | # os::text::print_green_bold prints all input in bold green text 124 | function os::text::print_green_bold() { 125 | os::text::green 126 | os::text::bold 127 | echo "${*}" 128 | os::text::reset 129 | } 130 | readonly -f os::text::print_green_bold 131 | 132 | # os::text::print_blue prints all input in blue text 133 | function os::text::print_blue() { 134 | os::text::blue 135 | echo "${*}" 136 | os::text::reset 137 | } 138 | readonly -f os::text::print_blue 139 | 140 | # os::text::print_blue_bold prints all input in bold blue text 141 | function os::text::print_blue_bold() { 142 | os::text::blue 143 | os::text::bold 144 | echo "${*}" 145 | os::text::reset 146 | } 147 | readonly -f os::text::print_blue_bold 148 | 149 | # os::text::print_yellow prints all input in yellow text 150 | function os::text::print_yellow() { 151 | os::text::yellow 152 | echo "${*}" 153 | os::text::reset 154 | } 155 | readonly -f os::text::print_yellow 156 | 157 | # os::text::print_yellow_bold prints all input in bold yellow text 158 | function os::text::print_yellow_bold() { 159 | os::text::yellow 160 | os::text::bold 161 | echo "${*}" 162 | os::text::reset 163 | } 164 | readonly -f os::text::print_yellow_bold 165 | -------------------------------------------------------------------------------- /hack/lib/util/trap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library defines the trap handlers for the ERR and EXIT signals. Any new handler for these signals 4 | # must be added to these handlers and activated by the environment variable mechanism that the rest use. 5 | # These functions ensure that no handler can ever alter the exit code that was emitted by a command 6 | # in a test script. 7 | 8 | # os::util::trap::init_err initializes the privileged handler for the ERR signal if it hasn't 9 | # been registered already. 
This will overwrite any other handlers registered on the signal. 10 | # 11 | # Globals: 12 | # None 13 | # Arguments: 14 | # None 15 | # Returns: 16 | # None 17 | function os::util::trap::init_err() { 18 | if ! trap -p ERR | grep -q 'os::util::trap::err_handler'; then 19 | trap 'os::util::trap::err_handler;' ERR 20 | fi 21 | } 22 | readonly -f os::util::trap::init_err 23 | 24 | # os::util::trap::init_exit initializes the privileged handler for the EXIT signal if it hasn't 25 | # been registered already. This will overwrite any other handlers registered on the signal. 26 | # 27 | # Globals: 28 | # None 29 | # Arguments: 30 | # None 31 | # Returns: 32 | # None 33 | function os::util::trap::init_exit() { 34 | if ! trap -p EXIT | grep -q 'os::util::trap::exit_handler'; then 35 | trap 'os::util::trap::exit_handler;' EXIT 36 | fi 37 | } 38 | readonly -f os::util::trap::init_exit 39 | 40 | # os::util::trap::err_handler is the handler for the ERR signal. 41 | # 42 | # Globals: 43 | # - OS_TRAP_DEBUG 44 | # - OS_USE_STACKTRACE 45 | # Arguments: 46 | # None 47 | # Returns: 48 | # - returns original return code, allows privileged handler to exit if necessary 49 | function os::util::trap::err_handler() { 50 | local -r return_code=$? 51 | local -r last_command="${BASH_COMMAND}" 52 | 53 | if set +o | grep -q '\-o errexit'; then 54 | local -r errexit_set=true 55 | fi 56 | 57 | if [[ "${OS_TRAP_DEBUG:-}" = "true" ]]; then 58 | echo "[DEBUG] Error handler executing with return code \`${return_code}\`, last command \`${last_command}\`, and errexit set \`${errexit_set:-}\`" 59 | fi 60 | 61 | if [[ "${OS_USE_STACKTRACE:-}" = "true" ]]; then 62 | # the OpenShift stacktrace function is treated as a privileged handler for this signal 63 | # and is therefore allowed to run outside of a subshell in order to allow it to `exit` 64 | # if necessary 65 | os::log::stacktrace::print "${return_code}" "${last_command}" "${errexit_set:-}" 66 | fi 67 | 68 | return "${return_code}" 69 | } 70 | readonly -f os::util::trap::err_handler 71 | 72 | # os::util::trap::exit_handler is the handler for the EXIT signal. 73 | # 74 | # Globals: 75 | # - OS_TRAP_DEBUG 76 | # - OS_DESCRIBE_RETURN_CODE 77 | # Arguments: 78 | # None 79 | # Returns: 80 | # - original exit code of the script that exited 81 | function os::util::trap::exit_handler() { 82 | local -r return_code=$? 83 | 84 | # we do not want these traps to be able to trigger more errors, we can let them fail silently 85 | set +o errexit 86 | 87 | if [[ "${OS_TRAP_DEBUG:-}" = "true" ]]; then 88 | echo "[DEBUG] Exit handler executing with return code \`${return_code}\`" 89 | fi 90 | 91 | # the following envars selectively enable optional exit traps, all of which are run inside of 92 | # a subshell in order to sandbox them and not allow them to influence how this script will exit 93 | if [[ "${OS_DESCRIBE_RETURN_CODE:-}" = "true" ]]; then 94 | ( os::util::describe_return_code "${return_code}" ) 95 | fi 96 | 97 | exit "${return_code}" 98 | } 99 | readonly -f os::util::trap::exit_handler 100 | -------------------------------------------------------------------------------- /hack/util.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Provides simple utility functions 4 | 5 | # kill_all_processes function will kill all 6 | # all processes created by the test script. 
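# In outline, the function below walks the shell's own background jobs (`jobs -pr`),
# kills each job's children first (`pgrep -P <pid>`), then the job leader itself,
# prefixing the kill commands with sudo only when USE_SUDO is set.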
7 | function kill_all_processes() { 8 | local sudo="${USE_SUDO:+sudo}" 9 | 10 | pids=($(jobs -pr)) 11 | for i in ${pids[@]-}; do 12 | pgrep -P "${i}" | xargs $sudo kill &> /dev/null 13 | $sudo kill ${i} &> /dev/null 14 | done 15 | } 16 | readonly -f kill_all_processes 17 | 18 | # dump_container_logs writes container logs to $LOG_DIR 19 | function dump_container_logs() { 20 | if ! docker version >/dev/null 2>&1; then 21 | return 22 | fi 23 | 24 | mkdir -p ${LOG_DIR} 25 | 26 | os::log::info "Dumping container logs to ${LOG_DIR}" 27 | for container in $(docker ps -aq); do 28 | container_name=$(docker inspect -f "{{.Name}}" $container) 29 | # strip off leading / 30 | container_name=${container_name:1} 31 | if [[ "$container_name" =~ ^k8s_ ]]; then 32 | pod_name=$(echo $container_name | awk 'BEGIN { FS="[_.]+" }; { print $4 }') 33 | container_name=${pod_name}-$(echo $container_name | awk 'BEGIN { FS="[_.]+" }; { print $2 }') 34 | fi 35 | docker logs "$container" >&"${LOG_DIR}/container-${container_name}.log" 36 | done 37 | } 38 | readonly -f dump_container_logs 39 | 40 | # delete_empty_logs deletes empty logs 41 | function delete_empty_logs() { 42 | # Clean up zero byte log files 43 | find "${ARTIFACT_DIR}" "${LOG_DIR}" -type f -name '*.log' \( -empty \) -delete 44 | } 45 | readonly -f delete_empty_logs 46 | 47 | # truncate_large_logs truncates large logs 48 | function truncate_large_logs() { 49 | # Clean up large log files so they don't end up on jenkins 50 | local max_file_size="100M" 51 | local large_files=$(find "${ARTIFACT_DIR}" "${LOG_DIR}" -type f -name '*.log' \( -size +${max_file_size} \)) 52 | for file in ${large_files}; do 53 | mv "${file}" "${file}.tmp" 54 | echo "LOGFILE TOO LONG ($(du -h "${file}.tmp")), PREVIOUS BYTES TRUNCATED. LAST ${max_file_size} OF LOGFILE:" > "${file}" 55 | tail -c ${max_file_size} "${file}.tmp" >> "${file}" 56 | rm "${file}.tmp" 57 | done 58 | } 59 | readonly -f truncate_large_logs 60 | 61 | ###### 62 | # start of common functions for extended test group's run.sh scripts 63 | ###### 64 | 65 | # cleanup_openshift saves container logs, saves resources, and kills all processes and containers 66 | function cleanup_openshift() { 67 | LOG_DIR="${LOG_DIR:-${BASETMPDIR}/logs}" 68 | ARTIFACT_DIR="${ARTIFACT_DIR:-${LOG_DIR}}" 69 | API_HOST="${API_HOST:-127.0.0.1}" 70 | API_SCHEME="${API_SCHEME:-https}" 71 | ETCD_PORT="${ETCD_PORT:-4001}" 72 | 73 | set +e 74 | dump_container_logs 75 | 76 | # pull information out of the server log so that we can get failure management in jenkins to highlight it and 77 | # really have it smack people in their logs. This is a severe correctness problem 78 | grep -a5 "CACHE.*ALTERED" ${LOG_DIR}/openshift.log 79 | 80 | os::cleanup::dump_etcd 81 | 82 | if [[ -z "${SKIP_TEARDOWN-}" ]]; then 83 | os::log::info "Tearing down test" 84 | kill_all_processes 85 | 86 | if docker version >/dev/null 2>&1; then 87 | os::log::info "Stopping k8s docker containers"; docker ps | awk 'index($NF,"k8s_")==1 { print $1 }' | xargs -l -r docker stop -t 1 >/dev/null 88 | if [[ -z "${SKIP_IMAGE_CLEANUP-}" ]]; then 89 | os::log::info "Removing k8s docker containers"; docker ps -a | awk 'index($NF,"k8s_")==1 { print $1 }' | xargs -l -r docker rm -v >/dev/null 90 | fi 91 | fi 92 | 93 | os::log::info "Pruning etcd data directory..." 
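# Note on the idiom used just below: "${USE_SUDO:+sudo}" expands to the literal string
# "sudo" only when USE_SUDO is set and non-empty, and to an empty string otherwise, so the
# same command line works with or without privilege escalation, e.g.
#   USE_SUDO=true  ->  sudo rm -rf "${ETCD_DATA_DIR}"
#   USE_SUDO unset ->       rm -rf "${ETCD_DATA_DIR}"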
94 | local sudo="${USE_SUDO:+sudo}" 95 | ${sudo} rm -rf "${ETCD_DATA_DIR}" 96 | 97 | set -u 98 | fi 99 | 100 | if grep -q 'no Docker socket found' "${LOG_DIR}/openshift.log" && command -v journalctl >/dev/null 2>&1; then 101 | # the Docker daemon crashed, we need the logs 102 | journalctl --unit docker.service --since -4hours > "${LOG_DIR}/docker.log" 103 | fi 104 | 105 | delete_empty_logs 106 | truncate_large_logs 107 | 108 | os::log::info "Cleanup complete" 109 | set -e 110 | } 111 | readonly -f cleanup_openshift 112 | 113 | ###### 114 | # end of common functions for extended test group's run.sh scripts 115 | ###### 116 | 117 | function find_files() { 118 | find . -not \( \ 119 | \( \ 120 | -wholename './_output' \ 121 | -o -wholename './.*' \ 122 | -o -wholename './pkg/assets/bindata.go' \ 123 | -o -wholename './pkg/assets/*/bindata.go' \ 124 | -o -wholename './pkg/bootstrap/bindata.go' \ 125 | -o -wholename './openshift.local.*' \ 126 | -o -wholename '*/vendor/*' \ 127 | -o -wholename './assets/bower_components/*' \ 128 | \) -prune \ 129 | \) -name '*.go' | sort -u 130 | } 131 | readonly -f find_files 132 | -------------------------------------------------------------------------------- /image-inc.yaml: -------------------------------------------------------------------------------- 1 | schema_version: 1 2 | 3 | # The S2I bits are included to pick up the usage script and 4 | # to give a meaningful error on an attempt to install Spark in 5 | # a complete image via S2I 6 | 7 | version: 3.0 8 | release: community 9 | name: radanalyticsio/openshift-spark-inc 10 | from: centos:8 11 | labels: 12 | - name: maintainer 13 | value: Trevor McKay 14 | - name: "io.openshift.s2i.scripts-url" 15 | value: "image:///usr/libexec/s2i" 16 | 17 | modules: 18 | repositories: 19 | - path: modules 20 | 21 | install: 22 | - name: common 23 | - name: metrics 24 | - name: s2i 25 | packages: 26 | install: 27 | - java-11-openjdk 28 | - rsync 29 | run: 30 | user: 185 31 | entrypoint: 32 | - "/entrypoint" 33 | cmd: 34 | - "/usr/libexec/s2i/usage" 35 | -------------------------------------------------------------------------------- /image.yaml: -------------------------------------------------------------------------------- 1 | schema_version: 1 2 | 3 | # The S2I bits are included to pick up the usage script and 4 | # to give a meaningful error on an attempt to install Spark in 5 | # a complete image via S2I 6 | 7 | version: 3.0 8 | release: community 9 | name: radanalyticsio/openshift-spark 10 | from: centos:8 11 | labels: 12 | - name: maintainer 13 | value: Trevor McKay 14 | - name: sparkversion 15 | value: 3.0.1 16 | - name: "io.openshift.s2i.scripts-url" 17 | value: "image:///usr/libexec/s2i" 18 | 19 | packages: 20 | install: 21 | - java-11-openjdk 22 | # python36 added in the common module 23 | - python3-numpy 24 | 25 | modules: 26 | repositories: 27 | - path: modules 28 | install: 29 | - name: common 30 | - name: metrics 31 | - name: spark 32 | - name: s2i 33 | run: 34 | user: 185 35 | entrypoint: 36 | - "/entrypoint" 37 | cmd: 38 | - "/launch.sh" 39 | workdir: /tmp 40 | -------------------------------------------------------------------------------- /make-build-dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Regenerate the build directory based on image.*.yaml 4 | make clean-target 5 | make clean-context 6 | make -f Makefile.inc clean-context 7 | 8 | make context 9 | make -f Makefile.inc context 10 | 11 | make zero-tarballs 12 | make -f 
Makefile.inc zero-tarballs 13 | 14 | # Add any changes for a commit 15 | git add openshift-spark-build 16 | git add openshift-spark-build-inc 17 | -------------------------------------------------------------------------------- /modules/common/added/conf/agent-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | lowercaseOutputName: true 3 | lowercaseOutputLabelNames: true 4 | whitelistObjectNames: 5 | - 'metrics:*' 6 | 7 | rules: 8 | 9 | # These come from the master 10 | # Example: master.aliveWorkers 11 | - pattern: "metrics<>Value" 12 | name: spark_master_$1 13 | 14 | # These come from the worker 15 | # Example: worker.coresFree 16 | - pattern: "metrics<>Value" 17 | name: spark_worker_$1 18 | 19 | # These come from the application driver 20 | # Example: app-20160809000059-0000.driver.DAGScheduler.stage.failedStages 21 | - pattern: "metrics<>Value" 22 | name: spark_driver_$2_$3 23 | labels: 24 | app_id: "$1" 25 | 26 | # These come from the application driver if it's a streaming application 27 | # Example: app-20160809000059-0000.driver.com.example.ClassName.StreamingMetrics.streaming.lastCompletedBatch_schedulingDelay 28 | - pattern: "metrics<>Value" 29 | name: spark_streaming_driver_$3 30 | labels: 31 | app_id: "$1" 32 | app_name: "$2" 33 | 34 | # These come from the application driver if it's a structured streaming application 35 | # Example: app-20160809000059-0000.driver.spark.streaming.QueryName.inputRate-total 36 | - pattern: "metrics<>Value" 37 | name: spark_structured_streaming_driver_$3 38 | labels: 39 | app_id: "$1" 40 | query_name: "$2" 41 | 42 | # These come from the application executors 43 | # Example: app-20160809000059-0000.0.executor.threadpool.activeTasks 44 | - pattern: "metrics<>Value" 45 | name: spark_executor_$3 46 | labels: 47 | app_id: "$1" 48 | executor_id: "$2" 49 | 50 | # These come from the master 51 | # Example: application.com.example.ClassName.1470700859054.cores 52 | - pattern: "metrics<>Value" 53 | name: spark_application_$3 54 | labels: 55 | app_name: "$1" 56 | app_start_epoch: "$2" 57 | -------------------------------------------------------------------------------- /modules/common/added/conf/agent.properties: -------------------------------------------------------------------------------- 1 | jmx_exporter=7777:/opt/spark/conf/agent-config.yaml 2 | -------------------------------------------------------------------------------- /modules/common/added/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /modules/common/added/conf/metrics.properties: -------------------------------------------------------------------------------- 1 | *.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 2 | master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 3 | worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 4 | driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 5 | executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 6 | application.source.jvm.class=org.apache.spark.metrics.source.JvmSource 7 | -------------------------------------------------------------------------------- /modules/common/added/conf/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 
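# Anything set here is only a default: values passed to spark-submit via --conf, or set
# programmatically on SparkConf, take precedence over this file. For example (illustrative):
#   spark-submit --conf spark.ui.reverseProxy=false ...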
20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.ui.reverseProxy true 29 | spark.ui.reverseProxyUrl / 30 | -------------------------------------------------------------------------------- /modules/common/added/scripts/entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If we get an s2i command and it's anything but "run" just do it 4 | # Otherwise we'll turn it into a launch 5 | if [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == *"$STI_SCRIPTS_PATH"* ]]; then 6 | if ! [[ $@ == *"$STI_SCRIPTS_PATH"/run* ]]; then 7 | exec "$@" 8 | exit $? 9 | fi 10 | CMD=/launch.sh 11 | 12 | # allow just a simple "usage" command to print the usage script 13 | elif [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == "usage" ]]; then 14 | exec $STI_SCRIPTS_PATH/usage 15 | exit $? 16 | else 17 | CMD=$@ 18 | fi 19 | 20 | trap handle_term TERM INT 21 | 22 | function handle_term { 23 | echo Received a termination signal 24 | 25 | local cnt 26 | local killed=1 27 | if [ -n "$PID" ]; then 28 | echo "Stopping subprocess $PID" 29 | kill -TERM $PID 30 | for cnt in {1..10} 31 | do 32 | kill -0 $PID >/dev/null 2>&1 33 | if [ "$?" -ne 0 ]; then 34 | killed=0 35 | break 36 | else 37 | sleep 1 38 | fi 39 | done 40 | if [ "$killed" -ne 0 ]; then 41 | echo Process is still running 10 seconds after TERM, sending KILL 42 | kill -9 $PID 43 | fi 44 | wait $PID 45 | echo "Subprocess stopped" 46 | fi 47 | exit 0 48 | } 49 | 50 | function patch_uid { 51 | # Check whether there is a passwd entry for the container UID 52 | myuid=$(id -u) 53 | mygid=$(id -g) 54 | uidentry=$(getent passwd $myuid) 55 | 56 | # If there is no passwd entry for the container UID, attempt to create one 57 | if [ -z "$uidentry" ] ; then 58 | if [ -w /etc/passwd ] ; then 59 | echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd 60 | else 61 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 62 | fi 63 | fi 64 | } 65 | 66 | # If we receive a spark-on-kube command, hand it off to the 67 | # standard spark entrypoint 68 | case "$1" in 69 | driver | executor) 70 | $SPARK_INSTALL/entrypoint.sh $CMD & 71 | ;; 72 | *) 73 | patch_uid 74 | $CMD & 75 | ;; 76 | esac 77 | PID=$! 78 | wait $PID 79 | -------------------------------------------------------------------------------- /modules/common/added/scripts/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function check_reverse_proxy { 4 | grep -e "^spark\.ui\.reverseProxy" $SPARK_HOME/conf/spark-defaults.conf &> /dev/null 5 | if [ "$?" 
-ne 0 ]; then 6 | echo "Appending default reverse proxy config to spark-defaults.conf" 7 | echo "spark.ui.reverseProxy true" >> $SPARK_HOME/conf/spark-defaults.conf 8 | echo "spark.ui.reverseProxyUrl /" >> $SPARK_HOME/conf/spark-defaults.conf 9 | fi 10 | } 11 | 12 | # If the UPDATE_SPARK_CONF_DIR dir is non-empty, 13 | # copy the contents to $SPARK_HOME/conf 14 | if [ -d "$UPDATE_SPARK_CONF_DIR" ]; then 15 | sparkconfs=$(ls -1 $UPDATE_SPARK_CONF_DIR | wc -l) 16 | if [ "$sparkconfs" -ne "0" ]; then 17 | echo "Copying from $UPDATE_SPARK_CONF_DIR to $SPARK_HOME/conf" 18 | ls -1 $UPDATE_SPARK_CONF_DIR 19 | cp $UPDATE_SPARK_CONF_DIR/* $SPARK_HOME/conf 20 | fi 21 | elif [ -n "$UPDATE_SPARK_CONF_DIR" ]; then 22 | echo "Directory $UPDATE_SPARK_CONF_DIR does not exist, using default spark config" 23 | fi 24 | 25 | check_reverse_proxy 26 | 27 | if [ -z ${SPARK_METRICS_ON+_} ]; then 28 | JAVA_AGENT= 29 | metrics="" 30 | elif [ ${SPARK_METRICS_ON} == "prometheus" ]; then 31 | JAVA_AGENT=" -javaagent:/opt/metrics/agent-bond.jar=$SPARK_HOME/conf/agent.properties" 32 | metrics=" with prometheus metrics enabled" 33 | else 34 | JAVA_AGENT=" -javaagent:/opt/metrics/jolokia-jvm-1.3.6-agent.jar=port=7777,host=0.0.0.0" 35 | metrics=" with jolokia metrics enabled (deprecated, set SPARK_METRICS_ON to 'prometheus')" 36 | fi 37 | 38 | if [ -z ${SPARK_MASTER_ADDRESS+_} ]; then 39 | echo "Starting master$metrics" 40 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.master.Master 41 | else 42 | echo "Starting worker$metrics, will connect to: $SPARK_MASTER_ADDRESS" 43 | 44 | # spark://x.y.z:7077 -> x.y.z/7077 45 | _MASTER_HOST_AND_PORT=$(echo $SPARK_MASTER_ADDRESS | sed -r 's;.*//(.*):(.*);\1/\2;g') 46 | while true; do 47 | echo "Waiting for spark master to be available ..." 48 | timeout 1 sh -c "(</dev/tcp/$_MASTER_HOST_AND_PORT) &>/dev/null" 49 | if [ $? -eq 0 ]; then 50 | break 51 | fi 52 | sleep 1 53 | done 54 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.worker.Worker $SPARK_MASTER_ADDRESS 55 | fi 56 | -------------------------------------------------------------------------------- /modules/common/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | 8 | # Put entrypoint and launch.sh at the root 9 | cp $ADDED_DIR/scripts/* / 10 | 11 | # Set up a place for spark to go 12 | # We'll also stage our default spark config files here 13 | # so that when spark is installed they can be copied over 14 | # if the spark tarball itself does not include files of the same name 15 | # (ie, "copy if not overwrite") 16 | if ! [ -d $SPARK_INSTALL ]; then 17 | mkdir -p $SPARK_INSTALL 18 | mv $ADDED_DIR/conf $SPARK_INSTALL 19 | chown -R 185:0 $SPARK_INSTALL && chmod -R g+rwX $SPARK_INSTALL 20 | ln -sfn $SPARK_INSTALL/distro /opt/spark 21 | fi 22 | 23 | # Change the permissions on /etc/passwd so that anonymous user 24 | # can be added to satisfy Spark 25 | chgrp root /etc/passwd && chmod g+rw /etc/passwd 26 | 27 | # Make Python3 the default.
This is important because it seems 28 | # that Spark still wants to invoke python scripts with "python" 29 | # in the executors, as opposed to "python3" 30 | alternatives --set python /usr/bin/python3 31 | -------------------------------------------------------------------------------- /modules/common/module.yaml: -------------------------------------------------------------------------------- 1 | version: 1.0 2 | 3 | name: common 4 | envs: 5 | - name: SPARK_INSTALL 6 | value: /opt/spark-distro 7 | packages: 8 | install: 9 | - python36 10 | execute: 11 | - script: install 12 | -------------------------------------------------------------------------------- /modules/metrics/added/agent-bond.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/modules/metrics/added/agent-bond.jar -------------------------------------------------------------------------------- /modules/metrics/added/jolokia-jvm-1.3.6-agent.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar -------------------------------------------------------------------------------- /modules/metrics/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -u 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | mkdir -p /opt/metrics 8 | mv $ADDED_DIR/*.jar /opt/metrics 9 | 10 | chown -R 185:0 /opt/metrics && chmod g+rwX /opt/metrics 11 | -------------------------------------------------------------------------------- /modules/metrics/module.yaml: -------------------------------------------------------------------------------- 1 | version: 1.0 2 | 3 | name: metrics 4 | execute: 5 | - script: install 6 | -------------------------------------------------------------------------------- /modules/s2i/added/assemble: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source $STI_SCRIPTS_PATH/s2i-env-vars 4 | 5 | # Just a word about the directory structure 6 | # SPARK_HOME == /opt/spark 7 | # SPARK_INSTALL == /opt/spark-distro 8 | 9 | # Extra things like default configuration files and additional 10 | # boot scripts may be stored in SPARK_INSTALL 11 | 12 | # At runtime, /opt/spark is a symlink to /opt/spark-distro/distro 13 | # but /opt/spark-distro/distro does not actually exist yet 14 | 15 | # The Spark tarball will be expanded in /opt/spark-distro using 16 | # it's original name, for example /opt/spark-distro/spark-2.3.0-bin-hadoop2.7, 17 | # as a dev aid to tracking and version checking 18 | 19 | # Ultimately, /opt/spark-distro/distro is created as a symlink to the Spark root 20 | # directory. 
This double-hop from /opt/spark to the Spark root through symlinks 21 | # allows the Spark installation to be staged in the base image but completed in 22 | # the S2I build without expanding permissions 23 | 24 | function match_sum { 25 | local sumfile=$1 26 | local delim=$2 27 | local sha512=$3 28 | local initial=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" -f1 | tr [:upper:] [:lower:]) 29 | local rest=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" --complement -f1 | tr [:upper:] [:lower:]) 30 | if [ "$sha512" == "$initial" ] || [ "$sha512" == "$rest" ]; then 31 | return 0 32 | fi 33 | return 1 34 | } 35 | 36 | if [ -f $SPARK_HOME/bin/spark-submit ]; then 37 | echo "Spark is installed, nothing to do" 38 | exit 1 39 | else 40 | echo "Attempting to install Spark" 41 | # If a url has been specfified for spark use it 42 | if [ -n "$SPARK_URL" ]; then 43 | echo Downloading $SPARK_URL 44 | wget $SPARK_URL -P $S2I_SOURCE_DIR 45 | fi 46 | if [ -n "$SPARK_SHA512_URL" ]; then 47 | echo Downloading $SPARK_SHA512_URL 48 | wget $SPARK_SHA512_URL -P $S2I_SOURCE_DIR 49 | fi 50 | 51 | for spark in $(ls "$S2I_SOURCE_DIR"); do 52 | 53 | spark=$S2I_SOURCE_DIR/$spark 54 | echo Found $spark 55 | echo Checking for valid Spark archive 56 | 57 | # Is the file a directory? If it contains spark-submit, move it 58 | if [ -d "$spark" ]; then 59 | if ! [ -f $spark/bin/spark-submit ]; then 60 | echo Ignoring directory $spark, no spark-submit 61 | continue 62 | fi 63 | echo Installing from directory $spark 64 | sparkdir=$SPARK_INSTALL/$(basename $spark) 65 | mv $spark $SPARK_INSTALL 66 | else 67 | # If we can get the table of contents, it's a tar archive, otherwise ignore 68 | tar -tf $spark &> /dev/null 69 | if [ "$?" -ne 0 ]; then 70 | echo Ignoring $spark, not a tar archive 71 | continue 72 | fi 73 | echo Validating tar archive $spark 74 | 75 | # Does the tarball contain a spark-submit? 76 | name=$(tar -tzf $spark | grep -e "^[^/]*/bin/spark-submit$") 77 | if [ "$?" -ne 0 ]; then 78 | echo Ignoring tarball $spark, no spark-submit 79 | continue 80 | else 81 | echo Found valid tar archive, matching checksums 82 | # See if we have an sha512 file to match against 83 | if [ -f "$spark".sha512 ]; then 84 | calcvalue=$(sha512sum "$spark" | cut -d\ -f1) 85 | # split the sha512 file using a colon 86 | match_sum "$spark".sha512 \: $calcvalue 87 | matched="$?" 88 | if [ "$matched" -ne 0 ]; then 89 | # split the sha512 file using equals sign in case it's BSD 90 | match_sum "$spark".sha512 \= $calcvalue 91 | matched="$?" 
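# Note: the two attempts above are meant to cover the checksum layouts commonly published
# for Apache downloads (illustrative examples):
#   spark-3.0.1-bin-hadoop3.2.tgz: ABCD 1234 ...          (colon-delimited, gpg --print-md style)
#   SHA512 (spark-3.0.1-bin-hadoop3.2.tgz) = abcd1234...   (equals-delimited, BSD style)
# match_sum strips whitespace, splits on the given delimiter, lowercases both halves, and
# accepts the tarball if either half equals the locally computed sha512 digest.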
92 | fi 93 | if [ "$matched" -ne 0 ]; then 94 | echo Ignoring tarball $spark, sha512sum did not match 95 | continue 96 | fi 97 | fi 98 | 99 | # dname will be the intial directory from the path of spark-submit 100 | # we found in the tarball, ie the dir created by tar 101 | echo Installing from tarball $spark 102 | dname=$(dirname $name | cut -d/ -f 1) 103 | sparkdir=$SPARK_INSTALL/$dname 104 | tar -xzf $spark -C $SPARK_INSTALL 105 | fi 106 | fi 107 | 108 | ln -s $sparkdir $SPARK_INSTALL/distro 109 | 110 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 111 | entry=$(find $sparkdir/kubernetes -name entrypoint.sh) 112 | if [ -n "$entry" ]; then 113 | echo Installing spark native entrypoint for use with spark-on-k8s commands 114 | cp $entry $SPARK_INSTALL 115 | 116 | # We want to get rid of the tini invocation 117 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 118 | else 119 | echo No spark native entrypoint found for use with spark-on-k8s commands 120 | fi 121 | 122 | # Include the default spark configuration files 123 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 124 | 125 | # If someone included mods in a parallel directory, install them with rsync 126 | # Don't try to preserve permisions, owner, or group because we don't have 127 | # any control over how s2i uploaded the files, so there's no use preserving. 128 | if [ -x /usr/bin/rsync ] && [ -d "$S2I_SOURCE_DIR/modify-spark" ]; then 129 | echo Found a modify-spark directory, running rsync to install changes 130 | rsync -vrltD "$S2I_SOURCE_DIR/modify-spark/" $SPARK_HOME 131 | fi 132 | 133 | # Spark workers need to write to the spark directory to track apps 134 | chmod -R g+rwX $sparkdir 135 | 136 | # Can we run spark-submit? 137 | $SPARK_HOME/bin/spark-submit --version 138 | if [ "$?" -eq 0 ]; then 139 | echo Spark installed successfully 140 | exit 0 141 | else 142 | echo Cannot run spark-submit, Spark install failed 143 | fi 144 | 145 | # Just in case there is more than one tarball, clean up 146 | rm -rf $sparkdir 147 | done 148 | 149 | echo no valid Spark distribution found 150 | 151 | if [ -n "$DEBUG_ASSEMBLE" ]; then 152 | echo Looping forever so you can \'oc rsh\' 153 | while true; do 154 | sleep 5 155 | done 156 | fi 157 | exit 1 158 | fi 159 | -------------------------------------------------------------------------------- /modules/s2i/added/s2i-env-vars: -------------------------------------------------------------------------------- 1 | # Local vars setup with defaults 2 | S2I_DESTINATION=${S2I_DESTINATION:-/tmp} 3 | S2I_SOURCE_DIR="${S2I_DESTINATION}/src" 4 | -------------------------------------------------------------------------------- /modules/s2i/added/usage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f "$SPARK_HOME"/bin/spark-submit ]; then 3 | cat <> /etc/passwd 33 | else 34 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 35 | fi 36 | fi 37 | 38 | SPARK_K8S_CMD="$1" 39 | if [ -z "$SPARK_K8S_CMD" ]; then 40 | echo "No command to execute has been provided." 
1>&2 41 | exit 1 42 | fi 43 | shift 1 44 | 45 | SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*" 46 | env | grep SPARK_JAVA_OPT_ | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt 47 | readarray -t SPARK_JAVA_OPTS < /tmp/java_opts.txt 48 | if [ -n "$SPARK_MOUNTED_CLASSPATH" ]; then 49 | SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_MOUNTED_CLASSPATH" 50 | fi 51 | if [ -n "$SPARK_MOUNTED_FILES_DIR" ]; then 52 | cp -R "$SPARK_MOUNTED_FILES_DIR/." . 53 | fi 54 | 55 | case "$SPARK_K8S_CMD" in 56 | driver) 57 | CMD=( 58 | ${JAVA_HOME}/bin/java 59 | "${SPARK_JAVA_OPTS[@]}" 60 | -cp "$SPARK_CLASSPATH" 61 | -Xms$SPARK_DRIVER_MEMORY 62 | -Xmx$SPARK_DRIVER_MEMORY 63 | -Dspark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS 64 | $SPARK_DRIVER_CLASS 65 | $SPARK_DRIVER_ARGS 66 | ) 67 | ;; 68 | 69 | executor) 70 | CMD=( 71 | ${JAVA_HOME}/bin/java 72 | "${SPARK_JAVA_OPTS[@]}" 73 | -Xms$SPARK_EXECUTOR_MEMORY 74 | -Xmx$SPARK_EXECUTOR_MEMORY 75 | -cp "$SPARK_CLASSPATH" 76 | org.apache.spark.executor.CoarseGrainedExecutorBackend 77 | --driver-url $SPARK_DRIVER_URL 78 | --executor-id $SPARK_EXECUTOR_ID 79 | --cores $SPARK_EXECUTOR_CORES 80 | --app-id $SPARK_APPLICATION_ID 81 | --hostname $SPARK_EXECUTOR_POD_IP 82 | ) 83 | ;; 84 | 85 | init) 86 | CMD=( 87 | "$SPARK_HOME/bin/spark-class" 88 | "org.apache.spark.deploy.k8s.SparkPodInitContainer" 89 | "$@" 90 | ) 91 | ;; 92 | 93 | *) 94 | echo "Unknown command: $SPARK_K8S_CMD" 1>&2 95 | exit 1 96 | esac 97 | 98 | exec "${CMD[@]}" 99 | -------------------------------------------------------------------------------- /modules/spark/check_for_download: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo "checking length of file $1" 3 | if ! [ -s "$1" ]; then 4 | filename=$(basename $1) 5 | version=$(echo $filename | cut -d '-' -f2) 6 | wget https://archive.apache.org/dist/spark/spark-$version/$filename -O $1 7 | fi 8 | -------------------------------------------------------------------------------- /modules/spark/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | SCRIPT_DIR=$(dirname $0) 4 | ADDED_DIR=${SCRIPT_DIR}/added 5 | ARTIFACTS_DIR=/tmp/artifacts 6 | 7 | # If there is a zero-length spark tarball, find the verison in the 8 | # name and download from Apache 9 | fullname=$(find $ARTIFACTS_DIR -name spark-[0-9.]*\.tgz) 10 | /bin/sh -x $SCRIPT_DIR/check_for_download $fullname 11 | 12 | # Make a place for spark to go (dupe what's done in common in case we're standalone) 13 | if ! [ -d $SPARK_INSTALL ]; then 14 | mkdir -p $SPARK_INSTALL/conf 15 | ln -sfn $SPARK_INSTALL/distro $SPARK_HOME 16 | fi 17 | 18 | pushd $SPARK_INSTALL 19 | cp $fullname . 
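# Layout note: the tarball is unpacked under $SPARK_INSTALL and the "distro" symlink points
# at the versioned directory it creates, while $SPARK_HOME (/opt/spark) is itself a symlink
# to $SPARK_INSTALL/distro (see the common module). With the 3.0.1 artifact, for example,
# /opt/spark/bin/spark-submit resolves to
# /opt/spark-distro/spark-3.0.1-bin-hadoop3.2/bin/spark-submit.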
20 | tar -zxf $(basename $fullname) 21 | ln -s $(basename $fullname .tgz) distro 22 | rm $(basename $fullname) 23 | popd 24 | 25 | # Add in the configuration files (from the common module) if they don't already exist 26 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 27 | 28 | # Make everything under the spark directory accessible to the group 29 | chown 185:0 $SPARK_INSTALL/distro && chmod g+rwX $SPARK_INSTALL/distro 30 | 31 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 32 | entry=$(find $SPARK_HOME/kubernetes -name entrypoint.sh) 33 | if [ -n "$entry" ]; then 34 | cp $entry $SPARK_INSTALL 35 | 36 | # We want to get rid of the tini invocation 37 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 38 | fi 39 | -------------------------------------------------------------------------------- /modules/spark/module.yaml: -------------------------------------------------------------------------------- 1 | version: 1.0 2 | 3 | name: spark 4 | envs: 5 | - name: PATH 6 | value: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin 7 | - name: SPARK_HOME 8 | value: /opt/spark 9 | - name: SPARK_INSTALL 10 | value: /opt/spark-distro 11 | packages: 12 | install: 13 | - wget 14 | artifacts: 15 | - url: https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz 16 | md5: 31e019e35e75a4c55c7efa4464641bf1 17 | execute: 18 | - script: install 19 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Red Hat 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # ------------------------------------------------------------------------ 16 | # 17 | # This is a Dockerfile for the radanalyticsio/openshift-spark-inc:3.0 image. 
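# This Dockerfile is regenerated from image-inc.yaml and the modules/ sources (see
# make-build-dir.sh at the repository root and the io.cekit.version label below), so changes
# are normally made there rather than by editing this file. A local image can be built from
# this context with, for example (illustrative, assuming docker or podman is available):
#   docker build -t radanalyticsio/openshift-spark-inc:3.0 openshift-spark-build-inc/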
18 | 19 | 20 | ## START target image radanalyticsio/openshift-spark-inc:3.0 21 | ## \ 22 | FROM centos:8 23 | 24 | USER root 25 | 26 | ###### START module 'common:1.0' 27 | ###### \ 28 | # Copy 'common' module content 29 | COPY modules/common /tmp/scripts/common 30 | # Switch to 'root' user to install 'common' module defined packages 31 | USER root 32 | # Install packages defined in the 'common' module 33 | RUN yum --setopt=tsflags=nodocs install -y python36 \ 34 | && rpm -q python36 35 | # Set 'common' module defined environment variables 36 | ENV \ 37 | SPARK_INSTALL="/opt/spark-distro" 38 | # Custom scripts from 'common' module 39 | USER root 40 | RUN [ "sh", "-x", "/tmp/scripts/common/install" ] 41 | ###### / 42 | ###### END module 'common:1.0' 43 | 44 | ###### START module 'metrics:1.0' 45 | ###### \ 46 | # Copy 'metrics' module content 47 | COPY modules/metrics /tmp/scripts/metrics 48 | # Custom scripts from 'metrics' module 49 | USER root 50 | RUN [ "sh", "-x", "/tmp/scripts/metrics/install" ] 51 | ###### / 52 | ###### END module 'metrics:1.0' 53 | 54 | ###### START module 's2i:1.0' 55 | ###### \ 56 | # Copy 's2i' module content 57 | COPY modules/s2i /tmp/scripts/s2i 58 | # Switch to 'root' user to install 's2i' module defined packages 59 | USER root 60 | # Install packages defined in the 's2i' module 61 | RUN yum --setopt=tsflags=nodocs install -y wget \ 62 | && rpm -q wget 63 | # Set 's2i' module defined environment variables 64 | ENV \ 65 | PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin" \ 66 | SPARK_HOME="/opt/spark" \ 67 | SPARK_INSTALL="/opt/spark-distro" \ 68 | STI_SCRIPTS_PATH="/usr/libexec/s2i" 69 | # Custom scripts from 's2i' module 70 | USER root 71 | RUN [ "sh", "-x", "/tmp/scripts/s2i/install" ] 72 | ###### / 73 | ###### END module 's2i:1.0' 74 | 75 | ###### START image 'radanalyticsio/openshift-spark-inc:3.0' 76 | ###### \ 77 | # Switch to 'root' user to install 'radanalyticsio/openshift-spark-inc' image defined packages 78 | USER root 79 | # Install packages defined in the 'radanalyticsio/openshift-spark-inc' image 80 | RUN yum --setopt=tsflags=nodocs install -y java-11-openjdk rsync \ 81 | && rpm -q java-11-openjdk rsync 82 | # Set 'radanalyticsio/openshift-spark-inc' image defined environment variables 83 | ENV \ 84 | JBOSS_IMAGE_NAME="radanalyticsio/openshift-spark-inc" \ 85 | JBOSS_IMAGE_VERSION="3.0" 86 | # Set 'radanalyticsio/openshift-spark-inc' image defined labels 87 | LABEL \ 88 | io.cekit.version="3.6.0" \ 89 | io.openshift.s2i.scripts-url="image:///usr/libexec/s2i" \ 90 | maintainer="Trevor McKay " \ 91 | name="radanalyticsio/openshift-spark-inc" \ 92 | version="3.0" 93 | ###### / 94 | ###### END image 'radanalyticsio/openshift-spark-inc:3.0' 95 | 96 | 97 | # Switch to 'root' user and remove artifacts and modules 98 | USER root 99 | RUN [ ! -d /tmp/scripts ] || rm -rf /tmp/scripts 100 | RUN [ ! -d /tmp/artifacts ] || rm -rf /tmp/artifacts 101 | 102 | # Clear package manager metadata 103 | RUN yum clean all && [ ! 
-d /var/cache/yum ] || rm -rf /var/cache/yum 104 | 105 | # Define the user 106 | USER 185 107 | # Define entrypoint 108 | ENTRYPOINT ["/entrypoint"] 109 | # Define run cmd 110 | CMD ["/usr/libexec/s2i/usage"] 111 | ## / 112 | ## END target image -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/agent-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | lowercaseOutputName: true 3 | lowercaseOutputLabelNames: true 4 | whitelistObjectNames: 5 | - 'metrics:*' 6 | 7 | rules: 8 | 9 | # These come from the master 10 | # Example: master.aliveWorkers 11 | - pattern: "metrics<>Value" 12 | name: spark_master_$1 13 | 14 | # These come from the worker 15 | # Example: worker.coresFree 16 | - pattern: "metrics<>Value" 17 | name: spark_worker_$1 18 | 19 | # These come from the application driver 20 | # Example: app-20160809000059-0000.driver.DAGScheduler.stage.failedStages 21 | - pattern: "metrics<>Value" 22 | name: spark_driver_$2_$3 23 | labels: 24 | app_id: "$1" 25 | 26 | # These come from the application driver if it's a streaming application 27 | # Example: app-20160809000059-0000.driver.com.example.ClassName.StreamingMetrics.streaming.lastCompletedBatch_schedulingDelay 28 | - pattern: "metrics<>Value" 29 | name: spark_streaming_driver_$3 30 | labels: 31 | app_id: "$1" 32 | app_name: "$2" 33 | 34 | # These come from the application driver if it's a structured streaming application 35 | # Example: app-20160809000059-0000.driver.spark.streaming.QueryName.inputRate-total 36 | - pattern: "metrics<>Value" 37 | name: spark_structured_streaming_driver_$3 38 | labels: 39 | app_id: "$1" 40 | query_name: "$2" 41 | 42 | # These come from the application executors 43 | # Example: app-20160809000059-0000.0.executor.threadpool.activeTasks 44 | - pattern: "metrics<>Value" 45 | name: spark_executor_$3 46 | labels: 47 | app_id: "$1" 48 | executor_id: "$2" 49 | 50 | # These come from the master 51 | # Example: application.com.example.ClassName.1470700859054.cores 52 | - pattern: "metrics<>Value" 53 | name: spark_application_$3 54 | labels: 55 | app_name: "$1" 56 | app_start_epoch: "$2" 57 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/agent.properties: -------------------------------------------------------------------------------- 1 | jmx_exporter=7777:/opt/spark/conf/agent-config.yaml 2 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/metrics.properties: -------------------------------------------------------------------------------- 1 | *.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 2 | master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 3 | worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 4 | driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 5 | executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 6 | application.source.jvm.class=org.apache.spark.metrics.source.JvmSource 7 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 
20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.ui.reverseProxy true 29 | spark.ui.reverseProxyUrl / 30 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/scripts/entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If we get an s2i command and it's anything but "run" just do it 4 | # Otherwise we'll turn it into a launch 5 | if [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == *"$STI_SCRIPTS_PATH"* ]]; then 6 | if ! [[ $@ == *"$STI_SCRIPTS_PATH"/run* ]]; then 7 | exec "$@" 8 | exit $? 9 | fi 10 | CMD=/launch.sh 11 | 12 | # allow just a simple "usage" command to print the usage script 13 | elif [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == "usage" ]]; then 14 | exec $STI_SCRIPTS_PATH/usage 15 | exit $? 16 | else 17 | CMD=$@ 18 | fi 19 | 20 | trap handle_term TERM INT 21 | 22 | function handle_term { 23 | echo Received a termination signal 24 | 25 | local cnt 26 | local killed=1 27 | if [ -n "$PID" ]; then 28 | echo "Stopping subprocess $PID" 29 | kill -TERM $PID 30 | for cnt in {1..10} 31 | do 32 | kill -0 $PID >/dev/null 2>&1 33 | if [ "$?" -ne 0 ]; then 34 | killed=0 35 | break 36 | else 37 | sleep 1 38 | fi 39 | done 40 | if [ "$killed" -ne 0 ]; then 41 | echo Process is still running 10 seconds after TERM, sending KILL 42 | kill -9 $PID 43 | fi 44 | wait $PID 45 | echo "Subprocess stopped" 46 | fi 47 | exit 0 48 | } 49 | 50 | function patch_uid { 51 | # Check whether there is a passwd entry for the container UID 52 | myuid=$(id -u) 53 | mygid=$(id -g) 54 | uidentry=$(getent passwd $myuid) 55 | 56 | # If there is no passwd entry for the container UID, attempt to create one 57 | if [ -z "$uidentry" ] ; then 58 | if [ -w /etc/passwd ] ; then 59 | echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd 60 | else 61 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 62 | fi 63 | fi 64 | } 65 | 66 | # If we receive a spark-on-kube command, hand it off to the 67 | # standard spark entrypoint 68 | case "$1" in 69 | driver | executor) 70 | $SPARK_INSTALL/entrypoint.sh $CMD & 71 | ;; 72 | *) 73 | patch_uid 74 | $CMD & 75 | ;; 76 | esac 77 | PID=$! 78 | wait $PID 79 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/scripts/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function check_reverse_proxy { 4 | grep -e "^spark\.ui\.reverseProxy" $SPARK_HOME/conf/spark-defaults.conf &> /dev/null 5 | if [ "$?" 
-ne 0 ]; then 6 | echo "Appending default reverse proxy config to spark-defaults.conf" 7 | echo "spark.ui.reverseProxy true" >> $SPARK_HOME/conf/spark-defaults.conf 8 | echo "spark.ui.reverseProxyUrl /" >> $SPARK_HOME/conf/spark-defaults.conf 9 | fi 10 | } 11 | 12 | # If the UPDATE_SPARK_CONF_DIR dir is non-empty, 13 | # copy the contents to $SPARK_HOME/conf 14 | if [ -d "$UPDATE_SPARK_CONF_DIR" ]; then 15 | sparkconfs=$(ls -1 $UPDATE_SPARK_CONF_DIR | wc -l) 16 | if [ "$sparkconfs" -ne "0" ]; then 17 | echo "Copying from $UPDATE_SPARK_CONF_DIR to $SPARK_HOME/conf" 18 | ls -1 $UPDATE_SPARK_CONF_DIR 19 | cp $UPDATE_SPARK_CONF_DIR/* $SPARK_HOME/conf 20 | fi 21 | elif [ -n "$UPDATE_SPARK_CONF_DIR" ]; then 22 | echo "Directory $UPDATE_SPARK_CONF_DIR does not exist, using default spark config" 23 | fi 24 | 25 | check_reverse_proxy 26 | 27 | if [ -z ${SPARK_METRICS_ON+_} ]; then 28 | JAVA_AGENT= 29 | metrics="" 30 | elif [ ${SPARK_METRICS_ON} == "prometheus" ]; then 31 | JAVA_AGENT=" -javaagent:/opt/metrics/agent-bond.jar=$SPARK_HOME/conf/agent.properties" 32 | metrics=" with prometheus metrics enabled" 33 | else 34 | JAVA_AGENT=" -javaagent:/opt/metrics/jolokia-jvm-1.3.6-agent.jar=port=7777,host=0.0.0.0" 35 | metrics=" with jolokia metrics enabled (deprecated, set SPARK_METRICS_ON to 'prometheus')" 36 | fi 37 | 38 | if [ -z ${SPARK_MASTER_ADDRESS+_} ]; then 39 | echo "Starting master$metrics" 40 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.master.Master 41 | else 42 | echo "Starting worker$metrics, will connect to: $SPARK_MASTER_ADDRESS" 43 | 44 | # spark://x.y.z:7077 -> x.y.z/7077 45 | _MASTER_HOST_AND_PORT=$(echo $SPARK_MASTER_ADDRESS | sed -r 's;.*//(.*):(.*);\1/\2;g') 46 | while true; do 47 | echo "Waiting for spark master to be available ..." 48 | timeout 1 sh -c "(/dev/null" 49 | if [ $? -eq 0 ]; then 50 | break 51 | fi 52 | sleep 1 53 | done 54 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.worker.Worker $SPARK_MASTER_ADDRESS 55 | fi 56 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | 8 | # Put entrypoint and launch.sh at the root 9 | cp $ADDED_DIR/scripts/* / 10 | 11 | # Set up a place for spark to go 12 | # We'll also stage our default spark config files here 13 | # so that when spark is installed they can be copied over 14 | # if the spark tarball itself does not include files of the same name 15 | # (ie, "copy if not overwrite") 16 | if ! [ -d $SPARK_INSTALL ]; then 17 | mkdir -p $SPARK_INSTALL 18 | mv $ADDED_DIR/conf $SPARK_INSTALL 19 | chown -R 185:0 $SPARK_INSTALL && chmod -R g+rwX $SPARK_INSTALL 20 | ln -sfn $SPARK_INSTALL/distro /opt/spark 21 | fi 22 | 23 | # Change the permissions on /etc/passwd so that anonymous user 24 | # can be added to satisfy Spark 25 | chgrp root /etc/passwd && chmod g+rw /etc/passwd 26 | 27 | # Make Python3 the default. 
This is important because it seems 28 | # that Spark still wants to invoke python scripts with "python" 29 | # in the executors, as opposed to "python3" 30 | alternatives --set python /usr/bin/python3 31 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: [] 2 | envs: 3 | - name: SPARK_INSTALL 4 | value: /opt/spark-distro 5 | execute: 6 | - directory: common 7 | module_name: common 8 | name: common/install 9 | script: install 10 | labels: [] 11 | modules: 12 | install: [] 13 | repositories: [] 14 | name: common 15 | osbs: 16 | configuration: {} 17 | repository: {} 18 | packages: 19 | install: 20 | - python36 21 | repositories: [] 22 | ports: [] 23 | version: 1.0 24 | volumes: [] 25 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/metrics/added/agent-bond.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build-inc/modules/metrics/added/agent-bond.jar -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build-inc/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/metrics/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -u 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | mkdir -p /opt/metrics 8 | mv $ADDED_DIR/*.jar /opt/metrics 9 | 10 | chown -R 185:0 /opt/metrics && chmod g+rwX /opt/metrics 11 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/metrics/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: [] 2 | envs: [] 3 | execute: 4 | - directory: metrics 5 | module_name: metrics 6 | name: metrics/install 7 | script: install 8 | labels: [] 9 | modules: 10 | install: [] 11 | repositories: [] 12 | name: metrics 13 | osbs: 14 | configuration: {} 15 | repository: {} 16 | packages: 17 | install: [] 18 | repositories: [] 19 | ports: [] 20 | version: 1.0 21 | volumes: [] 22 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/s2i/added/assemble: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source $STI_SCRIPTS_PATH/s2i-env-vars 4 | 5 | # Just a word about the directory structure 6 | # SPARK_HOME == /opt/spark 7 | # SPARK_INSTALL == /opt/spark-distro 8 | 9 | # Extra things like default configuration files and additional 10 | # boot scripts may be stored in SPARK_INSTALL 11 | 12 | # At runtime, /opt/spark is a symlink to /opt/spark-distro/distro 13 | # but /opt/spark-distro/distro does not actually exist yet 14 | 15 | # The Spark tarball will be expanded in /opt/spark-distro using 16 | # it's original name, for example 
/opt/spark-distro/spark-2.3.0-bin-hadoop2.7, 17 | # as a dev aid to tracking and version checking 18 | 19 | # Ultimately, /opt/spark-distro/distro is created as a symlink to the Spark root 20 | # directory. This double-hop from /opt/spark to the Spark root through symlinks 21 | # allows the Spark installation to be staged in the base image but completed in 22 | # the S2I build without expanding permissions 23 | 24 | function match_sum { 25 | local sumfile=$1 26 | local delim=$2 27 | local sha512=$3 28 | local initial=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" -f1 | tr [:upper:] [:lower:]) 29 | local rest=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" --complement -f1 | tr [:upper:] [:lower:]) 30 | if [ "$sha512" == "$initial" ] || [ "$sha512" == "$rest" ]; then 31 | return 0 32 | fi 33 | return 1 34 | } 35 | 36 | if [ -f $SPARK_HOME/bin/spark-submit ]; then 37 | echo "Spark is installed, nothing to do" 38 | exit 1 39 | else 40 | echo "Attempting to install Spark" 41 | # If a url has been specfified for spark use it 42 | if [ -n "$SPARK_URL" ]; then 43 | echo Downloading $SPARK_URL 44 | wget $SPARK_URL -P $S2I_SOURCE_DIR 45 | fi 46 | if [ -n "$SPARK_SHA512_URL" ]; then 47 | echo Downloading $SPARK_SHA512_URL 48 | wget $SPARK_SHA512_URL -P $S2I_SOURCE_DIR 49 | fi 50 | 51 | for spark in $(ls "$S2I_SOURCE_DIR"); do 52 | 53 | spark=$S2I_SOURCE_DIR/$spark 54 | echo Found $spark 55 | echo Checking for valid Spark archive 56 | 57 | # Is the file a directory? If it contains spark-submit, move it 58 | if [ -d "$spark" ]; then 59 | if ! [ -f $spark/bin/spark-submit ]; then 60 | echo Ignoring directory $spark, no spark-submit 61 | continue 62 | fi 63 | echo Installing from directory $spark 64 | sparkdir=$SPARK_INSTALL/$(basename $spark) 65 | mv $spark $SPARK_INSTALL 66 | else 67 | # If we can get the table of contents, it's a tar archive, otherwise ignore 68 | tar -tf $spark &> /dev/null 69 | if [ "$?" -ne 0 ]; then 70 | echo Ignoring $spark, not a tar archive 71 | continue 72 | fi 73 | echo Validating tar archive $spark 74 | 75 | # Does the tarball contain a spark-submit? 76 | name=$(tar -tzf $spark | grep -e "^[^/]*/bin/spark-submit$") 77 | if [ "$?" -ne 0 ]; then 78 | echo Ignoring tarball $spark, no spark-submit 79 | continue 80 | else 81 | echo Found valid tar archive, matching checksums 82 | # See if we have an sha512 file to match against 83 | if [ -f "$spark".sha512 ]; then 84 | calcvalue=$(sha512sum "$spark" | cut -d\ -f1) 85 | # split the sha512 file using a colon 86 | match_sum "$spark".sha512 \: $calcvalue 87 | matched="$?" 88 | if [ "$matched" -ne 0 ]; then 89 | # split the sha512 file using equals sign in case it's BSD 90 | match_sum "$spark".sha512 \= $calcvalue 91 | matched="$?" 
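# (matched is zero only if one of the two delimiter styles, ':' or '=', yielded a sha512 that agrees with the computed value)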
92 | fi 93 | if [ "$matched" -ne 0 ]; then 94 | echo Ignoring tarball $spark, sha512sum did not match 95 | continue 96 | fi 97 | fi 98 | 99 | # dname will be the intial directory from the path of spark-submit 100 | # we found in the tarball, ie the dir created by tar 101 | echo Installing from tarball $spark 102 | dname=$(dirname $name | cut -d/ -f 1) 103 | sparkdir=$SPARK_INSTALL/$dname 104 | tar -xzf $spark -C $SPARK_INSTALL 105 | fi 106 | fi 107 | 108 | ln -s $sparkdir $SPARK_INSTALL/distro 109 | 110 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 111 | entry=$(find $sparkdir/kubernetes -name entrypoint.sh) 112 | if [ -n "$entry" ]; then 113 | echo Installing spark native entrypoint for use with spark-on-k8s commands 114 | cp $entry $SPARK_INSTALL 115 | 116 | # We want to get rid of the tini invocation 117 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 118 | else 119 | echo No spark native entrypoint found for use with spark-on-k8s commands 120 | fi 121 | 122 | # Include the default spark configuration files 123 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 124 | 125 | # If someone included mods in a parallel directory, install them with rsync 126 | # Don't try to preserve permisions, owner, or group because we don't have 127 | # any control over how s2i uploaded the files, so there's no use preserving. 128 | if [ -x /usr/bin/rsync ] && [ -d "$S2I_SOURCE_DIR/modify-spark" ]; then 129 | echo Found a modify-spark directory, running rsync to install changes 130 | rsync -vrltD "$S2I_SOURCE_DIR/modify-spark/" $SPARK_HOME 131 | fi 132 | 133 | # Spark workers need to write to the spark directory to track apps 134 | chmod -R g+rwX $sparkdir 135 | 136 | # Can we run spark-submit? 137 | $SPARK_HOME/bin/spark-submit --version 138 | if [ "$?" 
-eq 0 ]; then 139 | echo Spark installed successfully 140 | exit 0 141 | else 142 | echo Cannot run spark-submit, Spark install failed 143 | fi 144 | 145 | # Just in case there is more than one tarball, clean up 146 | rm -rf $sparkdir 147 | done 148 | 149 | echo no valid Spark distribution found 150 | 151 | if [ -n "$DEBUG_ASSEMBLE" ]; then 152 | echo Looping forever so you can \'oc rsh\' 153 | while true; do 154 | sleep 5 155 | done 156 | fi 157 | exit 1 158 | fi 159 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/s2i/added/s2i-env-vars: -------------------------------------------------------------------------------- 1 | # Local vars setup with defaults 2 | S2I_DESTINATION=${S2I_DESTINATION:-/tmp} 3 | S2I_SOURCE_DIR="${S2I_DESTINATION}/src" 4 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/s2i/added/usage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f "$SPARK_HOME"/bin/spark-submit ]; then 3 | cat <<>Value" 12 | name: spark_master_$1 13 | 14 | # These come from the worker 15 | # Example: worker.coresFree 16 | - pattern: "metrics<>Value" 17 | name: spark_worker_$1 18 | 19 | # These come from the application driver 20 | # Example: app-20160809000059-0000.driver.DAGScheduler.stage.failedStages 21 | - pattern: "metrics<>Value" 22 | name: spark_driver_$2_$3 23 | labels: 24 | app_id: "$1" 25 | 26 | # These come from the application driver if it's a streaming application 27 | # Example: app-20160809000059-0000.driver.com.example.ClassName.StreamingMetrics.streaming.lastCompletedBatch_schedulingDelay 28 | - pattern: "metrics<>Value" 29 | name: spark_streaming_driver_$3 30 | labels: 31 | app_id: "$1" 32 | app_name: "$2" 33 | 34 | # These come from the application driver if it's a structured streaming application 35 | # Example: app-20160809000059-0000.driver.spark.streaming.QueryName.inputRate-total 36 | - pattern: "metrics<>Value" 37 | name: spark_structured_streaming_driver_$3 38 | labels: 39 | app_id: "$1" 40 | query_name: "$2" 41 | 42 | # These come from the application executors 43 | # Example: app-20160809000059-0000.0.executor.threadpool.activeTasks 44 | - pattern: "metrics<>Value" 45 | name: spark_executor_$3 46 | labels: 47 | app_id: "$1" 48 | executor_id: "$2" 49 | 50 | # These come from the master 51 | # Example: application.com.example.ClassName.1470700859054.cores 52 | - pattern: "metrics<>Value" 53 | name: spark_application_$3 54 | labels: 55 | app_name: "$1" 56 | app_start_epoch: "$2" 57 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/conf/agent.properties: -------------------------------------------------------------------------------- 1 | jmx_exporter=7777:/opt/spark/conf/agent-config.yaml 2 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 
5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/conf/metrics.properties: -------------------------------------------------------------------------------- 1 | *.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 2 | master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 3 | worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 4 | driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 5 | executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 6 | application.source.jvm.class=org.apache.spark.metrics.source.JvmSource 7 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/conf/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.ui.reverseProxy true 29 | spark.ui.reverseProxyUrl / 30 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/scripts/entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If we get an s2i command and it's anything but "run" just do it 4 | # Otherwise we'll turn it into a launch 5 | if [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == *"$STI_SCRIPTS_PATH"* ]]; then 6 | if ! [[ $@ == *"$STI_SCRIPTS_PATH"/run* ]]; then 7 | exec "$@" 8 | exit $? 9 | fi 10 | CMD=/launch.sh 11 | 12 | # allow just a simple "usage" command to print the usage script 13 | elif [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == "usage" ]]; then 14 | exec $STI_SCRIPTS_PATH/usage 15 | exit $? 16 | else 17 | CMD=$@ 18 | fi 19 | 20 | trap handle_term TERM INT 21 | 22 | function handle_term { 23 | echo Received a termination signal 24 | 25 | local cnt 26 | local killed=1 27 | if [ -n "$PID" ]; then 28 | echo "Stopping subprocess $PID" 29 | kill -TERM $PID 30 | for cnt in {1..10} 31 | do 32 | kill -0 $PID >/dev/null 2>&1 33 | if [ "$?" -ne 0 ]; then 34 | killed=0 35 | break 36 | else 37 | sleep 1 38 | fi 39 | done 40 | if [ "$killed" -ne 0 ]; then 41 | echo Process is still running 10 seconds after TERM, sending KILL 42 | kill -9 $PID 43 | fi 44 | wait $PID 45 | echo "Subprocess stopped" 46 | fi 47 | exit 0 48 | } 49 | 50 | function patch_uid { 51 | # Check whether there is a passwd entry for the container UID 52 | myuid=$(id -u) 53 | mygid=$(id -g) 54 | uidentry=$(getent passwd $myuid) 55 | 56 | # If there is no passwd entry for the container UID, attempt to create one 57 | if [ -z "$uidentry" ] ; then 58 | if [ -w /etc/passwd ] ; then 59 | echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd 60 | else 61 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 62 | fi 63 | fi 64 | } 65 | 66 | # If we receive a spark-on-kube command, hand it off to the 67 | # standard spark entrypoint 68 | case "$1" in 69 | driver | executor) 70 | $SPARK_INSTALL/entrypoint.sh $CMD & 71 | ;; 72 | *) 73 | patch_uid 74 | $CMD & 75 | ;; 76 | esac 77 | PID=$! 
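# Note: the command runs in the background and the shell waits on its PID so the
# TERM/INT trap above can fire and forward the signal to the subprocess.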
78 | wait $PID 79 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/scripts/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function check_reverse_proxy { 4 | grep -e "^spark\.ui\.reverseProxy" $SPARK_HOME/conf/spark-defaults.conf &> /dev/null 5 | if [ "$?" -ne 0 ]; then 6 | echo "Appending default reverse proxy config to spark-defaults.conf" 7 | echo "spark.ui.reverseProxy true" >> $SPARK_HOME/conf/spark-defaults.conf 8 | echo "spark.ui.reverseProxyUrl /" >> $SPARK_HOME/conf/spark-defaults.conf 9 | fi 10 | } 11 | 12 | # If the UPDATE_SPARK_CONF_DIR dir is non-empty, 13 | # copy the contents to $SPARK_HOME/conf 14 | if [ -d "$UPDATE_SPARK_CONF_DIR" ]; then 15 | sparkconfs=$(ls -1 $UPDATE_SPARK_CONF_DIR | wc -l) 16 | if [ "$sparkconfs" -ne "0" ]; then 17 | echo "Copying from $UPDATE_SPARK_CONF_DIR to $SPARK_HOME/conf" 18 | ls -1 $UPDATE_SPARK_CONF_DIR 19 | cp $UPDATE_SPARK_CONF_DIR/* $SPARK_HOME/conf 20 | fi 21 | elif [ -n "$UPDATE_SPARK_CONF_DIR" ]; then 22 | echo "Directory $UPDATE_SPARK_CONF_DIR does not exist, using default spark config" 23 | fi 24 | 25 | check_reverse_proxy 26 | 27 | if [ -z ${SPARK_METRICS_ON+_} ]; then 28 | JAVA_AGENT= 29 | metrics="" 30 | elif [ ${SPARK_METRICS_ON} == "prometheus" ]; then 31 | JAVA_AGENT=" -javaagent:/opt/metrics/agent-bond.jar=$SPARK_HOME/conf/agent.properties" 32 | metrics=" with prometheus metrics enabled" 33 | else 34 | JAVA_AGENT=" -javaagent:/opt/metrics/jolokia-jvm-1.3.6-agent.jar=port=7777,host=0.0.0.0" 35 | metrics=" with jolokia metrics enabled (deprecated, set SPARK_METRICS_ON to 'prometheus')" 36 | fi 37 | 38 | if [ -z ${SPARK_MASTER_ADDRESS+_} ]; then 39 | echo "Starting master$metrics" 40 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.master.Master 41 | else 42 | echo "Starting worker$metrics, will connect to: $SPARK_MASTER_ADDRESS" 43 | 44 | # spark://x.y.z:7077 -> x.y.z/7077 45 | _MASTER_HOST_AND_PORT=$(echo $SPARK_MASTER_ADDRESS | sed -r 's;.*//(.*):(.*);\1/\2;g') 46 | while true; do 47 | echo "Waiting for spark master to be available ..." 48 | timeout 1 sh -c "(/dev/null" 49 | if [ $? -eq 0 ]; then 50 | break 51 | fi 52 | sleep 1 53 | done 54 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.worker.Worker $SPARK_MASTER_ADDRESS 55 | fi 56 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | 8 | # Put entrypoint and launch.sh at the root 9 | cp $ADDED_DIR/scripts/* / 10 | 11 | # Set up a place for spark to go 12 | # We'll also stage our default spark config files here 13 | # so that when spark is installed they can be copied over 14 | # if the spark tarball itself does not include files of the same name 15 | # (ie, "copy if not overwrite") 16 | if ! [ -d $SPARK_INSTALL ]; then 17 | mkdir -p $SPARK_INSTALL 18 | mv $ADDED_DIR/conf $SPARK_INSTALL 19 | chown -R 185:0 $SPARK_INSTALL && chmod -R g+rwX $SPARK_INSTALL 20 | ln -sfn $SPARK_INSTALL/distro /opt/spark 21 | fi 22 | 23 | # Change the permissions on /etc/passwd so that anonymous user 24 | # can be added to satisfy Spark 25 | chgrp root /etc/passwd && chmod g+rw /etc/passwd 26 | 27 | # Make Python3 the default. 
This is important because it seems 28 | # that Spark still wants to invoke python scripts with "python" 29 | # in the executors, as opposed to "python3" 30 | alternatives --set python /usr/bin/python3 31 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: [] 2 | envs: 3 | - name: SPARK_INSTALL 4 | value: /opt/spark-distro 5 | execute: 6 | - directory: common 7 | module_name: common 8 | name: common/install 9 | script: install 10 | labels: [] 11 | modules: 12 | install: [] 13 | repositories: [] 14 | name: common 15 | osbs: 16 | configuration: {} 17 | repository: {} 18 | packages: 19 | install: 20 | - python36 21 | repositories: [] 22 | ports: [] 23 | version: 1.0 24 | volumes: [] 25 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/metrics/added/agent-bond.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build/modules/metrics/added/agent-bond.jar -------------------------------------------------------------------------------- /openshift-spark-build/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar -------------------------------------------------------------------------------- /openshift-spark-build/modules/metrics/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -u 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | mkdir -p /opt/metrics 8 | mv $ADDED_DIR/*.jar /opt/metrics 9 | 10 | chown -R 185:0 /opt/metrics && chmod g+rwX /opt/metrics 11 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/metrics/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: [] 2 | envs: [] 3 | execute: 4 | - directory: metrics 5 | module_name: metrics 6 | name: metrics/install 7 | script: install 8 | labels: [] 9 | modules: 10 | install: [] 11 | repositories: [] 12 | name: metrics 13 | osbs: 14 | configuration: {} 15 | repository: {} 16 | packages: 17 | install: [] 18 | repositories: [] 19 | ports: [] 20 | version: 1.0 21 | volumes: [] 22 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/s2i/added/assemble: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source $STI_SCRIPTS_PATH/s2i-env-vars 4 | 5 | # Just a word about the directory structure 6 | # SPARK_HOME == /opt/spark 7 | # SPARK_INSTALL == /opt/spark-distro 8 | 9 | # Extra things like default configuration files and additional 10 | # boot scripts may be stored in SPARK_INSTALL 11 | 12 | # At runtime, /opt/spark is a symlink to /opt/spark-distro/distro 13 | # but /opt/spark-distro/distro does not actually exist yet 14 | 15 | # The Spark tarball will be expanded in /opt/spark-distro using 16 | # it's original name, for example /opt/spark-distro/spark-2.3.0-bin-hadoop2.7, 17 | # as a 
dev aid to tracking and version checking 18 | 19 | # Ultimately, /opt/spark-distro/distro is created as a symlink to the Spark root 20 | # directory. This double-hop from /opt/spark to the Spark root through symlinks 21 | # allows the Spark installation to be staged in the base image but completed in 22 | # the S2I build without expanding permissions 23 | 24 | function match_sum { 25 | local sumfile=$1 26 | local delim=$2 27 | local sha512=$3 28 | local initial=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" -f1 | tr [:upper:] [:lower:]) 29 | local rest=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" --complement -f1 | tr [:upper:] [:lower:]) 30 | if [ "$sha512" == "$initial" ] || [ "$sha512" == "$rest" ]; then 31 | return 0 32 | fi 33 | return 1 34 | } 35 | 36 | if [ -f $SPARK_HOME/bin/spark-submit ]; then 37 | echo "Spark is installed, nothing to do" 38 | exit 1 39 | else 40 | echo "Attempting to install Spark" 41 | # If a url has been specfified for spark use it 42 | if [ -n "$SPARK_URL" ]; then 43 | echo Downloading $SPARK_URL 44 | wget $SPARK_URL -P $S2I_SOURCE_DIR 45 | fi 46 | if [ -n "$SPARK_SHA512_URL" ]; then 47 | echo Downloading $SPARK_SHA512_URL 48 | wget $SPARK_SHA512_URL -P $S2I_SOURCE_DIR 49 | fi 50 | 51 | for spark in $(ls "$S2I_SOURCE_DIR"); do 52 | 53 | spark=$S2I_SOURCE_DIR/$spark 54 | echo Found $spark 55 | echo Checking for valid Spark archive 56 | 57 | # Is the file a directory? If it contains spark-submit, move it 58 | if [ -d "$spark" ]; then 59 | if ! [ -f $spark/bin/spark-submit ]; then 60 | echo Ignoring directory $spark, no spark-submit 61 | continue 62 | fi 63 | echo Installing from directory $spark 64 | sparkdir=$SPARK_INSTALL/$(basename $spark) 65 | mv $spark $SPARK_INSTALL 66 | else 67 | # If we can get the table of contents, it's a tar archive, otherwise ignore 68 | tar -tf $spark &> /dev/null 69 | if [ "$?" -ne 0 ]; then 70 | echo Ignoring $spark, not a tar archive 71 | continue 72 | fi 73 | echo Validating tar archive $spark 74 | 75 | # Does the tarball contain a spark-submit? 76 | name=$(tar -tzf $spark | grep -e "^[^/]*/bin/spark-submit$") 77 | if [ "$?" -ne 0 ]; then 78 | echo Ignoring tarball $spark, no spark-submit 79 | continue 80 | else 81 | echo Found valid tar archive, matching checksums 82 | # See if we have an sha512 file to match against 83 | if [ -f "$spark".sha512 ]; then 84 | calcvalue=$(sha512sum "$spark" | cut -d\ -f1) 85 | # split the sha512 file using a colon 86 | match_sum "$spark".sha512 \: $calcvalue 87 | matched="$?" 88 | if [ "$matched" -ne 0 ]; then 89 | # split the sha512 file using equals sign in case it's BSD 90 | match_sum "$spark".sha512 \= $calcvalue 91 | matched="$?" 
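# (matched is zero only if one of the two delimiter styles, ':' or '=', yielded a sha512 that agrees with the computed value)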
92 | fi 93 | if [ "$matched" -ne 0 ]; then 94 | echo Ignoring tarball $spark, sha512sum did not match 95 | continue 96 | fi 97 | fi 98 | 99 | # dname will be the intial directory from the path of spark-submit 100 | # we found in the tarball, ie the dir created by tar 101 | echo Installing from tarball $spark 102 | dname=$(dirname $name | cut -d/ -f 1) 103 | sparkdir=$SPARK_INSTALL/$dname 104 | tar -xzf $spark -C $SPARK_INSTALL 105 | fi 106 | fi 107 | 108 | ln -s $sparkdir $SPARK_INSTALL/distro 109 | 110 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 111 | entry=$(find $sparkdir/kubernetes -name entrypoint.sh) 112 | if [ -n "$entry" ]; then 113 | echo Installing spark native entrypoint for use with spark-on-k8s commands 114 | cp $entry $SPARK_INSTALL 115 | 116 | # We want to get rid of the tini invocation 117 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 118 | else 119 | echo No spark native entrypoint found for use with spark-on-k8s commands 120 | fi 121 | 122 | # Include the default spark configuration files 123 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 124 | 125 | # If someone included mods in a parallel directory, install them with rsync 126 | # Don't try to preserve permisions, owner, or group because we don't have 127 | # any control over how s2i uploaded the files, so there's no use preserving. 128 | if [ -x /usr/bin/rsync ] && [ -d "$S2I_SOURCE_DIR/modify-spark" ]; then 129 | echo Found a modify-spark directory, running rsync to install changes 130 | rsync -vrltD "$S2I_SOURCE_DIR/modify-spark/" $SPARK_HOME 131 | fi 132 | 133 | # Spark workers need to write to the spark directory to track apps 134 | chmod -R g+rwX $sparkdir 135 | 136 | # Can we run spark-submit? 137 | $SPARK_HOME/bin/spark-submit --version 138 | if [ "$?" -eq 0 ]; then 139 | echo Spark installed successfully 140 | exit 0 141 | else 142 | echo Cannot run spark-submit, Spark install failed 143 | fi 144 | 145 | # Just in case there is more than one tarball, clean up 146 | rm -rf $sparkdir 147 | done 148 | 149 | echo no valid Spark distribution found 150 | 151 | if [ -n "$DEBUG_ASSEMBLE" ]; then 152 | echo Looping forever so you can \'oc rsh\' 153 | while true; do 154 | sleep 5 155 | done 156 | fi 157 | exit 1 158 | fi 159 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/s2i/added/s2i-env-vars: -------------------------------------------------------------------------------- 1 | # Local vars setup with defaults 2 | S2I_DESTINATION=${S2I_DESTINATION:-/tmp} 3 | S2I_SOURCE_DIR="${S2I_DESTINATION}/src" 4 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/s2i/added/usage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f "$SPARK_HOME"/bin/spark-submit ]; then 3 | cat <> /etc/passwd 33 | else 34 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 35 | fi 36 | fi 37 | 38 | SPARK_K8S_CMD="$1" 39 | if [ -z "$SPARK_K8S_CMD" ]; then 40 | echo "No command to execute has been provided." 
1>&2 41 | exit 1 42 | fi 43 | shift 1 44 | 45 | SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*" 46 | env | grep SPARK_JAVA_OPT_ | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt 47 | readarray -t SPARK_JAVA_OPTS < /tmp/java_opts.txt 48 | if [ -n "$SPARK_MOUNTED_CLASSPATH" ]; then 49 | SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_MOUNTED_CLASSPATH" 50 | fi 51 | if [ -n "$SPARK_MOUNTED_FILES_DIR" ]; then 52 | cp -R "$SPARK_MOUNTED_FILES_DIR/." . 53 | fi 54 | 55 | case "$SPARK_K8S_CMD" in 56 | driver) 57 | CMD=( 58 | ${JAVA_HOME}/bin/java 59 | "${SPARK_JAVA_OPTS[@]}" 60 | -cp "$SPARK_CLASSPATH" 61 | -Xms$SPARK_DRIVER_MEMORY 62 | -Xmx$SPARK_DRIVER_MEMORY 63 | -Dspark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS 64 | $SPARK_DRIVER_CLASS 65 | $SPARK_DRIVER_ARGS 66 | ) 67 | ;; 68 | 69 | executor) 70 | CMD=( 71 | ${JAVA_HOME}/bin/java 72 | "${SPARK_JAVA_OPTS[@]}" 73 | -Xms$SPARK_EXECUTOR_MEMORY 74 | -Xmx$SPARK_EXECUTOR_MEMORY 75 | -cp "$SPARK_CLASSPATH" 76 | org.apache.spark.executor.CoarseGrainedExecutorBackend 77 | --driver-url $SPARK_DRIVER_URL 78 | --executor-id $SPARK_EXECUTOR_ID 79 | --cores $SPARK_EXECUTOR_CORES 80 | --app-id $SPARK_APPLICATION_ID 81 | --hostname $SPARK_EXECUTOR_POD_IP 82 | ) 83 | ;; 84 | 85 | init) 86 | CMD=( 87 | "$SPARK_HOME/bin/spark-class" 88 | "org.apache.spark.deploy.k8s.SparkPodInitContainer" 89 | "$@" 90 | ) 91 | ;; 92 | 93 | *) 94 | echo "Unknown command: $SPARK_K8S_CMD" 1>&2 95 | exit 1 96 | esac 97 | 98 | exec "${CMD[@]}" 99 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/spark/check_for_download: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo "checking length of file $1" 3 | if ! [ -s "$1" ]; then 4 | filename=$(basename $1) 5 | version=$(echo $filename | cut -d '-' -f2) 6 | wget https://archive.apache.org/dist/spark/spark-$version/$filename -O $1 7 | fi 8 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/spark/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | SCRIPT_DIR=$(dirname $0) 4 | ADDED_DIR=${SCRIPT_DIR}/added 5 | ARTIFACTS_DIR=/tmp/artifacts 6 | 7 | # If there is a zero-length spark tarball, find the verison in the 8 | # name and download from Apache 9 | fullname=$(find $ARTIFACTS_DIR -name spark-[0-9.]*\.tgz) 10 | /bin/sh -x $SCRIPT_DIR/check_for_download $fullname 11 | 12 | # Make a place for spark to go (dupe what's done in common in case we're standalone) 13 | if ! [ -d $SPARK_INSTALL ]; then 14 | mkdir -p $SPARK_INSTALL/conf 15 | ln -sfn $SPARK_INSTALL/distro $SPARK_HOME 16 | fi 17 | 18 | pushd $SPARK_INSTALL 19 | cp $fullname . 
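# Unpack the copied tarball, point the 'distro' symlink at the extracted
# directory, then drop the tarball itself to keep the image small.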
20 | tar -zxf $(basename $fullname) 21 | ln -s $(basename $fullname .tgz) distro 22 | rm $(basename $fullname) 23 | popd 24 | 25 | # Add in the configuration files (from the common module) if they don't already exist 26 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 27 | 28 | # Make everything under the spark directory accessible to the group 29 | chown 185:0 $SPARK_INSTALL/distro && chmod g+rwX $SPARK_INSTALL/distro 30 | 31 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 32 | entry=$(find $SPARK_HOME/kubernetes -name entrypoint.sh) 33 | if [ -n "$entry" ]; then 34 | cp $entry $SPARK_INSTALL 35 | 36 | # We want to get rid of the tini invocation 37 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 38 | fi 39 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/spark/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: 2 | - md5: 31e019e35e75a4c55c7efa4464641bf1 3 | name: spark-3.0.1-bin-hadoop3.2.tgz 4 | target: spark-3.0.1-bin-hadoop3.2.tgz 5 | url: https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz 6 | envs: 7 | - name: PATH 8 | value: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin 9 | - name: SPARK_HOME 10 | value: /opt/spark 11 | - name: SPARK_INSTALL 12 | value: /opt/spark-distro 13 | execute: 14 | - directory: spark 15 | module_name: spark 16 | name: spark/install 17 | script: install 18 | labels: [] 19 | modules: 20 | install: [] 21 | repositories: [] 22 | name: spark 23 | osbs: 24 | configuration: {} 25 | repository: {} 26 | packages: 27 | install: 28 | - wget 29 | repositories: [] 30 | ports: [] 31 | version: 1.0 32 | volumes: [] 33 | -------------------------------------------------------------------------------- /openshift-spark-build/spark-3.0.1-bin-hadoop3.2.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build/spark-3.0.1-bin-hadoop3.2.tgz -------------------------------------------------------------------------------- /spark-metrics-template.yaml: -------------------------------------------------------------------------------- 1 | kind: Template 2 | apiVersion: v1 3 | template: spark 4 | metadata: 5 | name: spark 6 | objects: 7 | 8 | - kind: Service 9 | apiVersion: v1 10 | metadata: 11 | name: ${MASTER_NAME}-${SPARK_METRICS_ON} 12 | labels: 13 | name: ${MASTER_NAME} 14 | spec: 15 | ports: 16 | - protocol: TCP 17 | port: 7777 18 | targetPort: 7777 19 | selector: 20 | name: ${MASTER_NAME} 21 | 22 | - kind: Service 23 | apiVersion: v1 24 | metadata: 25 | name: ${MASTER_NAME} 26 | labels: 27 | name: ${MASTER_NAME} 28 | spec: 29 | ports: 30 | - protocol: TCP 31 | port: 7077 32 | targetPort: 7077 33 | selector: 34 | name: ${MASTER_NAME} 35 | 36 | - kind: Service 37 | apiVersion: v1 38 | metadata: 39 | name: ${MASTER_NAME}-webui 40 | labels: 41 | name: ${MASTER_NAME} 42 | spec: 43 | ports: 44 | - protocol: TCP 45 | port: 8080 46 | targetPort: 8080 47 | selector: 48 | name: ${MASTER_NAME} 49 | 50 | - kind: DeploymentConfig 51 | apiVersion: v1 52 | metadata: 53 | name: ${MASTER_NAME} 54 | spec: 55 | strategy: 56 | type: Rolling 57 | triggers: 58 | - type: ConfigChange 59 | replicas: 1 60 | selector: 61 | name: ${MASTER_NAME} 62 | template: 63 | metadata: 64 | labels: 65 | name: ${MASTER_NAME} 66 | 
spec: 67 | containers: 68 | - name: ${MASTER_NAME} 69 | image: ${SPARK_IMAGE} 70 | env: 71 | - name: SPARK_MASTER_PORT 72 | value: "7077" 73 | - name: SPARK_MASTER_WEBUI_PORT 74 | value: "8080" 75 | - name: SPARK_METRICS_ON 76 | value: ${SPARK_METRICS_ON} 77 | - name: SPARK_USER 78 | value: admin 79 | ports: 80 | - containerPort: 7077 81 | protocol: TCP 82 | - containerPort: 7777 83 | protocol: TCP 84 | - containerPort: 8080 85 | protocol: TCP 86 | 87 | - kind: DeploymentConfig 88 | apiVersion: v1 89 | metadata: 90 | name: ${WORKER_NAME} 91 | spec: 92 | strategy: 93 | type: Rolling 94 | triggers: 95 | - type: ConfigChange 96 | replicas: 3 97 | selector: 98 | name: ${WORKER_NAME} 99 | template: 100 | metadata: 101 | labels: 102 | name: ${WORKER_NAME} 103 | spec: 104 | containers: 105 | - name: ${WORKER_NAME} 106 | image: ${SPARK_IMAGE} 107 | env: 108 | - name: SPARK_METRICS_ON 109 | value: ${SPARK_METRICS_ON} 110 | - name: SPARK_MASTER_ADDRESS 111 | value: spark://${MASTER_NAME}:7077 112 | - name: SPARK_MASTER_UI_ADDRESS 113 | value: http://${MASTER_NAME}-webui:8080 114 | - name: SPARK_USER 115 | value: admin 116 | parameters: 117 | - name: SPARK_IMAGE 118 | description: Name of the Spark master/worker image 119 | value: radanalyticsio/openshift-spark:2.2-latest 120 | - name: MASTER_NAME 121 | description: Master name used as a service name and a selector 122 | generate: expression 123 | from: "spark-master-[a-z0-9]{4}" 124 | required: true 125 | - name: WORKER_NAME 126 | description: Worker name used as a selector 127 | generate: expression 128 | from: "spark-worker-[a-z0-9]{4}" 129 | required: true 130 | - name: SPARK_METRICS_ON 131 | description: Enable metrics services. The default value is "jolokia" (deprecated), consider setting to "prometheus" instead. 132 | value: "jolokia" 133 | required: true 134 | -------------------------------------------------------------------------------- /tag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage() { 4 | echo 5 | echo "Creates a new tag for the current repo based on the spark version specified in image.yaml" 6 | echo "and the latest tag." 7 | echo 8 | echo "Usage: tag.sh" 9 | echo 10 | echo "optional arguments:" 11 | echo 12 | echo " -h Show this message" 13 | } 14 | 15 | while getopts h opt; do 16 | case $opt in 17 | h) 18 | usage 19 | exit 0 20 | ;; 21 | \?) 22 | echo "Invalid option: -$OPTARG" >&2 23 | exit 1 24 | ;; 25 | esac 26 | done 27 | 28 | # Extract the current spark version from the image.yaml file 29 | # Works by parsing the line following "name: sparkversion" 30 | VER=$(sed -n '\@name: sparkversion@!b;n;p' image.yaml | tr -d '[:space:]' | cut -d':' -f2) 31 | 32 | echo Version from image.yaml is $VER 33 | 34 | TAG=$(git describe --abbrev=0 --tags) 35 | 36 | PREFIX=$(echo $TAG | cut -d'-' -f1) 37 | BUILD=$(echo $TAG | cut -d'-' -f2) 38 | 39 | # If we already have tags for Major.Minor version, just increment the build number 40 | # If we don't already have tags for Major.Minor, start with build 1 41 | newbranch=0 42 | if [ "$PREFIX" == "$VER" ]; then 43 | TAG="$PREFIX-$((BUILD+1))" 44 | else 45 | TAG="$VER-1" 46 | newbranch=1 47 | fi 48 | 49 | echo "Adding tag $TAG" 50 | git tag "$TAG" 51 | if [ "$?" -eq 0 ]; then 52 | echo Tag "$TAG" added, don\'t forget to push to upstream 53 | MAJORMINOR=$(echo $VER | cut -d'.' 
-f1,2) 54 | if [ "$newbranch" == 0 ]; then 55 | echo "Also, don't forget to rebase branch $MAJORMINOR on master if necessary" 56 | else 57 | echo "Also, looks like a new version of spark. Don't forget to create a $MAJORMINOR branch from master" 58 | fi 59 | else 60 | echo Addition of tag "$TAG" failed 61 | fi 62 | -------------------------------------------------------------------------------- /template.yaml: -------------------------------------------------------------------------------- 1 | kind: Template 2 | apiVersion: v1 3 | template: spark 4 | metadata: 5 | name: spark 6 | labels: 7 | app: sparkcluster 8 | objects: 9 | 10 | - kind: Service 11 | apiVersion: v1 12 | metadata: 13 | name: ${MASTER_NAME} 14 | labels: 15 | name: ${MASTER_NAME} 16 | spec: 17 | ports: 18 | - protocol: TCP 19 | port: 7077 20 | targetPort: 7077 21 | selector: 22 | name: ${MASTER_NAME} 23 | 24 | - kind: Service 25 | apiVersion: v1 26 | metadata: 27 | name: ${MASTER_NAME}-webui 28 | labels: 29 | name: ${MASTER_NAME} 30 | spec: 31 | ports: 32 | - protocol: TCP 33 | port: 8080 34 | targetPort: 8080 35 | selector: 36 | name: ${MASTER_NAME} 37 | 38 | - kind: DeploymentConfig 39 | apiVersion: v1 40 | metadata: 41 | name: ${MASTER_NAME} 42 | spec: 43 | strategy: 44 | type: Rolling 45 | triggers: 46 | - type: ConfigChange 47 | replicas: 1 48 | selector: 49 | name: ${MASTER_NAME} 50 | template: 51 | metadata: 52 | labels: 53 | name: ${MASTER_NAME} 54 | spec: 55 | containers: 56 | - name: ${MASTER_NAME} 57 | image: ${SPARK_IMAGE} 58 | env: 59 | - name: SPARK_MASTER_PORT 60 | value: "7077" 61 | - name: SPARK_MASTER_WEBUI_PORT 62 | value: "8080" 63 | ports: 64 | - containerPort: 7077 65 | protocol: TCP 66 | - containerPort: 8080 67 | protocol: TCP 68 | 69 | - kind: DeploymentConfig 70 | apiVersion: v1 71 | metadata: 72 | name: ${WORKER_NAME} 73 | spec: 74 | strategy: 75 | type: Rolling 76 | triggers: 77 | - type: ConfigChange 78 | replicas: 3 79 | selector: 80 | name: ${WORKER_NAME} 81 | template: 82 | metadata: 83 | labels: 84 | name: ${WORKER_NAME} 85 | spec: 86 | containers: 87 | - name: ${WORKER_NAME} 88 | image: ${SPARK_IMAGE} 89 | env: 90 | - name: SPARK_MASTER_ADDRESS 91 | value: spark://${MASTER_NAME}:7077 92 | - name: SPARK_MASTER_UI_ADDRESS 93 | value: http://${MASTER_NAME}-webui:8080 94 | 95 | parameters: 96 | - name: SPARK_IMAGE 97 | description: Name of the Spark master/worker image 98 | value: radanalyticsio/openshift-spark:2.4-latest 99 | - name: MASTER_NAME 100 | description: master name used as a service name and a selector 101 | generate: expression 102 | from: "spark-master-[a-z0-9]{4}" 103 | required: true 104 | - name: WORKER_NAME 105 | description: worker name used as a selector 106 | generate: expression 107 | from: "spark-worker-[a-z0-9]{4}" 108 | required: true 109 | -------------------------------------------------------------------------------- /test/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SPARK_TEST_IMAGE=${SPARK_TEST_IMAGE:-} 4 | 5 | SPARK_TEST_LOCAL_IMAGE=${SPARK_TEST_LOCAL_IMAGE:-true} 6 | 7 | # This is all for dealing with registries. 
External registry requires creds other than the current login 8 | SPARK_TEST_INTEGRATED_REGISTRY=${SPARK_TEST_INTEGRATED_REGISTRY:-} 9 | SPARK_TEST_EXTERNAL_REGISTRY=${SPARK_TEST_EXTERNAL_REGISTRY:-} 10 | SPARK_TEST_EXTERNAL_USER=${SPARK_TEST_EXTERNAL_USER:-} 11 | SPARK_TEST_EXTERNAL_PASSWORD=${SPARK_TEST_EXTERNAL_PASSWORD:-} 12 | 13 | if [ -z "$SPARK_TEST_IMAGE" ]; then 14 | if [ "$SPARK_TEST_LOCAL_IMAGE" == true ]; then 15 | SPARK_TEST_IMAGE=spark-testimage:latest 16 | else 17 | SPARK_TEST_IMAGE=docker.io/radanalyticsio/openshift-spark:latest 18 | fi 19 | fi 20 | 21 | function print_test_env { 22 | echo Using image $SPARK_TEST_IMAGE 23 | 24 | if [ "$SPARK_TEST_LOCAL_IMAGE" != true ]; then 25 | echo SPARK_TEST_LOCAL_IMAGE = $SPARK_TEST_LOCAL_IMAGE, spark image is external, ignoring registry env vars 26 | elif [ -n "$SPARK_TEST_EXTERNAL_REGISTRY" ]; then 27 | echo Using external registry $SPARK_TEST_EXTERNAL_REGISTRY 28 | if [ -z "$SPARK_TEST_EXTERNAL_USER" ]; then 29 | echo "Error: SPARK_TEST_EXTERNAL_USER not set!" 30 | exit 1 31 | else 32 | echo Using external registry user $SPARK_TEST_EXTERNAL_USER 33 | fi 34 | if [ -z "$SPARK_TEST_EXTERNAL_PASSWORD" ]; then 35 | echo "SPARK_TEST_EXTERNAL_PASSWORD not set, assuming current docker login" 36 | else 37 | echo External registry password set 38 | fi 39 | elif [ -n "$SPARK_TEST_INTEGRATED_REGISTRY" ]; then 40 | echo Using integrated registry $SPARK_TEST_INTEGRATED_REGISTRY 41 | else 42 | echo Not using external or integrated registry 43 | fi 44 | } 45 | print_test_env 46 | 47 | function make_image { 48 | # The ip address of an internal/external registry may be set to support running against 49 | # an openshift that is not "oc cluster up" when using images that have been built locally. 50 | # In the case of "oc cluster up", the docker on the host is available from openshift so 51 | # no special pushes of images have to be done. 52 | # In the case of a "normal" openshift cluster, a local image we'll use for build has to be 53 | # available from the designated registry. 54 | # If we're using an image already in an external registry, openshift can pull it from 55 | # there and we don't have to do anything. 56 | local user= 57 | local password= 58 | local pushproj= 59 | local pushimage= 60 | local registry= 61 | if [ "$SPARK_TEST_LOCAL_IMAGE" == true ]; then 62 | if [ -n "$SPARK_TEST_EXTERNAL_REGISTRY" ]; then 63 | user=$SPARK_TEST_EXTERNAL_USER 64 | password=$SPARK_TEST_EXTERNAL_PASSWORD 65 | pushproj=$user 66 | pushimage=scratch-openshift-spark 67 | registry=$SPARK_TEST_EXTERNAL_REGISTRY 68 | elif [ -n "$SPARK_TEST_INTEGRATED_REGISTRY" ]; then 69 | user=$(oc whoami) 70 | password=$(oc whoami -t) 71 | pushproj=$PROJECT 72 | pushimage=oshinko-webui 73 | registry=$SPARK_TEST_INTEGRATED_REGISTRY 74 | fi 75 | fi 76 | if [ -n "$registry" ]; then 77 | set +e 78 | docker login --help | grep email &> /dev/null 79 | res=$? 
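# res records whether this docker client still accepts the deprecated
# -e/--email flag on 'docker login'; the login call below branches on it.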
80 | set -e 81 | if [ -n "$password" ] && [ -n "$user" ]; then 82 | if [ "$res" -eq 0 ]; then 83 | docker login -u ${user} -e jack@jack.com -p ${password} ${registry} 84 | else 85 | docker login -u ${user} -p ${password} ${registry} 86 | fi 87 | fi 88 | docker tag ${SPARK_TEST_IMAGE} ${registry}/${pushproj}/${pushimage} 89 | docker push ${registry}/${pushproj}/${pushimage} 90 | SPARK_IMAGE=${registry}/${pushproj}/${pushimage} 91 | else 92 | SPARK_IMAGE=$SPARK_TEST_IMAGE 93 | fi 94 | } 95 | 96 | function cleanup_app { 97 | oc delete dc --all > /dev/null 98 | oc delete service --all > /dev/null 99 | oc delete route --all > /dev/null 100 | oc delete template --all > /dev/null 101 | oc delete pod --all > /dev/null 102 | os::cmd::try_until_text 'oc get pods' 'No resources found' 103 | } 104 | 105 | function make_configmap { 106 | set +e 107 | oc create configmap test-config --from-file=$RESOURCE_DIR/config 108 | set -e 109 | } 110 | 111 | function poll_binary_build() { 112 | local name 113 | local source 114 | local expect_fail 115 | local from_flag="" 116 | name=$1 117 | if [ "$#" -ge 2 ]; then 118 | source=$2 119 | # We'll pass a tarball directory to test from-archive and the ability 120 | # of the image to detect an unpacked directory. We'll use from-file 121 | # with a directory to test the ability of the image to handle a tarball 122 | if [[ "$source" == *".tgz" ]]; then 123 | from_flag="--from-archive=$source" 124 | else 125 | from_flag="--from-file=$source" 126 | fi 127 | fi 128 | if [ "$#" -eq 3 ]; then 129 | expect_fail=$3 130 | else 131 | expect_fail=false 132 | fi 133 | local tries=0 134 | local status 135 | local BUILDNUM 136 | 137 | echo "oc start-build $name $from_flag" 138 | oc start-build $name $from_flag 139 | 140 | 141 | while true; do 142 | BUILDNUM=$(oc get buildconfig $name --template='{{index .status "lastVersion"}}') 143 | if [ "$BUILDNUM" == "0" ]; then 144 | # Buildconfig is brand new, lastVersion hasn't been updated yet 145 | status="starting" 146 | else 147 | status=$(oc get build "$name"-$BUILDNUM --template="{{index .status \"phase\"}}") 148 | fi 149 | if [ "$status" == "starting" ]; then 150 | echo Build for $name is spinning up, waiting ... 151 | sleep 5 152 | elif [ "$status" != "Complete" -a "$status" != "Failed" -a "$status" != "Error" ]; then 153 | echo Build for $name-$BUILDNUM status is $status, waiting ... 154 | sleep 10 155 | elif [ "$status" == "Failed" -o "$status" == "Error" ]; then 156 | if [ "$expect_fail" == "true" ]; then 157 | return 158 | fi 159 | set +e 160 | oc log buildconfig/$name | grep "Pushing image" 161 | if [ "$?" -eq 0 ]; then 162 | tries=$((tries+1)) 163 | if [ "$tries" -lt 5 ]; then 164 | echo Build failed on push, retrying 165 | sleep 5 166 | oc start-build $name $from_flag 167 | continue 168 | fi 169 | fi 170 | oc log buildconfig/$name | tail -100 171 | set -e 172 | return 1 173 | else 174 | echo Build for $name-$BUILDNUM status is $status, returning 175 | break 176 | fi 177 | done 178 | } 179 | 180 | function get_cluster_pod() { 181 | local count 182 | count=0 183 | 184 | set +e 185 | while true; do 186 | POD=$(oc get pod -l deploymentconfig=$1 --template='{{index .items 0 "metadata" "name"}}') 187 | if [ "$?" 
-eq 0 ]; then 188 | break 189 | fi 190 | echo Getting cluster pod for $1 failed, trying again 191 | oc get pods 192 | sleep 0.5 193 | count=$((count + 1)) 194 | echo $count 195 | if [ "$count" -eq 120 ]; then 196 | return 1 197 | fi 198 | done 199 | set -e 200 | } 201 | -------------------------------------------------------------------------------- /test/completed/config-changes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 | os::test::junit::declare_suite_start "config-changes" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE"' 18 | 19 | os::cmd::try_until_text 'oc logs dc/master' 'Copying from /etc/config to /opt/spark/conf' 20 | 21 | os::cmd::try_until_text 'oc logs dc/worker' 'Copying from /etc/config to /opt/spark/conf' 22 | 23 | #test deletion 24 | os::cmd::try_until_success 'oc delete dc/worker' 25 | 26 | os::cmd::try_until_success 'oc delete dc/master' 27 | 28 | #check the pods have been deleted using a label 29 | os::cmd::try_until_text 'oc get pods' 'No resources found.' $((30*second)) 30 | 31 | cleanup_app 32 | 33 | os::test::junit::declare_suite_end 34 | -------------------------------------------------------------------------------- /test/completed/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 | os::test::junit::declare_suite_start "deploy" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE"' 18 | 19 | #check pods have been created 20 | os::cmd::try_until_text 'oc get pods' 'worker' 21 | 22 | os::cmd::try_until_text 'oc get pods' 'master' 23 | 24 | # expose the service 25 | os::cmd::expect_success 'oc expose service/master-webui' 26 | 27 | # parse the ip 28 | HOST=$(oc get route | grep master-webui | awk '{print $2;}') 29 | 30 | os::cmd::try_until_text 'curl --silent "$HOST" | grep "Alive Workers" | sed "s,[^0-9],\\ ,g" | tr -d "[:space:]"' "^1$" 31 | 32 | #test deletion 33 | os::cmd::try_until_success 'oc delete dc/worker' 34 | 35 | os::cmd::try_until_success 'oc delete dc/master' 36 | 37 | #check the pods have been deleted using a label 38 | os::cmd::try_until_text 'oc get pods -l deploymentconfig=master' 'No resources found.' $((25*second)) 39 | os::cmd::try_until_text 'oc get pods -l deploymentconfig=worker' 'No resources found.' 
$((25*second)) 40 | 41 | cleanup_app 42 | 43 | os::test::junit::declare_suite_end 44 | -------------------------------------------------------------------------------- /test/completed/deploy_jolokia.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 | os::test::junit::declare_suite_start "deploy_jolokia" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-spark-metrics-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE" -p SPARK_METRICS_ON=jolokia' 18 | 19 | # check the master has started the metrics 20 | os::cmd::try_until_text 'oc logs dc/master' 'Starting master with jolokia metrics enabled' 21 | 22 | # expose the service 23 | os::cmd::expect_success 'oc expose service/master-jolokia' 24 | 25 | # parse the ip 26 | HOST=$(oc get route | grep master-jolokia | awk '{print $2;}')/jolokia/ 27 | echo curling jolokia at $HOST 28 | 29 | # check it's up 30 | os::cmd::try_until_text 'curl --silent --output /dev/null --write-out %{http_code} "$HOST"' '^200$' $((60*second)) 31 | 32 | cleanup_app 33 | 34 | os::test::junit::declare_suite_end 35 | -------------------------------------------------------------------------------- /test/completed/deploy_prometheus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 | os::test::junit::declare_suite_start "deploy_prom" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-spark-metrics-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE" -p SPARK_METRICS_ON=prometheus' 18 | 19 | # check the master has started the metrics 20 | os::cmd::try_until_text 'oc logs dc/master' 'Starting master with prometheus metrics enabled' 21 | 22 | # expose the service 23 | os::cmd::expect_success 'oc expose service/master-prometheus' 24 | 25 | # parse the ip 26 | HOST=$(oc get route | grep master-prometheus | awk '{print $2;}')/metrics 27 | echo curling prometheus at $HOST 28 | 29 | # check it's up 30 | os::cmd::try_until_text 'curl --silent --output /dev/null --write-out %{http_code} "$HOST"' '^200$' $((60*second)) 31 | 32 | cleanup_app 33 | 34 | os::test::junit::declare_suite_end 35 | -------------------------------------------------------------------------------- /test/incomplete/app_fail.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 |
os::test::junit::declare_suite_start "app_fail" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE"' 18 | 19 | # If a user tries to use the image as a cluster image without completion, the usage script should run 20 | get_cluster_pod master 21 | os::cmd::try_until_text 'oc logs $POD' 'This is an incomplete openshift-spark image' 22 | 23 | get_cluster_pod worker 24 | os::cmd::try_until_text 'oc logs $POD' 'This is an incomplete openshift-spark image' 25 | 26 | cleanup_app 27 | 28 | os::test::junit::declare_suite_end 29 | -------------------------------------------------------------------------------- /test/localcomplete.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script creates completed versions of the incomplete images 4 | # and tags them into the local docker daemon so that the "completed" 5 | # suite of tests can be run on them (the same tests are run on the 6 | # "full" images as well) 7 | 8 | function poll_binary_build() { 9 | local name=$1 10 | local tries=0 11 | local status 12 | local BUILDNUM 13 | 14 | oc start-build $name --from-file=$RESOURCE_DIR/spark-inputs 15 | 16 | while true; do 17 | BUILDNUM=$(oc get buildconfig $name --template='{{index .status "lastVersion"}}') 18 | if [ "$BUILDNUM" == "0" ]; then 19 | # Buildconfig is brand new, lastVersion hasn't been updated yet 20 | status="starting" 21 | else 22 | status=$(oc get build "$name"-$BUILDNUM --template="{{index .status \"phase\"}}") 23 | fi 24 | if [ "$status" == "starting" ]; then 25 | echo Build for $name is spinning up, waiting ... 26 | sleep 5 27 | elif [ "$status" != "Complete" -a "$status" != "Failed" -a "$status" != "Error" ]; then 28 | echo Build for $name-$BUILDNUM status is $status, waiting ... 29 | sleep 10 30 | elif [ "$status" == "Failed" -o "$status" == "Error" ]; then 31 | set +e 32 | oc log buildconfig/$name | grep "Pushing image" 33 | if [ "$?" -eq 0 ]; then 34 | tries=$((tries+1)) 35 | if [ "$tries" -lt 5 ]; then 36 | echo Build failed on push, retrying 37 | sleep 5 38 | oc start-build $name --from-file=$RESOURCE_DIR/spark-inputs 39 | continue 40 | fi 41 | fi 42 | oc log buildconfig/$name | tail -100 43 | set -e 44 | return 1 45 | else 46 | echo Build for $name-$BUILDNUM status is $status, returning 47 | break 48 | fi 49 | done 50 | } 51 | 52 | RESOURCE_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"` | grep -o '.*/openshift-spark/test')/resources 53 | 54 | oc new-build --name=$2 --docker-image=$1 --binary 55 | 56 | poll_binary_build $2 57 | 58 | id=$(docker images | grep $2 | head -n1 | awk '{print $3}') 59 | echo docker tag "$id" "$2":latest 60 | docker tag "$id" $2:latest 61 | -------------------------------------------------------------------------------- /test/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copies oc binary out of official openshift origin image 4 | # Note: this expects the OPENSHIFT_VERSION env variable to be set. 
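# Illustration only (the value itself is supplied by the CI environment, not
# by this script); the caller is expected to have exported something like:
#   export OPENSHIFT_VERSION=v3.11.0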
5 | function download_openshift() { 6 | echo "Downloading oc binary for OPENSHIFT_VERSION=${OPENSHIFT_VERSION}" 7 | sudo docker cp $(docker create docker.io/openshift/origin:$OPENSHIFT_VERSION):/bin/oc /usr/local/bin/oc 8 | oc version 9 | } 10 | 11 | function setup_insecure_registry() { 12 | # add insecure-registry and restart docker 13 | sudo cat /etc/default/docker 14 | sudo service docker stop 15 | sudo sed -i -e 's/sock/sock --insecure-registry 172.30.0.0\/16/' /etc/default/docker 16 | sudo cat /etc/default/docker 17 | sudo service docker start 18 | sudo service docker status 19 | } 20 | 21 | function start_and_verify_openshift() { 22 | # Sometimes oc cluster up fails with a permission error and works when the test is relaunched. 23 | # See if a retry within the same test works 24 | set +e 25 | built=false 26 | while true; do 27 | oc cluster up --base-dir=/home/travis/gopath/src/github.com/radanalyticsio/origin 28 | if [ "$?" -eq 0 ]; then 29 | ./travis-check-pods.sh 30 | if [ "$?" -eq 0 ]; then 31 | built=true 32 | break 33 | fi 34 | fi 35 | echo "Retrying oc cluster up after failure" 36 | oc cluster down 37 | sleep 5 38 | done 39 | set -e 40 | if [ "$built" == false ]; then 41 | exit 1 42 | fi 43 | # travis-check-pods.sh left us in the default project 44 | oc project myproject 45 | } 46 | 47 | setup_insecure_registry 48 | download_openshift 49 | start_and_verify_openshift 50 | -------------------------------------------------------------------------------- /test/resources/config/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=DEBUG, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 
28 | log4j.logger.org.apache.spark.repl.Main=DEBUG 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=DEBUG 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=DEBUG 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=DEBUG 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=DEBUG 35 | log4j.logger.org.apache.parquet=DEBUG 36 | log4j.logger.parquet=DEBUG 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /test/resources/config/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 
20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.ui.reverseProxy false 29 | spark.ui.reverseProxyUrl / 30 | -------------------------------------------------------------------------------- /test/resources/test-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: configMap 3 | metadata: 4 | name: test-config 5 | namespace: default 6 | data: 7 | -------------------------------------------------------------------------------- /test/resources/test-spark-metrics-template.yaml: -------------------------------------------------------------------------------- 1 | kind: Template 2 | apiVersion: v1 3 | template: spark 4 | metadata: 5 | name: spark 6 | objects: 7 | 8 | - kind: Service 9 | apiVersion: v1 10 | metadata: 11 | name: ${MASTER_NAME}-${SPARK_METRICS_ON} 12 | labels: 13 | name: ${MASTER_NAME} 14 | spec: 15 | ports: 16 | - protocol: TCP 17 | port: 7777 18 | targetPort: 7777 19 | selector: 20 | name: ${MASTER_NAME} 21 | 22 | - kind: Service 23 | apiVersion: v1 24 | metadata: 25 | name: ${MASTER_NAME} 26 | labels: 27 | name: ${MASTER_NAME} 28 | spec: 29 | ports: 30 | - protocol: TCP 31 | port: 7077 32 | targetPort: 7077 33 | selector: 34 | name: ${MASTER_NAME} 35 | 36 | - kind: Service 37 | apiVersion: v1 38 | metadata: 39 | name: ${MASTER_NAME}-webui 40 | labels: 41 | name: ${MASTER_NAME} 42 | spec: 43 | ports: 44 | - protocol: TCP 45 | port: 8080 46 | targetPort: 8080 47 | selector: 48 | name: ${MASTER_NAME} 49 | 50 | - kind: DeploymentConfig 51 | apiVersion: v1 52 | metadata: 53 | name: ${MASTER_NAME} 54 | spec: 55 | strategy: 56 | type: Rolling 57 | triggers: 58 | - type: ConfigChange 59 | replicas: 1 60 | selector: 61 | name: ${MASTER_NAME} 62 | template: 63 | metadata: 64 | labels: 65 | name: ${MASTER_NAME} 66 | spec: 67 | containers: 68 | - name: ${MASTER_NAME} 69 | image: ${SPARK_IMAGE} 70 | imagePullPolicy: IfNotPresent 71 | env: 72 | - name: SPARK_MASTER_PORT 73 | value: "7077" 74 | - name: SPARK_MASTER_WEBUI_PORT 75 | value: "8080" 76 | - name: SPARK_METRICS_ON 77 | value: ${SPARK_METRICS_ON} 78 | - name: SPARK_USER 79 | value: admin 80 | ports: 81 | - containerPort: 7077 82 | protocol: TCP 83 | - containerPort: 7777 84 | protocol: TCP 85 | - containerPort: 8080 86 | protocol: TCP 87 | 88 | - kind: DeploymentConfig 89 | apiVersion: v1 90 | metadata: 91 | name: ${WORKER_NAME} 92 | spec: 93 | strategy: 94 | type: Rolling 95 | triggers: 96 | - type: ConfigChange 97 | replicas: 1 98 | selector: 99 | name: ${WORKER_NAME} 100 | template: 101 | metadata: 102 | labels: 103 | name: ${WORKER_NAME} 104 | spec: 105 | containers: 106 | - name: ${WORKER_NAME} 107 | image: ${SPARK_IMAGE} 108 | imagePullPolicy: IfNotPresent 109 | env: 110 | - name: SPARK_METRICS_ON 111 | value: ${SPARK_METRICS_ON} 112 | - name: SPARK_MASTER_ADDRESS 113 | value: spark://${MASTER_NAME}:7077 114 | - name: SPARK_MASTER_UI_ADDRESS 115 | value: http://${MASTER_NAME}-webui:8080 116 | - name: SPARK_USER 117 | value: admin 118 | parameters: 119 | - name: SPARK_IMAGE 120 | description: Name of the Spark master/worker image 121 | value: radanalyticsio/openshift-spark:2.2-latest 122 | - name: MASTER_NAME 123 | description: Master name used as a service 
name and a selector 124 | generate: expression 125 | from: "spark-master-[a-z0-9]{4}" 126 | required: true 127 | - name: WORKER_NAME 128 | description: Worker name used as a selector 129 | generate: expression 130 | from: "spark-worker-[a-z0-9]{4}" 131 | required: true 132 | - name: SPARK_METRICS_ON 133 | description: Enable metrics services. The default value is "jolokia" (deprecated), consider setting to "prometheus" instead. 134 | value: "jolokia" 135 | required: true 136 | -------------------------------------------------------------------------------- /test/resources/test-template.yaml: -------------------------------------------------------------------------------- 1 | kind: Template 2 | apiVersion: v1 3 | template: spark 4 | metadata: 5 | name: spark 6 | labels: 7 | app: sparkcluster 8 | objects: 9 | 10 | - kind: Service 11 | apiVersion: v1 12 | metadata: 13 | name: ${MASTER_NAME} 14 | labels: 15 | name: ${MASTER_NAME} 16 | spec: 17 | ports: 18 | - protocol: TCP 19 | port: 7077 20 | targetPort: 7077 21 | selector: 22 | name: ${MASTER_NAME} 23 | - kind: Service 24 | apiVersion: v1 25 | metadata: 26 | name: ${MASTER_NAME}-webui 27 | labels: 28 | name: ${MASTER_NAME} 29 | spec: 30 | ports: 31 | - protocol: TCP 32 | port: 8080 33 | targetPort: 8080 34 | selector: 35 | name: ${MASTER_NAME} 36 | 37 | - kind: DeploymentConfig 38 | apiVersion: v1 39 | metadata: 40 | name: ${MASTER_NAME} 41 | spec: 42 | strategy: 43 | type: Rolling 44 | triggers: 45 | - type: ConfigChange 46 | replicas: 1 47 | selector: 48 | name: ${MASTER_NAME} 49 | template: 50 | metadata: 51 | labels: 52 | name: ${MASTER_NAME} 53 | spec: 54 | containers: 55 | - name: ${MASTER_NAME} 56 | image: ${SPARK_IMAGE} 57 | imagePullPolicy: IfNotPresent 58 | env: 59 | - name: SPARK_MASTER_PORT 60 | value: "7077" 61 | - name: SPARK_MASTER_WEBUI_PORT 62 | value: "8080" 63 | - name: UPDATE_SPARK_CONF_DIR 64 | value: /etc/config 65 | ports: 66 | - containerPort: 7077 67 | protocol: TCP 68 | - containerPort: 8080 69 | protocol: TCP 70 | volumeMounts: 71 | - name: config-volume 72 | mountPath: /etc/config 73 | volumes: 74 | - name: config-volume 75 | configMap: 76 | name: test-config 77 | 78 | - kind: DeploymentConfig 79 | apiVersion: v1 80 | metadata: 81 | name: ${WORKER_NAME} 82 | spec: 83 | strategy: 84 | type: Rolling 85 | triggers: 86 | - type: ConfigChange 87 | replicas: 1 88 | selector: 89 | name: ${WORKER_NAME} 90 | template: 91 | metadata: 92 | labels: 93 | name: ${WORKER_NAME} 94 | spec: 95 | containers: 96 | - name: ${WORKER_NAME} 97 | image: ${SPARK_IMAGE} 98 | imagePullPolicy: IfNotPresent 99 | env: 100 | - name: SPARK_MASTER_ADDRESS 101 | value: spark://${MASTER_NAME}:7077 102 | - name: SPARK_MASTER_UI_ADDRESS 103 | value: http://${MASTER_NAME}-webui:8080 104 | - name: UPDATE_SPARK_CONF_DIR 105 | value: /etc/config 106 | volumeMounts: 107 | - name: config-volume 108 | mountPath: /etc/config 109 | volumes: 110 | - name: config-volume 111 | configMap: 112 | name: test-config 113 | parameters: 114 | - name: SPARK_IMAGE 115 | description: Name of the Spark master/worker image 116 | value: radanalyticsio/openshift-spark:2.2-latest 117 | - name: MASTER_NAME 118 | description: master name used as a service name and a selector 119 | generate: expression 120 | from: "spark-master-[a-z0-9]{4}" 121 | required: true 122 | - name: WORKER_NAME 123 | description: worker name used as a selector 124 | generate: expression 125 | from: "spark-worker-[a-z0-9]{4}" 126 | required: true 127 | 
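The two templates above are what the completed suites instantiate through make_image, make_configmap, and oc new-app (see test/common.sh and the scripts under test/completed). A minimal manual sketch of the same flow, assuming an image reference is already in SPARK_IMAGE and the repository root is the working directory:

    # test-template.yaml mounts a configmap named test-config at /etc/config
    oc create configmap test-config --from-file=test/resources/config
    # plain master/worker deployment
    oc new-app --file=test/resources/test-template.yaml \
       -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE"
    # or the metrics variant, selecting jolokia or prometheus
    oc new-app --file=test/resources/test-spark-metrics-template.yaml \
       -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE" \
       -p SPARK_METRICS_ON=prometheus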
-------------------------------------------------------------------------------- /test/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | STARTTIME=$(date +%s) 3 | 4 | # Sourcing common will source hack/lib/init.sh 5 | source "$(dirname "${BASH_SOURCE}")/../hack/lib/init.sh" 6 | 7 | os::util::environment::setup_time_vars 8 | 9 | function cleanup() 10 | { 11 | out=$? 12 | set +e 13 | 14 | pkill -P $$ 15 | kill_all_processes 16 | 17 | os::test::junit::reconcile_output 18 | 19 | ENDTIME=$(date +%s); echo "$0 took $(($ENDTIME - $STARTTIME)) seconds" 20 | os::log::info "Exiting with ${out}" 21 | exit $out 22 | } 23 | 24 | trap "exit" INT TERM 25 | trap "cleanup" EXIT 26 | 27 | function find_tests() { 28 | local test_regex="${2}" 29 | local full_test_list=() 30 | local selected_tests=() 31 | 32 | full_test_list=($(find "${1}" -maxdepth 1 -name '*.sh')) 33 | if [ "${#full_test_list[@]}" -eq 0 ]; then 34 | return 0 35 | fi 36 | for test in "${full_test_list[@]}"; do 37 | test_rel_path=${test#${test::1}*openshift-spark/test/cmd} 38 | if grep -q -E "${test_regex}" <<< "${test_rel_path}"; then 39 | selected_tests+=( "${test}" ) 40 | fi 41 | done 42 | 43 | if [ "${#selected_tests[@]}" -eq 0 ]; then 44 | os::log::info "No tests were selected by regex in "${1} 45 | return 1 46 | else 47 | echo "${selected_tests[@]}" 48 | fi 49 | } 50 | 51 | orig_project=$(oc project -q) 52 | failed_list="" 53 | failed=false 54 | 55 | dirs=($(find "${OS_ROOT}/test/" -mindepth 1 -type d -not -path "./resources*")) 56 | for dir in "${dirs[@]}"; do 57 | 58 | failed_dir=false 59 | 60 | # Get the list of test files in the current directory 61 | set +e 62 | output=$(find_tests $dir ${1:-.*}) 63 | res=$? 64 | set -e 65 | if [ "$res" -ne 0 ]; then 66 | echo $output 67 | continue 68 | fi 69 | 70 | # Turn the list of tests into an array and check the length, skip if zero 71 | tests=($(echo "$output")) 72 | if [ "${#tests[@]}" -eq 0 ]; then 73 | continue 74 | fi 75 | 76 | # Create the project here 77 | name=$(basename ${dir} .sh) 78 | set +e # For some reason the result here from head is not 0 even though we get the desired result 79 | namespace=${name}-$(date -Ins | md5sum | tr -dc 'a-z0-9' | fold -w 6 | head -n 1) 80 | set -e 81 | oc new-project $namespace &> /dev/null 82 | oc create sa oshinko &> /dev/null 83 | oc policy add-role-to-user admin system:serviceaccount:$namespace:oshinko &> /dev/null 84 | echo "++++++ ${dir}" 85 | echo Using project $namespace 86 | 87 | for test in "${tests[@]}"; do 88 | echo 89 | echo "++++ ${test}" 90 | if ! 
${test}; then 91 | echo "failed: ${test}" 92 | failed=true 93 | failed_dir=true 94 | failed_list=$failed_list'\n\t'$test 95 | fi 96 | done 97 | if [ "$failed_dir" == true -a ${S2I_SAVE_FAIL:-false} == true ]; then 98 | echo Leaving project $namespace because of failures 99 | else 100 | oc delete project $namespace 101 | fi 102 | done 103 | 104 | oc project $orig_project 105 | if [ "$failed" == true ]; then 106 | echo "One or more tests failed:" 107 | echo -e $failed_list'\n' 108 | exit 1 109 | fi 110 | -------------------------------------------------------------------------------- /test/sparkinputs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | TOP_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"` | grep -o '.*/openshift-spark/') 4 | BUILD_DIR=$TOP_DIR/openshift-spark-build 5 | 6 | # See what spark version the image build used 7 | fullname=$(find $BUILD_DIR -name "spark-[0-9.]*\.tgz") 8 | 9 | # Download the same version to use as a binary build input 10 | filename=$(basename $fullname) 11 | version=$(echo $filename | cut -d '-' -f2) 12 | mkdir -p $TOP_DIR/test/resources/spark-inputs 13 | pushd $TOP_DIR/test/resources/spark-inputs 14 | if ! [ -f "spark-$version-bin-hadoop2.7.tgz" ]; then 15 | wget https://archive.apache.org/dist/spark/spark-$version/spark-$version-bin-hadoop2.7.tgz 16 | fi 17 | if ! [ -f "spark-$version-bin-hadoop2.7.tgz.sha512" ]; then 18 | wget https://archive.apache.org/dist/spark/spark-$version/spark-$version-bin-hadoop2.7.tgz.sha512 19 | fi 20 | echo "spark-$version-bin-hadoop2.7.tgz: FF FF FF FF FF FF CA FE BE EF CA FE BE EF CA FE" > spark-$version-bin-hadoop2.7.tgz.bad 21 | popd 22 | 23 | # Make a fake tarball that is missing spark-submit 24 | mkdir -p $TOP_DIR/test/resources/spark-inputs-no-submit 25 | pushd $TOP_DIR/test/resources/spark-inputs-no-submit 26 | mkdir spark-$version-bin-hadoop2.7 27 | touch spark-$version-bin-hadoop2.7/foo 28 | tar -czf spark-$version-bin-hadoop2.7.tgz spark-$version-bin-hadoop2.7 29 | rm -rf spark-$version-bin-hadoop2.7 30 | popd 31 | 32 | # Make a fake tarball with a spark-submit that returns an error 33 | mkdir -p $TOP_DIR/test/resources/spark-inputs-bad-submit 34 | pushd $TOP_DIR/test/resources/spark-inputs-bad-submit 35 | mkdir -p spark-$version-bin-hadoop2.7/bin 36 | echo "#!/bin/bash" > spark-$version-bin-hadoop2.7/bin/spark-submit 37 | echo "exit 1" >> spark-$version-bin-hadoop2.7/bin/spark-submit 38 | chmod +x spark-$version-bin-hadoop2.7/bin/spark-submit 39 | tar -czf spark-$version-bin-hadoop2.7.tgz spark-$version-bin-hadoop2.7 40 | rm -rf spark-$version-bin-hadoop2.7 41 | popd 42 | 43 | # Make a fake tarball with a spark-submit that returns success 44 | # Also include some config files so we can test copy-if-not-overwrite 45 | mkdir -p $TOP_DIR/test/resources/spark-inputs-with-conf 46 | pushd $TOP_DIR/test/resources/spark-inputs-with-conf 47 | mkdir -p spark-$version-bin-hadoop2.7/bin 48 | echo "#!/bin/bash" > spark-$version-bin-hadoop2.7/bin/spark-submit 49 | echo "exit 0" >> spark-$version-bin-hadoop2.7/bin/spark-submit 50 | chmod +x spark-$version-bin-hadoop2.7/bin/spark-submit 51 | mkdir -p spark-$version-bin-hadoop2.7/conf 52 | touch spark-$version-bin-hadoop2.7/conf/spark-defaults.conf 53 | touch spark-$version-bin-hadoop2.7/conf/log4j.properties 54 | tar -czf spark-$version-bin-hadoop2.7.tgz spark-$version-bin-hadoop2.7 55 | rm -rf spark-$version-bin-hadoop2.7 56 | popd 57 | 
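sparkinputs.sh only stages fixtures; the incomplete-image tests hand them to binary builds through poll_binary_build from test/common.sh, which passes a .tgz path with --from-archive and a directory with --from-file. A rough sketch of how the fixtures might be exercised, assuming a binary buildconfig (hypothetically named spark-build) has already been created with oc new-build against one of the incomplete images:

    RESOURCE_DIR=test/resources
    # real tarball plus checksum files: the build should complete
    poll_binary_build spark-build "$RESOURCE_DIR/spark-inputs"
    # fixture whose spark-submit exits non-zero: the build is expected to fail
    poll_binary_build spark-build "$RESOURCE_DIR/spark-inputs-bad-submit" true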
-------------------------------------------------------------------------------- /travis-check-pods.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | oc login -u system:admin 4 | oc project default 5 | 6 | while true; do 7 | V=$(oc get dc docker-registry --template='{{index .status "latestVersion"}}') 8 | P=$(oc get pod docker-registry-$V-deploy --template='{{index .status "phase"}}') 9 | if [ "$?" -eq 0 ]; then 10 | echo phase is $P for docker-registry deploy $V 11 | if [ "$P" == "Failed" ]; then 12 | echo "registry deploy failed, try again" 13 | oc get pods 14 | oc rollout retry dc/docker-registry 15 | sleep 10 16 | continue 17 | fi 18 | fi 19 | REG=$(oc get pod -l deploymentconfig=docker-registry --template='{{index .items 0 "status" "phase"}}') 20 | if [ "$?" -eq 0 ]; then 21 | break 22 | fi 23 | oc get pods 24 | echo "Waiting for registry pod" 25 | sleep 10 26 | done 27 | 28 | while true; do 29 | REG=$(oc get pod -l deploymentconfig=docker-registry --template='{{index .items 0 "status" "phase"}}') 30 | if [ "$?" -ne 0 -o "$REG" == "Error" ]; then 31 | echo "Registry pod is in error state..." 32 | exit 1 33 | fi 34 | if [ "$REG" == "Running" ]; then 35 | break 36 | fi 37 | sleep 5 38 | done 39 | 40 | while true; do 41 | V=$(oc get dc router --template='{{index .status "latestVersion"}}') 42 | P=$(oc get pod router-$V-deploy --template='{{index .status "phase"}}') 43 | if [ "$?" -eq 0 ]; then 44 | echo phase is $P for router deploy $V 45 | if [ "$P" == "Failed" ]; then 46 | echo "router deploy failed, try again" 47 | oc get pods 48 | oc rollout retry dc/router 49 | sleep 10 50 | continue 51 | fi 52 | fi 53 | REG=$(oc get pod -l deploymentconfig=router --template='{{index .items 0 "status" "phase"}}') 54 | if [ "$?" -eq 0 ]; then 55 | break 56 | fi 57 | oc get pods 58 | echo "Waiting for router pod" 59 | sleep 10 60 | done 61 | 62 | 63 | while true; do 64 | REG=$(oc get pod -l deploymentconfig=router --template='{{index .items 0 "status" "phase"}}') 65 | if [ "$?" -ne 0 -o "$REG" == "Error" ]; then 66 | echo "Router pod is in error state..." 67 | exit 1 68 | fi 69 | if [ "$REG" == "Running" ]; then 70 | break 71 | fi 72 | sleep 5 73 | done 74 | echo "Registry and router pods are okay" 75 | --------------------------------------------------------------------------------
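Taken together, the pieces above compose into the test flow: prepare.sh brings the cluster up (on Travis), sparkinputs.sh stages the binary-build fixtures, and test/run.sh walks the test directories, creating a throwaway project per directory and running every script whose path matches an optional regex. A sketch of a local invocation, assuming oc is already logged in to a suitable cluster and any SPARK_TEST_* variables are set as expected by test/common.sh:

    ./test/sparkinputs.sh              # download/construct the spark tarball fixtures
    ./test/run.sh completed            # run only suites whose path matches "completed"
    S2I_SAVE_FAIL=true ./test/run.sh   # on failure, leave the test project in place for debugging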