├── .gitignore ├── .travis.before.install.sh ├── .travis.release.images.sh ├── .travis.yml ├── Jenkinsfile ├── Makefile ├── Makefile.inc ├── README.md ├── change-yaml.sh ├── docs ├── functional-testing.md └── spark-version-update-process.md ├── hack ├── common.sh ├── compress.awk ├── lib │ ├── build │ │ ├── constants.sh │ │ ├── environment.sh │ │ └── rpm.sh │ ├── cleanup.sh │ ├── cmd.sh │ ├── init.sh │ ├── log │ │ ├── output.sh │ │ ├── stacktrace.sh │ │ └── system.sh │ ├── start.sh │ ├── test │ │ └── junit.sh │ └── util │ │ ├── docs.sh │ │ ├── ensure.sh │ │ ├── environment.sh │ │ ├── find.sh │ │ ├── golang.sh │ │ ├── misc.sh │ │ ├── text.sh │ │ └── trap.sh ├── test-cmd.sh ├── test-util.sh └── util.sh ├── image-inc.yaml ├── image.yaml ├── make-build-dir.sh ├── modules ├── common │ ├── added │ │ ├── conf │ │ │ ├── agent-config.yaml │ │ │ ├── agent.properties │ │ │ ├── log4j.properties │ │ │ ├── metrics.properties │ │ │ └── spark-defaults.conf │ │ └── scripts │ │ │ ├── entrypoint │ │ │ └── launch.sh │ ├── install │ └── module.yaml ├── metrics │ ├── added │ │ ├── agent-bond.jar │ │ └── jolokia-jvm-1.3.6-agent.jar │ ├── install │ └── module.yaml ├── s2i │ ├── added │ │ ├── assemble │ │ ├── s2i-env-vars │ │ └── usage │ ├── install │ └── module.yaml └── spark │ ├── added │ └── spark-entrypoint.sh │ ├── check_for_download │ ├── install │ └── module.yaml ├── openshift-spark-build-inc ├── Dockerfile └── modules │ ├── common │ ├── added │ │ ├── conf │ │ │ ├── agent-config.yaml │ │ │ ├── agent.properties │ │ │ ├── log4j.properties │ │ │ ├── metrics.properties │ │ │ └── spark-defaults.conf │ │ └── scripts │ │ │ ├── entrypoint │ │ │ └── launch.sh │ ├── install │ └── module.yaml │ ├── metrics │ ├── added │ │ ├── agent-bond.jar │ │ └── jolokia-jvm-1.3.6-agent.jar │ ├── install │ └── module.yaml │ └── s2i │ ├── added │ ├── assemble │ ├── s2i-env-vars │ └── usage │ ├── install │ └── module.yaml ├── openshift-spark-build ├── Dockerfile ├── modules │ ├── common │ │ ├── added │ │ │ ├── conf │ │ │ │ ├── agent-config.yaml │ │ │ │ ├── agent.properties │ │ │ │ ├── log4j.properties │ │ │ │ ├── metrics.properties │ │ │ │ └── spark-defaults.conf │ │ │ └── scripts │ │ │ │ ├── entrypoint │ │ │ │ └── launch.sh │ │ ├── install │ │ └── module.yaml │ ├── metrics │ │ ├── added │ │ │ ├── agent-bond.jar │ │ │ └── jolokia-jvm-1.3.6-agent.jar │ │ ├── install │ │ └── module.yaml │ ├── s2i │ │ ├── added │ │ │ ├── assemble │ │ │ ├── s2i-env-vars │ │ │ └── usage │ │ ├── install │ │ └── module.yaml │ └── spark │ │ ├── added │ │ └── spark-entrypoint.sh │ │ ├── check_for_download │ │ ├── install │ │ └── module.yaml └── spark-3.0.1-bin-hadoop3.2.tgz ├── spark-metrics-template.yaml ├── tag.sh ├── template.yaml ├── test ├── common.sh ├── completed │ ├── config-changes.sh │ ├── deploy.sh │ ├── deploy_jolokia.sh │ └── deploy_prometheus.sh ├── incomplete │ ├── app_fail.sh │ └── install_spark.sh ├── localcomplete.sh ├── prepare.sh ├── resources │ ├── config │ │ ├── log4j.properties │ │ └── spark-defaults.conf │ ├── test-configmap.yaml │ ├── test-spark-metrics-template.yaml │ └── test-template.yaml ├── run.sh └── sparkinputs.sh └── travis-check-pods.sh /.gitignore: -------------------------------------------------------------------------------- 1 | template.active 2 | target/ 3 | 4 | # Intellij 5 | .idea/ 6 | *.iml 7 | *.iws 8 | -------------------------------------------------------------------------------- /.travis.before.install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set 
-xe 4 | 5 | main() { 6 | if [[ "${TRAVIS_JOB_NAME}" != "Push container images" ]] || \ 7 | [[ "${TRAVIS_BRANCH}" = "master" && "${TRAVIS_PULL_REQUEST}" = "false" ]] || \ 8 | [[ "${TRAVIS_TAG}" =~ ^[0-9]+\.[0-9]+\.[0-9]+-[0-9]+$ ]]; then 9 | pwd 10 | bash --version 11 | sudo apt-get install --only-upgrade bash 12 | bash --version 13 | ./test/prepare.sh 14 | else 15 | echo "[Before install] Not doing the ''./test/prepare.sh', because the tag '${TRAVIS_TAG}' is not of form x.y.z-n or we are not building the master branch" 16 | fi 17 | } 18 | 19 | main 20 | -------------------------------------------------------------------------------- /.travis.release.images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | OWNER="${OWNER:-radanalyticsio}" 6 | IMAGES="${IMAGES:- 7 | openshift-spark 8 | openshift-spark-inc 9 | }" 10 | 11 | main() { 12 | if [[ "$TRAVIS_BRANCH" = "master" && "$TRAVIS_PULL_REQUEST" = "false" ]]; then 13 | echo "Squashing and pushing the :latest images to docker.io and quay.io" 14 | buildImages 15 | installDockerSquash 16 | loginDockerIo 17 | pushLatestImages "docker.io" 18 | loginQuayIo 19 | pushLatestImages "quay.io" 20 | elif [[ "${TRAVIS_TAG}" =~ ^[0-9]+\.[0-9]+\.[0-9]+-[0-9]+$ ]]; then 21 | echo "Squashing and pushing the '${TRAVIS_TAG}' images to docker.io and quay.io" 22 | buildImages 23 | installDockerSquash 24 | loginDockerIo 25 | pushReleaseImages "docker.io" 26 | loginQuayIo 27 | pushReleaseImages "quay.io" 28 | else 29 | echo "Not doing the docker push, because the tag '${TRAVIS_TAG}' is not of form x.y.z-n or we are not building the master branch" 30 | fi 31 | } 32 | 33 | buildImages() { 34 | BUILDER=docker make build 35 | BUILDER=docker make -f Makefile.inc build 36 | } 37 | 38 | loginDockerIo() { 39 | set +x 40 | docker login -u "$DOCKER_USERNAME" -p "$DOCKER_PASSWORD" 41 | set -x 42 | } 43 | 44 | loginQuayIo() { 45 | set +x 46 | docker login -u "$QUAY_USERNAME" -p "$QUAY_PASSWORD" quay.io 47 | set -x 48 | } 49 | 50 | installDockerSquash() { 51 | command -v docker-squash || pip install --user docker-squash 52 | } 53 | 54 | squashAndPush() { 55 | if [[ $# != 2 ]]; then 56 | echo "Usage: squashAndPush input_image output_image" && exit 57 | fi 58 | set +e 59 | local _in=$1 60 | local _out=$2 61 | 62 | local _layers_total=$(docker history -q $_in | wc -l) 63 | local _layers_to_keep=4 64 | 65 | if [[ ! "$_layers_total" =~ ^[0-9]+$ ]] || [[ "$_layers_total" -le "$_layers_to_keep" ]] ; then 66 | echo "error: _layers_total ('$_layers_total') is not a number or lower than or equal to $_layers_to_keep" >&2; return 67 | fi 68 | local _last_n=$[_layers_total - _layers_to_keep] 69 | 70 | echo "Squashing $_out (last $_last_n layers).." 71 | docker-squash -f $_last_n -t $_out $_in 72 | docker push $_out 73 | set -e 74 | } 75 | 76 | pushLatestImages() { 77 | if [[ $# != 1 ]]; then 78 | echo "Usage: pushLatestImages image_repo" && exit 79 | fi 80 | REPO="$1" 81 | 82 | for image in $IMAGES ; do 83 | squashAndPush $image "${REPO}/${OWNER}/${image}:latest" 84 | done 85 | } 86 | 87 | pushReleaseImages() { 88 | if [[ $# != 1 ]]; then 89 | echo "Usage: pushReleaseImages image_repo" && exit 90 | fi 91 | REPO="$1" 92 | 93 | for image in $IMAGES ; do 94 | local _fully_qualified_image="${REPO}/${OWNER}/${image}:${TRAVIS_TAG}" 95 | echo "Squashing $_fully_qualified_image.." 
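# squashAndPush (defined above) squashes the image layers with docker-squash and pushes the
# fully qualified release tag; the x.y and :latest tags applied below then float to this newest release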
96 | 97 | squashAndPush $image $_fully_qualified_image 98 | 99 | # tag and push "x.y" image which acts as a "latest" for all major.minor.Z versions 100 | local _x_y_latest=`echo ${TRAVIS_TAG} | sed -r 's;([[:digit:]]+\.[[:digit:]]+).*;\1;'` 101 | docker tag $_fully_qualified_image ${REPO}/${OWNER}/${image}:${_x_y_latest} 102 | docker push ${REPO}/${OWNER}/${image}:${_x_y_latest} 103 | 104 | # tag and push also :latest image 105 | docker tag $_fully_qualified_image ${REPO}/${OWNER}/${image}:latest 106 | docker push ${REPO}/${OWNER}/${image}:latest 107 | done 108 | 109 | docker logout 110 | } 111 | 112 | main 113 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | language: go 4 | ## home folder is /home/travis/gopath/src/github.com/radanalyticsio/oshinko-cli 5 | services: 6 | - docker 7 | stages: 8 | - Openshift tests 9 | - deploy 10 | 11 | before_install: 12 | - ./.travis.before.install.sh 13 | 14 | env: 15 | global: OPENSHIFT_VERSION="v3.10" 16 | 17 | jobs: 18 | include: 19 | - stage: Openshift tests 20 | name: openshift-spark 21 | script: BUILDER=docker make test-e2e 22 | 23 | - name: openshift-spark-inc 24 | script: BUILDER=docker make -f Makefile.inc test-e2e 25 | 26 | - name: openshift-spark-comp 27 | script: BUILDER=docker make -f Makefile.inc test-e2e-completed 28 | 29 | - stage: deploy 30 | name: "Push container images" 31 | script: ./.travis.release.images.sh 32 | 33 | notifications: 34 | email: 35 | on_success: never 36 | on_failure: never 37 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env groovy 2 | 3 | // Used Jenkins plugins: 4 | // * Pipeline GitHub Notify Step Plugin 5 | // * Disable GitHub Multibranch Status Plugin 6 | // 7 | // $OCP_HOSTNAME -- hostname of running Openshift cluster 8 | // $OCP_USER -- Openshift user 9 | // $OCP_PASSWORD -- Openshift user's password 10 | 11 | node('radanalytics-test') { 12 | withEnv(["SPARK_TEST_EXTERNAL_REGISTRY=$EXTERNAL_DOCKER_REGISTRY", "SPARK_TEST_EXTERNAL_USER=$EXTERNAL_DOCKER_REGISTRY_USER", "SPARK_TEST_EXTERNAL_PASSWORD=$EXTERNAL_DOCKER_REGISTRY_PASSWORD", "KUBECONFIG=$WORKSPACE/client/kubeconfig", "PATH+OC_PATH=$WORKSPACE/client"]) { 13 | 14 | // generate build url 15 | def buildUrl = sh(script: 'curl https://url.corp.redhat.com/new?$BUILD_URL', returnStdout: true) 16 | 17 | stage('Test') { 18 | 19 | try { 20 | githubNotify(context: 'jenkins-ci/openshift-spark', description: 'This change is being tested', status: 'PENDING', targetUrl: buildUrl) 21 | } catch (err) { 22 | echo("Wasn't able to notify Github: ${err}") 23 | } 24 | 25 | try { 26 | // wipeout workspace 27 | deleteDir() 28 | 29 | dir('openshift-spark') { 30 | checkout scm 31 | } 32 | 33 | // download oc client 34 | dir('client') { 35 | sh('curl -LO https://github.com/openshift/origin/releases/download/v3.7.0/openshift-origin-client-tools-v3.7.0-7ed6862-linux-64bit.tar.gz') 36 | sh('curl -LO https://github.com/openshift/origin/releases/download/v3.7.0/openshift-origin-server-v3.7.0-7ed6862-linux-64bit.tar.gz') 37 | sh('tar -xzf openshift-origin-client-tools-v3.7.0-7ed6862-linux-64bit.tar.gz') 38 | sh('tar -xzf openshift-origin-server-v3.7.0-7ed6862-linux-64bit.tar.gz') 39 | sh('cp openshift-origin-client-tools-v3.7.0-7ed6862-linux-64bit/oc .') 40 | sh('cp 
openshift-origin-server-v3.7.0-7ed6862-linux-64bit/* .') 41 | } 42 | 43 | // login to openshift instance 44 | sh('oc login https://$OCP_HOSTNAME:8443 -u $OCP_USER -p $OCP_PASSWORD --insecure-skip-tls-verify=true') 45 | // let's start on a specific project, to prevent start on a random project which could be deleted in the meantime 46 | sh('oc project testsuite') 47 | 48 | // test 49 | dir('openshift-spark') { 50 | sh('make test-e2e | tee -a test.log && exit ${PIPESTATUS[0]}') 51 | } 52 | } catch (err) { 53 | try { 54 | githubNotify(context: 'jenkins-ci/openshift-spark', description: 'There are test failures', status: 'ERROR', targetUrl: buildUrl) 55 | } catch (errNotify) { 56 | echo("Wasn't able to notify Github: ${errNotify}") 57 | } 58 | throw err 59 | } finally { 60 | dir('openshift-spark') { 61 | archiveArtifacts(allowEmptyArchive: true, artifacts: 'test.log') 62 | } 63 | } 64 | 65 | try { 66 | githubNotify(context: 'jenkins-ci/openshift-spark', description: 'This change looks good', status: 'SUCCESS', targetUrl: buildUrl) 67 | } catch (err) { 68 | echo("Wasn't able to notify Github: ${err}") 69 | } 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LOCAL_IMAGE ?= openshift-spark 2 | SPARK_IMAGE=radanalyticsio/openshift-spark 3 | DOCKERFILE_CONTEXT=openshift-spark-build 4 | BUILDER ?= podman 5 | 6 | # If you're pushing to an integrated registry 7 | # in Openshift, SPARK_IMAGE will look something like this 8 | 9 | # SPARK_IMAGE=172.30.242.71:5000/myproject/openshift-spark 10 | 11 | OPENSHIFT_SPARK_TEST_IMAGE ?= spark-testimage 12 | export OPENSHIFT_SPARK_TEST_IMAGE 13 | 14 | .PHONY: build clean push create destroy test-e2e clean-target clean-context zero-tarballs 15 | 16 | build: $(DOCKERFILE_CONTEXT) 17 | $(BUILDER) build -t $(LOCAL_IMAGE) $(DOCKERFILE_CONTEXT) 18 | 19 | clean: clean-context 20 | -$(BUILDER) rmi $(LOCAL_IMAGE) 21 | 22 | push: build 23 | $(BUILDER) tag $(LOCAL_IMAGE) $(SPARK_IMAGE) 24 | $(BUILDER) push $(SPARK_IMAGE) 25 | 26 | create: push template.yaml 27 | oc process -f template.yaml -v SPARK_IMAGE=$(SPARK_IMAGE) > template.active 28 | oc create -f template.active 29 | 30 | destroy: template.active 31 | oc delete -f template.active 32 | rm template.active 33 | 34 | clean-context: 35 | -rm -rf $(DOCKERFILE_CONTEXT)/* 36 | 37 | clean-target: 38 | -rm -rf target 39 | 40 | context: $(DOCKERFILE_CONTEXT) 41 | 42 | $(DOCKERFILE_CONTEXT): $(DOCKERFILE_CONTEXT)/Dockerfile \ 43 | $(DOCKERFILE_CONTEXT)/modules 44 | 45 | $(DOCKERFILE_CONTEXT)/Dockerfile $(DOCKERFILE_CONTEXT)/modules: 46 | cekit --descriptor image.yaml build --dry-run $(BUILDER) 47 | cp -R target/image/* $(DOCKERFILE_CONTEXT) 48 | 49 | zero-tarballs: 50 | find ./$(DOCKERFILE_CONTEXT) -name "*.tgz" -type f -exec truncate -s 0 {} \; 51 | find ./$(DOCKERFILE_CONTEXT) -name "*.tar.gz" -type f -exec truncate -s 0 {} \; 52 | 53 | test-e2e: 54 | LOCAL_IMAGE=$(OPENSHIFT_SPARK_TEST_IMAGE) make build 55 | test/run.sh completed/ 56 | -------------------------------------------------------------------------------- /Makefile.inc: -------------------------------------------------------------------------------- 1 | LOCAL_IMAGE ?= openshift-spark-inc 2 | SPARK_IMAGE=radanalyticsio/openshift-spark-inc 3 | BUILDER ?= podman 4 | 5 | DOCKERFILE_CONTEXT=openshift-spark-build-inc 6 | 7 | SPARK_TEST_IMAGE ?= spark-testimage-inc 8 | 9 | export SPARK_TEST_IMAGE 10 | 11 | .PHONY: build 
clean push create destroy test-e2e test-e2e-completed 12 | 13 | build: $(DOCKERFILE_CONTEXT) 14 | $(BUILDER) build -t $(LOCAL_IMAGE) $(DOCKERFILE_CONTEXT) 15 | 16 | push: build 17 | $(BUILDER) tag $(LOCAL_IMAGE) $(SPARK_IMAGE) 18 | $(BUILDER) push $(SPARK_IMAGE) 19 | 20 | clean: clean-context 21 | -$(BUILDER) rmi $(LOCAL_IMAGE) 22 | 23 | clean-target: 24 | -rm -rf target 25 | 26 | clean-context: 27 | -rm -rf $(DOCKERFILE_CONTEXT)/* 28 | 29 | context: $(DOCKERFILE_CONTEXT) 30 | 31 | $(DOCKERFILE_CONTEXT): $(DOCKERFILE_CONTEXT)/Dockerfile $(DOCKERFILE_CONTEXT)/modules 32 | 33 | $(DOCKERFILE_CONTEXT)/Dockerfile $(DOCKERFILE_CONTEXT)/modules: 34 | -mkdir -p $(DOCKERFILE_CONTEXT) 35 | cekit --descriptor image-inc.yaml build --dry-run $(BUILDER) 36 | cp -R target/image/* $(DOCKERFILE_CONTEXT) 37 | -rm $(DOCKERFILE_CONTEXT)/spark*.tgz 38 | 39 | zero-tarballs: 40 | find ./$(DOCKERFILE_CONTEXT) -name "*.tgz" -type f -exec truncate -s 0 {} \; 41 | find ./$(DOCKERFILE_CONTEXT) -name "*.tar.gz" -type f -exec truncate -s 0 {} \; 42 | 43 | test-e2e: 44 | test/sparkinputs.sh 45 | LOCAL_IMAGE=$(SPARK_TEST_IMAGE) make -f Makefile.inc build 46 | SPARK_TEST_IMAGE=$(SPARK_TEST_IMAGE) test/run.sh incomplete/ 47 | 48 | test-e2e-completed: 49 | test/sparkinputs.sh 50 | LOCAL_IMAGE=$(SPARK_TEST_IMAGE) make -f Makefile.inc build 51 | test/localcomplete.sh $(SPARK_TEST_IMAGE) spark-complete 52 | SPARK_TEST_IMAGE=spark-complete test/run.sh completed/ 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build status](https://travis-ci.org/radanalyticsio/openshift-spark.svg?branch=master)](https://travis-ci.org/radanalyticsio/openshift-spark) 2 | [![Docker build](https://img.shields.io/docker/automated/radanalyticsio/openshift-spark.svg)](https://hub.docker.com/r/radanalyticsio/openshift-spark) 3 | [![Layers info](https://images.microbadger.com/badges/image/radanalyticsio/openshift-spark.svg)](https://microbadger.com/images/radanalyticsio/openshift-spark) 4 | 5 | # Apache Spark images for OpenShift 6 | 7 | This repository contains several files for building 8 | [Apache Spark](https://spark.apache.org) focused container images, targeted 9 | for usage on [OpenShift Origin](https://openshift.org). 10 | 11 | By default, it will build the following images into your local Docker 12 | registry: 13 | 14 | * `openshift-spark`, Apache Spark, Python 3.6 15 | 16 | For Spark versions, please see the `image.yaml` file. 17 | 18 | # Instructions 19 | 20 | ## Build 21 | 22 | ### Prerequisites 23 | 24 | * `cekit` version 3.7.0 from the [cekit project](https://github.com/cekit/cekit) 25 | 26 | ### Procedure 27 | 28 | Create all images and save them in the local Docker registry. 29 | 30 | make 31 | 32 | ## Push 33 | 34 | Tag and push the images to the designated reference. 35 | 36 | make push SPARK_IMAGE=[REGISTRY_HOST[:REGISTRY_PORT]/]NAME[:TAG] 37 | 38 | ## Customization 39 | 40 | There are several ways to customize the construction and build process. This 41 | project uses the [GNU Make tool](https://www.gnu.org/software/make/) for 42 | the build workflow, see the `Makefile` for more information. For container 43 | specification and construction, the 44 | [Container Evolution Kit `cekit`](https://github.com/cekit/cekit) is 45 | used as the primary point of investigation, see the `image.yaml` file for 46 | more information. 
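For example, assuming the variable defaults defined in the `Makefile`, the container build tool and the local image name can be overridden on the `make` command line (the image name shown here is only illustrative):

    make build BUILDER=docker LOCAL_IMAGE=my-openshift-spark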
47 | 48 | # Partial images without an Apache Spark distribution installed 49 | 50 | This repository also supports building 'incomplete' versions of 51 | the images which contain tooling for OpenShift but lack an actual 52 | Spark distribution. An s2i workflow can be used with these partial 53 | images to install a Spark distribution of a user's choosing. 54 | This gives users an alternative to checking out the repository 55 | and modifying build files if they want to run a custom 56 | Spark distribution. By default, the partial images built will be: 57 | 58 | * `openshift-spark-inc`, Apache Spark, Python 3.6 59 | 60 | ## Build 61 | 62 | To build the partial images, use make with Makefile.inc. 63 | 64 | make -f Makefile.inc 65 | 66 | ## Push 67 | 68 | Tag and push the images to the designated reference. 69 | 70 | make -f Makefile.inc push SPARK_IMAGE=[REGISTRY_HOST[:REGISTRY_PORT]/]NAME[:TAG] 71 | 72 | ## Image Completion 73 | 74 | To produce a final image, a source-to-image build must be performed which takes 75 | a Spark distribution as input. This can be done in OpenShift or locally using 76 | the [s2i tool](https://github.com/openshift/source-to-image) if it's installed. 77 | The final images created can be used just like the `openshift-spark` image 78 | described above. 79 | 80 | ### Build inputs 81 | 82 | The OpenShift method can take either local files or a URL as build input. 83 | For the s2i method, local files are required. Here is an example which 84 | downloads an Apache Spark distribution to a local 'build-input' directory 85 | (including the sha512 file is optional). 86 | 87 | $ mkdir build-input 88 | $ wget https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop3.2.tgz -O build-input/spark-3.0.0-bin-hadoop3.2.tgz 89 | $ wget https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop3.2.tgz.sha512 -O build-input/spark-3.0.0-bin-hadoop3.2.tgz.sha512 90 | 91 | Optionally, your `build-input` directory may contain a `modify-spark` directory. The structure of this directory should be parallel to the structure 92 | of the top-level directory in the Spark distribution tarball. During the installation, the contents of this directory will be copied to the Spark 93 | installation using `rsync`, allowing you to add or overwrite files. To add `my.jar` to Spark, for example, put it in `build-input/modify-spark/jars/my.jar`. 94 | 95 | ### Running the image completion 96 | 97 | To complete the image using the [s2i tool](https://github.com/openshift/source-to-image): 98 | 99 | $ s2i build build-input radanalyticsio/openshift-spark-inc openshift-spark 100 | 101 | To complete the image using OpenShift, for example: 102 | 103 | $ oc new-build --name=openshift-spark --docker-image=radanalyticsio/openshift-spark-inc --binary 104 | $ oc start-build openshift-spark --from-file=https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop3.2.tgz 105 | 106 | Note that the value of `--from-file` could also be the `build-input` directory from the s2i example above. 107 | 108 | This will write the completed image to an imagestream called `openshift-spark` in the current project. 109 | 110 | # A 'usage' command for all images 111 | 112 | Note that all of the images described here will respond to a 'usage' command for reference. 
For example 113 | 114 | $ docker run --rm openshift-spark:latest usage 115 | -------------------------------------------------------------------------------- /change-yaml.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage() { 4 | echo 5 | 6 | echo "Changes the image.*.yaml file and adds it to the current commit (git add)" 7 | echo 8 | echo "Usage: change-yaml.sh [options] SPARK_VERSION" 9 | echo 10 | echo "required arguments" 11 | echo 12 | echo " SPARK_VERSION The spark version number, like 3.0.0" 13 | echo 14 | echo "optional arguments:" 15 | echo 16 | echo " -h Show this message" 17 | } 18 | 19 | # Set the hadoop version 20 | HVER=3.2 21 | 22 | while getopts h opt; do 23 | case $opt in 24 | h) 25 | usage 26 | exit 0 27 | ;; 28 | \?) 29 | echo "Invalid option: -$OPTARG" >&2 30 | exit 1 31 | ;; 32 | esac 33 | done 34 | 35 | shift "$((OPTIND-1))" 36 | 37 | if [ "$#" -lt 1 ]; then 38 | echo No spark version specified 39 | usage 40 | exit 1 41 | fi 42 | 43 | SPARK=$1 44 | 45 | # Extract the current spark version from the image.yaml file 46 | # Works by parsing the line following "name: sparkversion" 47 | VER=$(sed -n '\@name: sparkversion@!b;n;p' image.yaml | tr -d '[:space:]' | cut -d':' -f2) 48 | if [ "$VER" == "$SPARK" ]; then 49 | echo "Nothing to do, spark version in image.yaml is already $SPARK" 50 | exit 0 51 | fi 52 | 53 | # Change spark distro and download urls 54 | if [ ! -z ${SPARK+x} ]; then 55 | 56 | # TODO remove this download when sha512 support lands in upstream cekit (elmiko) 57 | if [ -f "/tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz" ]; then 58 | echo 59 | echo Using existing "/tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz", if this is not what you want delete it and run again 60 | echo 61 | else 62 | wget https://archive.apache.org/dist/spark/spark-${SPARK}/spark-${SPARK}-bin-hadoop${HVER}.tgz -O /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz 63 | if [ "$?" -ne 0 ]; then 64 | echo "Failed to download the specified version Spark archive" 65 | exit 1 66 | fi 67 | fi 68 | 69 | wget https://archive.apache.org/dist/spark/spark-${SPARK}/spark-${SPARK}-bin-hadoop${HVER}.tgz.sha512 -O /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz.sha512 70 | if [ "$?" 
-ne 0 ]; then 71 | echo "Failed to download the sha512 sum for the specified Spark version" 72 | exit 1 73 | fi 74 | 75 | # TODO remove this checksum calculation when sha512 support lands in upstream cekit (elmiko) 76 | calcsum=$(sha512sum /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz | cut -d" " -f1) 77 | sum=$(cat /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz.sha512 | tr -d [:space:] | cut -d: -f2 | tr [:upper:] [:lower:]) 78 | if [ "$calcsum" != "$sum" ]; then 79 | echo "Failed to confirm authenticity of Spark archive, checksum mismatch" 80 | echo "sha512sum : ${calcsum}" 81 | echo ".sha512 file: ${sum}" 82 | exit 1 83 | fi 84 | 85 | # Fix the url references 86 | sed -i "s@https://archive\.apache\.org/dist/spark/spark-.*/spark-.*-bin-hadoop.*\.tgz@https://archive\.apache\.org/dist/spark/spark-${SPARK}/spark-${SPARK}-bin-hadoop${HVER}\.tgz@" modules/spark/module.yaml 87 | 88 | # TODO replace this with sha512 when it lands in upstream cekit (elmiko) 89 | # Fix the md5 sum references on the line following the url 90 | calcsum=$(md5sum /tmp/spark-${SPARK}-bin-hadoop${HVER}.tgz | cut -d" " -f1) 91 | sed -i '\@url: https://archive.apache.org/dist/spark/@!b;n;s/md5.*/md5: '$calcsum'/' modules/spark/module.yaml 92 | 93 | # Fix the spark version label 94 | sed -i '\@name: sparkversion@!b;n;s/value.*/value: '$SPARK'/' image.yaml 95 | 96 | # Fix the image version value (do this for incomplete as well) 97 | V=$(echo $SPARK | cut -d'.' -f1,2) 98 | sed -i 's@^version:.*-latest$@version: '$V'-latest@' image*.yaml 99 | fi 100 | 101 | git add image.yaml 102 | -------------------------------------------------------------------------------- /docs/functional-testing.md: -------------------------------------------------------------------------------- 1 | # Functional testing 2 | 3 | This repository contains a set of end-to-end functional tests. These tests 4 | will create the images, deploy them, and run a few basic connectivity and 5 | application suites. 6 | 7 | These tests will run automatically on all proposed changes to the project 8 | repository, but it is often useful to run them locally to diagnose changes or 9 | hunt for bugs. Although the tests are automated, running them locally requires 10 | a very specific setup. These instructions will guide you through the process. 11 | 12 | ## Prerequisites 13 | 14 | * Access to an OpenShift cluster available. You will need to have basic access 15 | to a cluster with the ability to create new projects and objects within 16 | those projects. We recommend using a local deployment methodology for these 17 | tests, you can find more information about deploying OpenShift in 18 | [this upstream documentation](https://docs.okd.io/latest/getting_started/administrators.html). 19 | * Access to the `docker` tooling on the OpenShift cluster instance. The test 20 | tooling will create and push the images to a local container registry using 21 | `docker`. The test suite will need to build and push images, ensure that 22 | you have this access. 23 | * GNU `make` available. The tests are run through the `Makefile`, you will 24 | need this command to start the entire process. 25 | * Go language tooling available. As the tests will attempt to build certain 26 | Go specific applications, you will need to have the Go tooling installed on 27 | the machine where the tests will run. 28 | 29 | ## Procedure 30 | 31 | 1. Download the source code. You will need to clone this repository onto the 32 | host where the tests will run. 33 | 1. Login to OpenShift and create a new project. 
The test scripts will attempt 34 | to determine your project namespace; occasionally it is possible to have a 35 | login with no associated project. To avoid errors, create a project with 36 | any name or switch to a previously used project; the test suite will create 37 | a new project for its work. 38 | 1. Start the tests. Change directory to the root of the repository clone and 39 | run the make command; this will start the tests and you will see the output 40 | in your terminal. This command will run all the tests: 41 | ``` 42 | make test-e2e 43 | ``` 44 | 45 | ## Additional resources 46 | 47 | * [Makefile](/Makefile). This is where all the action starts; see the entry 48 | for the `test-e2e` target. 49 | * [test/run.sh](/test/run.sh). This script file is the primary entrypoint for 50 | all the test suites; examine this file to understand how the 51 | tests are structured and executed. 52 | -------------------------------------------------------------------------------- /docs/spark-version-update-process.md: -------------------------------------------------------------------------------- 1 | # Updating the base image Spark version 2 | 3 | This document describes the general workflow for updating the Apache Spark 4 | version present in the base image. This guide follows the process for 5 | installing the default binary archives as distributed by the 6 | [Spark project](https://spark.apache.org). 7 | 8 | ## prerequisites 9 | 10 | * shell access 11 | * an editor available 12 | * access to the `docker` command line tool and a registry (for testing) 13 | * [cekit](https://cekit.readthedocs.io/en/latest/) available 14 | 15 | ## procedure 16 | 17 | ### update the version numbers 18 | 19 | 1. update version and download link in `image.yaml` 20 | 1. update version in `image-inc.yaml` (to keep consistent versioning) 21 | 22 | There is a script named `change-yaml.sh` that will automate this process; 23 | invoke it by typing the script name followed by the desired version. For 24 | example, if you were creating an update for version `3.0.0` of Spark, you 25 | would type the following: 26 | 27 | ``` 28 | ./change-yaml.sh 3.0.0 29 | ``` 30 | 31 | ### rebuild generated files 32 | 33 | 1. remove the generated cekit files for the previous version. 34 | ``` 35 | make clean-context 36 | make -f Makefile.inc clean-context 37 | ``` 38 | 1. generate the new cekit files. these will be the artifacts for image 39 | creation. 40 | ``` 41 | make context 42 | make -f Makefile.inc context 43 | ``` 44 | 1. zero the archive files. as these files are currently checked in to the 45 | repository it is important to zero out the archive files. they will be 46 | re-downloaded during the image construction phase. 47 | ``` 48 | make zero-tarballs 49 | ``` 50 | 51 | This process is also captured in a script file named `make-build-dir.sh`, which 52 | automates the process of cleaning and then regenerating the cekit files 53 | and Spark binaries. The script requires no parameters and it will attempt to 54 | add the updated files to the current git staging process. 55 | 56 | At this point the files are ready for testing. You can create new images from 57 | the files available in the directory. You will want to check these files in 58 | to your working branch before testing. 
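As a quick sanity check before committing (using `3.0.0` as an example version; exactly which generated files mention the version depends on the cekit templates), you can confirm that the regenerated build contexts reference the new archive:

```
grep -R "3\.0\.0" openshift-spark-build openshift-spark-build-inc
```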
59 | 60 | ## Build and test the images 61 | 62 | Build the images with the following command: 63 | 64 | ``` 65 | make build 66 | make -f Makefile.inc build 67 | ``` 68 | 69 | This will run an image build against the generated cekit files and store 70 | the image in the registry associated with your docker installation 71 | (usually localhost). 72 | 73 | The images are now ready for testing. 74 | -------------------------------------------------------------------------------- /hack/compress.awk: -------------------------------------------------------------------------------- 1 | # Helper functions 2 | function trim(s) { 3 | gsub(/^[ \t\r\n]+|[ \t\r\n]+$/, "", s); 4 | return s; 5 | } 6 | 7 | function printRecordAndCount(record, count) { 8 | print record; 9 | if (count > 1) { 10 | printf("... repeated %d times\n", count) 11 | } 12 | } 13 | 14 | BEGIN { 15 | # Before processing, set the record separator to the ASCII record separator character \x1e 16 | RS = "\x1e"; 17 | } 18 | 19 | # This action is executed for each record 20 | { 21 | # Build our current var from the trimmed record 22 | current = trim($0); 23 | 24 | # Bump the count of times we have seen it 25 | seen[current]++; 26 | 27 | # Print the previous record and its count (if it is not identical to the current record) 28 | if (previous && previous != current) { 29 | printRecordAndCount(previous, seen[previous]); 30 | } 31 | 32 | # Store the current record as the previous record 33 | previous = current; 34 | } 35 | 36 | END { 37 | # After processing, print the last record and count if it is non-empty 38 | if (previous) { 39 | printRecordAndCount(previous, seen[previous]); 40 | } 41 | } -------------------------------------------------------------------------------- /hack/lib/build/constants.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script provides constants for the Golang binary build process 4 | 5 | readonly OS_BUILD_ENV_GOLANG="${OS_BUILD_ENV_GOLANG:-1.7}" 6 | readonly OS_BUILD_ENV_IMAGE="${OS_BUILD_ENV_IMAGE:-openshift/origin-release:golang-${OS_BUILD_ENV_GOLANG}}" 7 | 8 | readonly OS_OUTPUT_SUBPATH="${OS_OUTPUT_SUBPATH:-_output/local}" 9 | readonly OS_OUTPUT="${OS_ROOT}/${OS_OUTPUT_SUBPATH}" 10 | readonly OS_LOCAL_RELEASEPATH="${OS_OUTPUT}/releases" 11 | readonly OS_OUTPUT_BINPATH="${OS_OUTPUT}/bin" 12 | readonly OS_OUTPUT_PKGDIR="${OS_OUTPUT}/pkgdir" 13 | 14 | readonly OS_GO_PACKAGE=github.com/openshift/origin 15 | 16 | readonly OS_SDN_COMPILE_TARGETS_LINUX=( 17 | pkg/sdn/plugin/sdn-cni-plugin 18 | vendor/github.com/containernetworking/cni/plugins/ipam/host-local 19 | vendor/github.com/containernetworking/cni/plugins/main/loopback 20 | ) 21 | readonly OS_IMAGE_COMPILE_TARGETS_LINUX=( 22 | images/pod 23 | cmd/dockerregistry 24 | cmd/gitserver 25 | "${OS_SDN_COMPILE_TARGETS_LINUX[@]}" 26 | ) 27 | readonly OS_SCRATCH_IMAGE_COMPILE_TARGETS_LINUX=( 28 | examples/hello-openshift 29 | examples/deployment 30 | ) 31 | readonly OS_IMAGE_COMPILE_BINARIES=("${OS_SCRATCH_IMAGE_COMPILE_TARGETS_LINUX[@]##*/}" "${OS_IMAGE_COMPILE_TARGETS_LINUX[@]##*/}") 32 | 33 | readonly OS_CROSS_COMPILE_TARGETS=( 34 | cmd/openshift 35 | cmd/oc 36 | ) 37 | readonly OS_CROSS_COMPILE_BINARIES=("${OS_CROSS_COMPILE_TARGETS[@]##*/}") 38 | 39 | readonly OS_TEST_TARGETS=( 40 | test/extended/extended.test 41 | ) 42 | 43 | #If you update this list, be sure to get the images/origin/Dockerfile 44 | readonly OPENSHIFT_BINARY_SYMLINKS=( 45 | openshift-router 46 | openshift-deploy 47 | openshift-recycle 
48 | openshift-sti-build 49 | openshift-docker-build 50 | origin 51 | osc 52 | oadm 53 | osadm 54 | kubectl 55 | kubernetes 56 | kubelet 57 | kube-proxy 58 | kube-apiserver 59 | kube-controller-manager 60 | kube-scheduler 61 | ) 62 | readonly OPENSHIFT_BINARY_COPY=( 63 | oadm 64 | kubelet 65 | kube-proxy 66 | kube-apiserver 67 | kube-controller-manager 68 | kube-scheduler 69 | ) 70 | readonly OC_BINARY_COPY=( 71 | kubectl 72 | ) 73 | readonly OS_BINARY_RELEASE_CLIENT_WINDOWS=( 74 | oc.exe 75 | README.md 76 | ./LICENSE 77 | ) 78 | readonly OS_BINARY_RELEASE_CLIENT_MAC=( 79 | oc 80 | README.md 81 | ./LICENSE 82 | ) 83 | readonly OS_BINARY_RELEASE_CLIENT_LINUX=( 84 | ./oc 85 | ./README.md 86 | ./LICENSE 87 | ) 88 | readonly OS_BINARY_RELEASE_SERVER_LINUX=( 89 | './*' 90 | ) 91 | readonly OS_BINARY_RELEASE_CLIENT_EXTRA=( 92 | ${OS_ROOT}/README.md 93 | ${OS_ROOT}/LICENSE 94 | ) -------------------------------------------------------------------------------- /hack/lib/build/rpm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This library holds utilities for building RPMs from Origin. 4 | 5 | # os::build::rpm::get_nvra_vars determines the NVRA of the RPMs 6 | # that would be built from the current git state. 7 | # 8 | # Globals: 9 | # - OS_GIT_VERSION 10 | # Arguments: 11 | # - None 12 | # Exports: 13 | # - OS_RPM_NAME 14 | # - OS_RPM_VERSION 15 | # - OS_RPM_RELEASE 16 | # - OS_RPM_ARCHITECTURE 17 | function os::build::rpm::get_nvra_vars() { 18 | # the package name can be overwritten but is normally 'origin' 19 | OS_RPM_NAME="${OS_RPM_NAME:-"origin"}" 20 | OS_RPM_ARCHITECTURE="$(uname -i)" 21 | 22 | # we can extract the package version from the build version 23 | os::build::get_version_vars 24 | if [[ "${OS_GIT_VERSION}" =~ ^v([0-9](\.[0-9]+)*)(.*) ]]; then 25 | OS_RPM_VERSION="${BASH_REMATCH[1]}" 26 | metadata="${BASH_REMATCH[3]}" 27 | else 28 | os::log::fatal "Malformed \$OS_GIT_VERSION: ${OS_GIT_VERSION}" 29 | fi 30 | 31 | # we can generate the package release from the git version metadata 32 | # OS_GIT_VERSION will always have metadata, but either contain 33 | # pre-release information _and_ build metadata, or only the latter 34 | # ex. 
35 | # -alpha.0+shasums-123-dirty 36 | # -alpha.0+shasums-123 37 | # +shasums-123-dirty 38 | # +shasums-123 39 | if [[ "${metadata:0:1}" == "+" ]]; then 40 | # we only have build metadata, but need to massage it so 41 | # we can generate a valid RPM release from it 42 | if [[ "${metadata}" =~ ^\+([a-z0-9]{7})-([0-9]+)(-dirty)?$ ]]; then 43 | build_sha="${BASH_REMATCH[1]}" 44 | build_num="${BASH_REMATCH[2]}" 45 | else 46 | os::log::fatal "Malformed git version metadata: ${metadata}" 47 | fi 48 | OS_RPM_RELEASE="1.${build_num}.${build_sha}" 49 | elif [[ "${metadata:0:1}" == "-" ]]; then 50 | # we have both build metadata and pre-release info 51 | if [[ "${metadata}" =~ ^-([^\+]+)\+([a-z0-9]{7})-([0-9]+)(-dirty)?$ ]]; then 52 | pre_release="${BASH_REMATCH[1]}" 53 | build_sha="${BASH_REMATCH[2]}" 54 | build_num="${BASH_REMATCH[3]}" 55 | else 56 | os::log::fatal "Malformed git version metadata: ${metadata}" 57 | fi 58 | OS_RPM_RELEASE="0.${pre_release}.${build_num}.${build_sha}" 59 | else 60 | os::log::fatal "Malformed git version metadata: ${metadata}" 61 | fi 62 | 63 | export OS_RPM_NAME OS_RPM_VERSION OS_RPM_RELEASE OS_RPM_ARCHITECTURE 64 | } 65 | 66 | 67 | # os::build::rpm::format_nvra formats the rpm NVRA vars generated by 68 | # os::build::rpm::get_nvra_vars and will generate them if necessary 69 | # 70 | # Globals: 71 | # - OS_RPM_NAME 72 | # - OS_RPM_VERSION 73 | # - OS_RPM_RELEASE 74 | # - OS_RPM_ARCHITECTURE 75 | # Arguments: 76 | # None 77 | # Returns: 78 | # None 79 | function os::build::rpm::format_nvra() { 80 | if [[ -z "${OS_RPM_NAME:-}" || -z "${OS_RPM_VERSION:-}" || -z "${OS_RPM_RELEASE:-}" ]]; then 81 | os::build::rpm::get_nvra_vars 82 | fi 83 | 84 | echo "${OS_RPM_NAME}-${OS_RPM_VERSION}-${OS_RPM_RELEASE}.${OS_RPM_ARCHITECTURE}" 85 | } 86 | -------------------------------------------------------------------------------- /hack/lib/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This library holds functions that are used to clean up local 4 | # system state after other scripts have run. 5 | 6 | # os::cleanup::dump_etcd dumps the full contents of etcd to a file. 7 | # 8 | # Globals: 9 | # ARTIFACT_DIR 10 | # Arguments: 11 | # None 12 | # Returns: 13 | # None 14 | function os::cleanup::dump_etcd() { 15 | os::log::info "Dumping etcd contents to ${ARTIFACT_DIR}/etcd_dump.json" 16 | os::util::curl_etcd "/v2/keys/?recursive=true" > "${ARTIFACT_DIR}/etcd_dump.json" 17 | } -------------------------------------------------------------------------------- /hack/lib/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is meant to be the entrypoint for OpenShift Bash scripts to import all of the support 4 | # libraries at once in order to make Bash script preambles as minimal as possible. This script recur- 5 | # sively `source`s *.sh files in this directory tree. As such, no files should be `source`ed outside 6 | # of this script to ensure that we do not attempt to overwrite read-only variables. 
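# strict mode for everything sourced below: exit on errors, treat unset variables as errors, and fail a pipeline if any stage in it fails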
7 | 8 | set -o errexit 9 | set -o nounset 10 | set -o pipefail 11 | 12 | # os::util::absolute_path returns the absolute path to the directory provided 13 | function os::util::absolute_path() { 14 | local relative_path="$1" 15 | local absolute_path 16 | 17 | pushd "${relative_path}" >/dev/null 18 | relative_path="$( pwd )" 19 | if [[ -h "${relative_path}" ]]; then 20 | absolute_path="$( readlink "${relative_path}" )" 21 | else 22 | absolute_path="${relative_path}" 23 | fi 24 | popd >/dev/null 25 | 26 | echo "${absolute_path}" 27 | } 28 | readonly -f os::util::absolute_path 29 | 30 | # find the absolute path to the root of the Origin source tree 31 | init_source="$( dirname "${BASH_SOURCE}" )/../.." 32 | OS_ROOT="$( os::util::absolute_path "${init_source}" )" 33 | export OS_ROOT 34 | cd "${OS_ROOT}" 35 | 36 | library_files=( $( find "${OS_ROOT}/hack/lib" -type f -name '*.sh' -not -path '*/hack/lib/init.sh' ) ) 37 | echo $library_files 38 | # TODO(skuzmets): Move the contents of the following files into respective library files. 39 | library_files+=( "${OS_ROOT}/hack/common.sh" ) 40 | library_files+=( "${OS_ROOT}/hack/util.sh" ) 41 | 42 | for library_file in "${library_files[@]}"; do 43 | source "${library_file}" 44 | done 45 | 46 | unset library_files library_file init_source 47 | 48 | # all of our Bash scripts need to have the stacktrace 49 | # handler installed to deal with errors 50 | os::log::stacktrace::install 51 | 52 | # All of our Bash scripts need to have access to the 53 | # binaries that we build so we don't have to find 54 | # them before every invocation. 55 | os::util::environment::update_path_var -------------------------------------------------------------------------------- /hack/lib/log/output.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This file contains functions used for writing log messages 4 | # to stdout and stderr from scripts while they run. 5 | 6 | # os::log::info writes the message to stdout. 7 | # 8 | # Arguments: 9 | # - all: message to write 10 | function os::log::info() { 11 | os::log::internal::prefix_lines "[INFO]" "$*" 12 | } 13 | readonly -f os::log::info 14 | 15 | # os::log::warn writes the message to stderr. 16 | # A warning indicates something went wrong but 17 | # not so wrong that we cannot recover. 18 | # 19 | # Arguments: 20 | # - all: message to write 21 | function os::log::warn() { 22 | os::text::print_yellow "$( os::log::internal::prefix_lines "[WARNING]" "$*" )" 1>&2 23 | } 24 | readonly -f os::log::warn 25 | 26 | # os::log::error writes the message to stderr. 27 | # An error indicates that something went wrong 28 | # and we will most likely fail after this. 29 | # 30 | # Arguments: 31 | # - all: message to write 32 | function os::log::error() { 33 | os::text::print_red "$( os::log::internal::prefix_lines "[ERROR]" "$*" )" 1>&2 34 | } 35 | readonly -f os::log::error 36 | 37 | # os::log::fatal writes the message to stderr and 38 | # returns a non-zero code to force a process exit. 39 | # A fatal error indicates that there is no chance 40 | # of recovery. 41 | # 42 | # Arguments: 43 | # - all: message to write 44 | function os::log::fatal() { 45 | os::text::print_red "$( os::log::internal::prefix_lines "[FATAL]" "$*" )" 1>&2 46 | exit 1 47 | } 48 | readonly -f os::log::fatal 49 | 50 | # os::log::debug writes the message to stderr if 51 | # the ${OS_DEBUG} variable is set. 
52 | # 53 | # Arguments: 54 | # - all: message to write 55 | function os::log::debug() { 56 | if [[ -n "${OS_DEBUG:-}" ]]; then 57 | os::text::print_blue "$( os::log::internal::prefix_lines "[DEBUG]" "$*" )" 1>&2 58 | fi 59 | } 60 | readonly -f os::log::debug 61 | 62 | # os::log::internal::prefix_lines prints out the 63 | # original content with the given prefix at the 64 | # start of every line. 65 | # 66 | # Arguments: 67 | # - 1: prefix for lines 68 | # - 2: content to prefix 69 | function os::log::internal::prefix_lines() { 70 | local prefix="$1" 71 | local content="$2" 72 | 73 | local old_ifs="${IFS}" 74 | IFS=$'\n' 75 | for line in ${content}; do 76 | echo "${prefix} ${line}" 77 | done 78 | IFS="${old_ifs}" 79 | } -------------------------------------------------------------------------------- /hack/lib/log/stacktrace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library contains an implementation of a stack trace for Bash scripts. 4 | 5 | # os::log::stacktrace::install installs the stacktrace as a handler for the ERR signal if one 6 | # has not already been installed and sets `set -o errtrace` in order to propagate the handler 7 | # If the ERR trap is not initialized, installing this plugin will initialize it. 8 | # 9 | # Globals: 10 | # None 11 | # Arguments: 12 | # None 13 | # Returns: 14 | # - export OS_USE_STACKTRACE 15 | function os::log::stacktrace::install() { 16 | # setting 'errtrace' propagates our ERR handler to functions, expansions and subshells 17 | set -o errtrace 18 | 19 | # OS_USE_STACKTRACE is read by os::util::trap at runtime to request a stacktrace 20 | export OS_USE_STACKTRACE=true 21 | 22 | os::util::trap::init_err 23 | } 24 | readonly -f os::log::stacktrace::install 25 | 26 | # os::log::stacktrace::print prints the stacktrace and exits with the return code from the script that 27 | # called for a stack trace. This function will always return 0 if it is not handling the signal, and if it 28 | # is handling the signal, this function will always `exit`, not return, the return code it receives as 29 | # its first argument. 30 | # 31 | # Globals: 32 | # - BASH_SOURCE 33 | # - BASH_LINENO 34 | # - FUNCNAME 35 | # Arguments: 36 | # - 1: the return code of the command in the script that generated the ERR signal 37 | # - 2: the last command that ran before handlers were invoked 38 | # - 3: whether or not `set -o errexit` was set in the script that generated the ERR signal 39 | # Returns: 40 | # None 41 | function os::log::stacktrace::print() { 42 | local return_code=$1 43 | local last_command=$2 44 | local errexit_set=${3:-} 45 | 46 | if [[ "${return_code}" = "0" ]]; then 47 | # we're not supposed to respond when no error has occurred 48 | return 0 49 | fi 50 | 51 | if [[ -z "${errexit_set}" ]]; then 52 | # if errexit wasn't set in the shell when the ERR signal was issued, then we can ignore the signal 53 | # as this is not cause for failure 54 | return 0 55 | fi 56 | 57 | # iterate backwards through the stack until we leave library files, so we can be sure we start logging 58 | # actual script code and not this handler's call 59 | local stack_begin_index 60 | for (( stack_begin_index = 0; stack_begin_index < ${#BASH_SOURCE[@]}; stack_begin_index++ )); do 61 | if [[ ! 
"${BASH_SOURCE[${stack_begin_index}]}" =~ hack/lib/(log/stacktrace|util/trap)\.sh ]]; then 62 | break 63 | fi 64 | done 65 | 66 | local preamble_finished 67 | local stack_index=1 68 | local i 69 | for (( i = stack_begin_index; i < ${#BASH_SOURCE[@]}; i++ )); do 70 | local bash_source 71 | bash_source="$( os::util::repository_relative_path "${BASH_SOURCE[$i]}" )" 72 | if [[ -z "${preamble_finished:-}" ]]; then 73 | preamble_finished=true 74 | os::log::error "PID ${BASHPID:-$$}: ${bash_source}:${BASH_LINENO[$i-1]}: \`${last_command}\` exited with status ${return_code}." >&2 75 | os::log::info $'\t\t'"Stack Trace: " >&2 76 | os::log::info $'\t\t'" ${stack_index}: ${bash_source}:${BASH_LINENO[$i-1]}: \`${last_command}\`" >&2 77 | else 78 | os::log::info $'\t\t'" ${stack_index}: ${bash_source}:${BASH_LINENO[$i-1]}: ${FUNCNAME[$i-1]}" >&2 79 | fi 80 | stack_index=$(( stack_index + 1 )) 81 | done 82 | 83 | # we know we're the privileged handler in this chain, so we can safely exit the shell without 84 | # starving another handler of the privilege of reacting to this signal 85 | os::log::info " Exiting with code ${return_code}." >&2 86 | exit "${return_code}" 87 | } 88 | readonly -f os::log::stacktrace::print 89 | -------------------------------------------------------------------------------- /hack/lib/util/docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library holds utility functions related to the generation 4 | # of manpages and docs. 5 | 6 | 7 | function generate_manual_pages() { 8 | local dest="$1" 9 | local cmdName="$2" 10 | local filestore=".files_generated_${cmdName}" 11 | local skipprefix="${3:-}" 12 | 13 | os::util::environment::setup_tmpdir_vars generate/manuals 14 | 15 | # We do this in a tmpdir in case the dest has other non-autogenned files 16 | # We don't want to include them in the list of gen'd files 17 | local tmpdir="${BASETMPDIR}/gen_man" 18 | mkdir -p "${tmpdir}" 19 | # generate the new files 20 | genman "${tmpdir}" "${cmdName}" 21 | # create the list of generated files 22 | ls "${tmpdir}" | LC_ALL=C sort > "${tmpdir}/${filestore}" 23 | 24 | # remove all old generated file from the destination 25 | while read file; do 26 | if [[ -e "${tmpdir}/${file}" && -n "${skipprefix}" ]]; then 27 | local original generated 28 | original=$(grep -v "^${skipprefix}" "${dest}/${file}") || : 29 | generated=$(grep -v "^${skipprefix}" "${tmpdir}/${file}") || : 30 | if [[ "${original}" == "${generated}" ]]; then 31 | # overwrite generated with original. 
32 | mv "${dest}/${file}" "${tmpdir}/${file}" 33 | fi 34 | else 35 | rm "${dest}/${file}" || true 36 | fi 37 | done <"${dest}/${filestore}" 38 | 39 | # put the new generated file into the destination 40 | find "${tmpdir}" -exec rsync -pt {} "${dest}" \; >/dev/null 41 | #cleanup 42 | rm -rf "${tmpdir}" 43 | 44 | echo "Assets generated in ${dest}" 45 | } 46 | readonly -f generate_manual_pages 47 | 48 | function generate_documentation() { 49 | local dest="$1" 50 | local skipprefix="${1:-}" 51 | 52 | os::util::environment::setup_tmpdir_vars generate/docs 53 | 54 | # We do this in a tmpdir in case the dest has other non-autogenned files 55 | # We don't want to include them in the list of gen'd files 56 | local tmpdir="${BASETMPDIR}/gen_doc" 57 | mkdir -p "${tmpdir}" 58 | # generate the new files 59 | gendocs "${tmpdir}" 60 | # create the list of generated files 61 | ls "${tmpdir}" | LC_ALL=C sort > "${tmpdir}/.files_generated" 62 | 63 | # remove all old generated file from the destination 64 | while read file; do 65 | if [[ -e "${tmpdir}/${file}" && -n "${skipprefix}" ]]; then 66 | local original generated 67 | original=$(grep -v "^${skipprefix}" "${dest}/${file}") || : 68 | generated=$(grep -v "^${skipprefix}" "${tmpdir}/${file}") || : 69 | if [[ "${original}" == "${generated}" ]]; then 70 | # overwrite generated with original. 71 | mv "${dest}/${file}" "${tmpdir}/${file}" 72 | fi 73 | else 74 | rm "${dest}/${file}" || true 75 | fi 76 | done <"${dest}/.files_generated" 77 | 78 | # put the new generated file into the destination 79 | find "${tmpdir}" -exec rsync -pt {} "${dest}" \; >/dev/null 80 | #cleanup 81 | rm -rf "${tmpdir}" 82 | 83 | echo "Assets generated in ${dest}" 84 | } 85 | readonly -f generate_documentation 86 | 87 | # os::util::gen-docs generates docs and manpages for the all the binaries 88 | # created for Origin. 89 | function os::util::gen-docs() { 90 | os::util::ensure::built_binary_exists 'gendocs' 91 | os::util::ensure::built_binary_exists 'genman' 92 | 93 | OUTPUT_DIR_REL=${1:-""} 94 | OUTPUT_DIR="${OS_ROOT}/${OUTPUT_DIR_REL}/docs/generated" 95 | MAN_OUTPUT_DIR="${OS_ROOT}/${OUTPUT_DIR_REL}/docs/man/man1" 96 | 97 | mkdir -p "${OUTPUT_DIR}" 98 | mkdir -p "${MAN_OUTPUT_DIR}" 99 | 100 | generate_documentation "${OUTPUT_DIR}" 101 | generate_manual_pages "${MAN_OUTPUT_DIR}" "oc" 102 | generate_manual_pages "${MAN_OUTPUT_DIR}" "openshift" 103 | generate_manual_pages "${MAN_OUTPUT_DIR}" "oadm" 104 | } 105 | readonly -f os::util::gen-docs 106 | 107 | # os::util::set-man-placeholder puts a placeholder for every generated manpage. 108 | function os::util::set-man-placeholder() { 109 | MAN_OUTPUT_DIR="$1" 110 | declare -A generated_files=( ["${1}/.files_generated_oadm"]="${1}/.files_generated_oadm" 111 | ["${1}/.files_generated_oc"]="${1}/.files_generated_oc" 112 | ["${1}/.files_generated_openshift"]="${1}/.files_generated_openshift" ) 113 | 114 | # remove all of the old manpages; we don't want to check them in. 115 | for generated_file in "${generated_files[@]}"; do 116 | while read file; do 117 | generated_man="$MAN_OUTPUT_DIR/${file}" 118 | if [[ "${generated_man}" != "${generated_files[$generated_file]}" ]]; then 119 | cp "${OS_ROOT}/hack/autogenerated_placeholder.txt" "${generated_man}" 120 | fi 121 | done <"${generated_file}" 122 | done 123 | } 124 | readonly -f os::util::set-man-placeholder 125 | 126 | # os::util::set-docs-placeholder puts a placeholder for every generated doc. 
127 | function os::util::set-docs-placeholder() { 128 | OUTPUT_DIR="$1" 129 | local generated_file="${OUTPUT_DIR}/.files_generated" 130 | 131 | # remove all of the old docs; we don't want to check them in. 132 | while read file; do 133 | generated_doc="$OUTPUT_DIR/${file}" 134 | if [[ "${generated_doc}" != "${generated_file}" ]]; then 135 | cp "${OS_ROOT}/hack/autogenerated_placeholder.txt" "${generated_doc}" 136 | fi 137 | done <"${generated_file}" 138 | } 139 | readonly -f os::util::set-docs-placeholder 140 | -------------------------------------------------------------------------------- /hack/lib/util/ensure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script contains helper functions for ensuring that dependencies 4 | # exist on a host system that are required to run Origin scripts. 5 | 6 | # os::util::ensure::system_binary_exists ensures that the 7 | # given binary exists on the system in the $PATH. 8 | # 9 | # Globals: 10 | # None 11 | # Arguments: 12 | # - 1: binary to search for 13 | # Returns: 14 | # None 15 | function os::util::ensure::system_binary_exists() { 16 | local binary="$1" 17 | 18 | if ! os::util::find::system_binary "${binary}" >/dev/null 2>&1; then 19 | os::log::fatal "Required \`${binary}\` binary was not found in \$PATH." 20 | fi 21 | } 22 | readonly -f os::util::ensure::system_binary_exists 23 | 24 | # os::util::ensure::built_binary_exists ensures that the 25 | # given binary exists on the system in the local output 26 | # directory for the current platform. If it doesn't, we 27 | # will attempt to build it if we can determine the correct 28 | # hack/build-go.sh target for the binary. 29 | # 30 | # This function will attempt to determine the correct 31 | # hack/build-go.sh target for the binary, but may not 32 | # be able to do so if the target doesn't live under 33 | # cmd/ or tools/. In that case, one should be given. 34 | # 35 | # Globals: 36 | # - OS_ROOT 37 | # Arguments: 38 | # - 1: binary to search for 39 | # - 2: optional build target for this binary 40 | # Returns: 41 | # None 42 | function os::util::ensure::built_binary_exists() { 43 | local binary="$1" 44 | local target="${2:-}" 45 | 46 | if ! os::util::find::built_binary "${binary}" >/dev/null 2>&1; then 47 | if [[ -z "${target}" ]]; then 48 | if [[ -d "${OS_ROOT}/cmd/${binary}" ]]; then 49 | target="cmd/${binary}" 50 | elif [[ -d "${OS_ROOT}/tools/${binary}" ]]; then 51 | target="tools/${binary}" 52 | elif [[ -d "${OS_ROOT}/tools/rebasehelpers/${binary}" ]]; then 53 | target="tools/rebasehelpers/${binary}" 54 | fi 55 | fi 56 | 57 | if [[ -n "${target}" ]]; then 58 | os::log::warn "No compiled \`${binary}\` binary was found. Attempting to build one using: 59 | $ hack/build-go.sh ${target}" 60 | "${OS_ROOT}/hack/build-go.sh" "${target}" 61 | else 62 | os::log::fatal "No compiled \`${binary}\` binary was found and no build target could be determined. 63 | Provide the binary and try running $0 again." 64 | fi 65 | fi 66 | } 67 | readonly -f os::util::ensure::built_binary_exists 68 | 69 | # os::util::ensure::gopath_binary_exists ensures that the 70 | # given binary exists on the system in $GOPATH. 71 | # 72 | # Globals: 73 | # - GOPATH 74 | # Arguments: 75 | # - 1: binary to search for 76 | # Returns: 77 | # None 78 | function os::util::ensure::gopath_binary_exists() { 79 | local binary="$1" 80 | 81 | if ! 
os::util::find::gopath_binary "${binary}" >/dev/null 2>&1; then 82 | os::log::fatal "Required \`${binary}\` binary was not found in \$GOPATH." 83 | fi 84 | } 85 | readonly -f os::util::ensure::gopath_binary_exists 86 | 87 | # os::util::ensure::iptables_privileges_exist tests if the 88 | # testing machine has iptables available and in PATH. Also 89 | # tests that the user can list iptables rules, trying with 90 | # `sudo` if it fails without. 91 | # 92 | # Globals: 93 | # None 94 | # Arguments: 95 | # None 96 | # Returns: 97 | # None 98 | function os::util::ensure::iptables_privileges_exist() { 99 | os::util::ensure::system_binary_exists 'iptables' 100 | 101 | if ! iptables --list >/dev/null 2>&1 && ! sudo iptables --list >/dev/null 2>&1; then 102 | os::log::fatal "You do not have \`iptables\` or \`sudo\` privileges. Kubernetes services will not work 103 | without \`iptables\` access. See https://github.com/kubernetes/kubernetes/issues/1859." 104 | fi 105 | } 106 | readonly -f os::util::ensure::iptables_privileges_exist -------------------------------------------------------------------------------- /hack/lib/util/find.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script contains helper functions for finding components 4 | # in the Origin repository or on the host machine running scripts. 5 | 6 | # os::util::find::system_binary determines the absolute path to a 7 | # system binary, if it exists. 8 | # 9 | # Globals: 10 | # None 11 | # Arguments: 12 | # - 1: binary name 13 | # Returns: 14 | # - location of the binary 15 | function os::util::find::system_binary() { 16 | local binary_name="$1" 17 | 18 | command -v "${binary_name}" 19 | } 20 | readonly -f os::util::find::system_binary 21 | 22 | # os::util::find::built_binary determines the absolute path to a 23 | # built binary for the current platform, if it exists. 24 | # 25 | # Globals: 26 | # - OS_OUTPUT_BINPATH 27 | # Arguments: 28 | # - 1: binary name 29 | # Returns: 30 | # - location of the binary 31 | function os::util::find::built_binary() { 32 | local binary_name="$1" 33 | 34 | local binary_path; binary_path="${OS_OUTPUT_BINPATH}/$( os::build::host_platform )/${binary_name}" 35 | # we need to check that the path leads to a file 36 | # as directories also have the executable bit set 37 | if [[ -f "${binary_path}" && -x "${binary_path}" ]]; then 38 | echo "${binary_path}" 39 | return 0 40 | else 41 | return 1 42 | fi 43 | } 44 | readonly -f os::util::find::built_binary 45 | 46 | # os::util::find::gopath_binary determines the absolute path to a 47 | # binary installed through the go toolchain, if it exists. 
48 | # 49 | # Globals: 50 | # - GOPATH 51 | # Arguments: 52 | # - 1: binary name 53 | # Returns: 54 | # - location of the binary 55 | function os::util::find::gopath_binary() { 56 | local binary_name="$1" 57 | 58 | local old_ifs="${IFS}" 59 | IFS=":" 60 | for part in ${GOPATH}; do 61 | local binary_path="${part}/bin/${binary_name}" 62 | # we need to check that the path leads to a file 63 | # as directories also have the executable bit set 64 | if [[ -f "${binary_path}" && -x "${binary_path}" ]]; then 65 | echo "${binary_path}" 66 | IFS="${old_ifs}" 67 | return 0 68 | fi 69 | done 70 | IFS="${old_ifs}" 71 | return 1 72 | } 73 | readonly -f os::util::find::gopath_binary -------------------------------------------------------------------------------- /hack/lib/util/golang.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library holds golang related utility functions. 4 | 5 | # os::golang::verify_go_version ensure the go tool exists and is a viable version. 6 | function os::golang::verify_go_version() { 7 | os::util::ensure::system_binary_exists 'go' 8 | 9 | local go_version 10 | go_version=($(go version)) 11 | if [[ "${go_version[2]}" != go1.7* ]]; then 12 | os::log::info "Detected go version: ${go_version[*]}." 13 | if [[ -z "${PERMISSIVE_GO:-}" ]]; then 14 | os::log::error "Please install Go version 1.7 or use PERMISSIVE_GO=y to bypass this check." 15 | return 1 16 | else 17 | os::log::warn "Detected golang version doesn't match preferred Go version for Origin." 18 | os::log::warn "This version mismatch could lead to differences in execution between this run and the Origin CI systems." 19 | return 0 20 | fi 21 | fi 22 | } 23 | readonly -f os::golang::verify_go_version 24 | -------------------------------------------------------------------------------- /hack/lib/util/misc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library holds miscellaneous utility functions. If there begin to be groups of functions in this 4 | # file that share intent or are thematically similar, they should be split into their own files. 5 | 6 | # os::util::describe_return_code describes an exit code 7 | # 8 | # Globals: 9 | # - OS_SCRIPT_START_TIME 10 | # Arguments: 11 | # - 1: exit code to describe 12 | # Returns: 13 | # None 14 | function os::util::describe_return_code() { 15 | local return_code=$1 16 | 17 | if [[ "${return_code}" = "0" ]]; then 18 | echo -n "[INFO] $0 succeeded " 19 | else 20 | echo -n "[ERROR] $0 failed " 21 | fi 22 | 23 | if [[ -n "${OS_SCRIPT_START_TIME:-}" ]]; then 24 | local end_time 25 | end_time="$(date +%s)" 26 | local elapsed_time 27 | elapsed_time="$(( end_time - OS_SCRIPT_START_TIME ))" 28 | local formatted_time 29 | formatted_time="$( os::util::format_seconds "${elapsed_time}" )" 30 | echo "after ${formatted_time}" 31 | else 32 | echo 33 | fi 34 | } 35 | readonly -f os::util::describe_return_code 36 | 37 | # os::util::install_describe_return_code installs the return code describer for the EXIT trap 38 | # If the EXIT trap is not initialized, installing this plugin will initialize it. 
39 | # 40 | # Globals: 41 | # None 42 | # Arguments: 43 | # None 44 | # Returns: 45 | # - export OS_DESCRIBE_RETURN_CODE 46 | # - export OS_SCRIPT_START_TIME 47 | function os::util::install_describe_return_code() { 48 | export OS_DESCRIBE_RETURN_CODE="true" 49 | OS_SCRIPT_START_TIME="$( date +%s )"; export OS_SCRIPT_START_TIME 50 | os::util::trap::init_exit 51 | } 52 | readonly -f os::util::install_describe_return_code 53 | 54 | # OS_ORIGINAL_WD is the original working directory the script sourcing this utility file was called 55 | # from. This is an important directory as if $0 is a relative path, we cannot use the following path 56 | # utility without knowing from where $0 is relative. 57 | if [[ -z "${OS_ORIGINAL_WD:-}" ]]; then 58 | # since this could be sourced in a context where the utilities are already loaded, 59 | # we want to ensure that this is re-entrant, so we only set $OS_ORIGINAL_WD if it 60 | # is not set already 61 | OS_ORIGINAL_WD="$( pwd )" 62 | readonly OS_ORIGINAL_WD 63 | export OS_ORIGINAL_WD 64 | fi 65 | 66 | # os::util::repository_relative_path returns the relative path from the $OS_ROOT directory to the 67 | # given file, if the file is inside of the $OS_ROOT directory. If the file is outside of $OS_ROOT, 68 | # this function will return the absolute path to the file 69 | # 70 | # Globals: 71 | # - OS_ROOT 72 | # Arguments: 73 | # - 1: the path to relativize 74 | # Returns: 75 | # None 76 | function os::util::repository_relative_path() { 77 | local filename=$1 78 | local directory; directory="$( dirname "${filename}" )" 79 | filename="$( basename "${filename}" )" 80 | 81 | if [[ "${directory}" != "${OS_ROOT}"* ]]; then 82 | pushd "${OS_ORIGINAL_WD}" >/dev/null 2>&1 83 | directory="$( os::util::absolute_path "${directory}" )" 84 | popd >/dev/null 2>&1 85 | fi 86 | 87 | directory="${directory##*${OS_ROOT}/}" 88 | 89 | echo "${directory}/${filename}" 90 | } 91 | readonly -f os::util::repository_relative_path 92 | 93 | # os::util::format_seconds formats a duration of time in seconds to print in HHh MMm SSs 94 | # 95 | # Globals: 96 | # None 97 | # Arguments: 98 | # - 1: time in seconds to format 99 | # Return: 100 | # None 101 | function os::util::format_seconds() { 102 | local raw_seconds=$1 103 | 104 | local hours minutes seconds 105 | (( hours=raw_seconds/3600 )) 106 | (( minutes=(raw_seconds%3600)/60 )) 107 | (( seconds=raw_seconds%60 )) 108 | 109 | printf '%02dh %02dm %02ds' "${hours}" "${minutes}" "${seconds}" 110 | } 111 | readonly -f os::util::format_seconds 112 | 113 | # os::util::sed attempts to make our Bash scripts agnostic to the platform 114 | # on which they run `sed` by glossing over a discrepancy in flag use in GNU. 115 | # 116 | # Globals: 117 | # None 118 | # Arguments: 119 | # - all: arguments to pass to `sed -i` 120 | # Return: 121 | # None 122 | function os::util::sed() { 123 | if LANG=C sed --help 2>&1 | grep -q "GNU sed"; then 124 | sed -i'' "$@" 125 | else 126 | sed -i '' "$@" 127 | fi 128 | } 129 | readonly -f os::util::sed 130 | 131 | # os::util::base64decode attempts to make our Bash scripts agnostic to the platform 132 | # on which they run `base64decode` by glossing over a discrepancy in flag use in GNU. 
133 | # 134 | # Globals: 135 | # None 136 | # Arguments: 137 | # - all: arguments to pass to `base64decode` 138 | # Return: 139 | # None 140 | function os::util::base64decode() { 141 | if [[ "$(go env GOHOSTOS)" == "darwin" ]]; then 142 | base64 -D "$@" 143 | else 144 | base64 -d "$@" 145 | fi 146 | } 147 | readonly -f os::util::base64decode 148 | 149 | # os::util::curl_etcd sends a request to the backing etcd store for the master. 150 | # We use the administrative client cert and key for access and re-encode them 151 | # as necessary for OSX clients. 152 | # 153 | # Globals: 154 | # MASTER_CONFIG_DIR 155 | # API_SCHEME 156 | # API_HOST 157 | # ETCD_PORT 158 | # Arguments: 159 | # - 1: etcd-relative URL to curl, with leading slash 160 | # Returns: 161 | # None 162 | function os::util::curl_etcd() { 163 | local url="$1" 164 | local full_url="${API_SCHEME}://${API_HOST}:${ETCD_PORT}${url}" 165 | 166 | local etcd_client_cert="${MASTER_CONFIG_DIR}/master.etcd-client.crt" 167 | local etcd_client_key="${MASTER_CONFIG_DIR}/master.etcd-client.key" 168 | local ca_bundle="${MASTER_CONFIG_DIR}/ca-bundle.crt" 169 | 170 | if curl -V | grep -q 'SecureTransport'; then 171 | # on newer OSX `curl` implementations, SSL is not used and client certs 172 | # and keys are expected to be encoded in P12 format instead of PEM format, 173 | # so we need to convert the secrets that the server wrote if we haven't 174 | # already done so 175 | local etcd_client_cert_p12="${MASTER_CONFIG_DIR}/master.etcd-client.crt.p12" 176 | local etcd_client_cert_p12_password="${CURL_CERT_P12_PASSWORD:-'password'}" 177 | if [[ ! -f "${etcd_client_cert_p12}" ]]; then 178 | openssl pkcs12 -export \ 179 | -in "${etcd_client_cert}" \ 180 | -inkey "${etcd_client_key}" \ 181 | -out "${etcd_client_cert_p12}" \ 182 | -password "pass:${etcd_client_cert_p12_password}" 183 | fi 184 | 185 | curl --fail --silent --cacert "${ca_bundle}" \ 186 | --cert "${etcd_client_cert_p12}:${etcd_client_cert_p12_password}" "${full_url}" 187 | else 188 | curl --fail --silent --cacert "${ca_bundle}" \ 189 | --cert "${etcd_client_cert}" --key "${etcd_client_key}" "${full_url}" 190 | fi 191 | } 192 | 193 | # os::util::host_platform determines what the host OS and architecture 194 | # are, as Golang sees it. The go tool chain does some slightly different 195 | # things when the target platform matches the host platform. 196 | # 197 | # Globals: 198 | # None 199 | # Arguments: 200 | # None 201 | # Returns: 202 | # None 203 | function os::util::host_platform() { 204 | echo "$(go env GOHOSTOS)/$(go env GOHOSTARCH)" 205 | } 206 | readonly -f os::util::host_platform -------------------------------------------------------------------------------- /hack/lib/util/text.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This file contains helpful aliases for manipulating the output text to the terminal as 4 | # well as functions for one-command augmented printing. 
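# Example usage (illustrative, using the helpers defined below): either call a one-shot printer,
#   os::text::print_red_bold "[ERROR] build failed"
# or bracket output with the primitives directly,
#   os::text::green; echo "tests passed"; os::text::reset
# Color codes are only emitted when output is a TTY (see os::text::internal::is_tty below).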
5 | 6 | # os::text::reset resets the terminal output to default if it is called in a TTY 7 | function os::text::reset() { 8 | if os::text::internal::is_tty; then 9 | tput sgr0 10 | fi 11 | } 12 | readonly -f os::text::reset 13 | 14 | # os::text::bold sets the terminal output to bold text if it is called in a TTY 15 | function os::text::bold() { 16 | if os::text::internal::is_tty; then 17 | tput bold 18 | fi 19 | } 20 | readonly -f os::text::bold 21 | 22 | # os::text::red sets the terminal output to red text if it is called in a TTY 23 | function os::text::red() { 24 | if os::text::internal::is_tty; then 25 | tput setaf 1 26 | fi 27 | } 28 | readonly -f os::text::red 29 | 30 | # os::text::green sets the terminal output to green text if it is called in a TTY 31 | function os::text::green() { 32 | if os::text::internal::is_tty; then 33 | tput setaf 2 34 | fi 35 | } 36 | readonly -f os::text::green 37 | 38 | # os::text::blue sets the terminal output to blue text if it is called in a TTY 39 | function os::text::blue() { 40 | if os::text::internal::is_tty; then 41 | tput setaf 4 42 | fi 43 | } 44 | readonly -f os::text::blue 45 | 46 | # os::text::yellow sets the terminal output to yellow text if it is called in a TTY 47 | function os::text::yellow() { 48 | if os::text::internal::is_tty; then 49 | tput setaf 11 50 | fi 51 | } 52 | readonly -f os::text::yellow 53 | 54 | # os::text::clear_last_line clears the text from the last line of output to the 55 | # terminal and leaves the cursor on that line to allow for overwriting that text 56 | # if it is called in a TTY 57 | function os::text::clear_last_line() { 58 | if os::text::internal::is_tty; then 59 | tput cuu 1 60 | tput el 61 | fi 62 | } 63 | readonly -f os::text::clear_last_line 64 | 65 | # os::text::clear_string attempts to clear the entirety of a string from the terminal. 66 | # If the string contains literal tabs or other characters that take up more than one 67 | # character space in output, or if the window size is changed before this function 68 | # is called, it will not function correctly. 
69 | # No action is taken if this is called outside of a TTY 70 | function os::text::clear_string() { 71 | local -r string="$1" 72 | if os::text::internal::is_tty; then 73 | echo "${string}" | while read line; do 74 | # num_lines is the number of terminal lines this one line of output 75 | # would have taken up with the current terminal width in columns 76 | local num_lines=$(( ${#line} / $( tput cols ) )) 77 | for (( i = 0; i <= num_lines; i++ )); do 78 | os::text::clear_last_line 79 | done 80 | done 81 | fi 82 | } 83 | 84 | # os::text::internal::is_tty determines if we are outputting to a TTY 85 | function os::text::internal::is_tty() { 86 | [[ -t 1 && -n "${TERM:-}" ]] 87 | } 88 | readonly -f os::text::internal::is_tty 89 | 90 | # os::text::print_bold prints all input in bold text 91 | function os::text::print_bold() { 92 | os::text::bold 93 | echo "${*}" 94 | os::text::reset 95 | } 96 | readonly -f os::text::print_bold 97 | 98 | # os::text::print_red prints all input in red text 99 | function os::text::print_red() { 100 | os::text::red 101 | echo "${*}" 102 | os::text::reset 103 | } 104 | readonly -f os::text::print_red 105 | 106 | # os::text::print_red_bold prints all input in bold red text 107 | function os::text::print_red_bold() { 108 | os::text::red 109 | os::text::bold 110 | echo "${*}" 111 | os::text::reset 112 | } 113 | readonly -f os::text::print_red_bold 114 | 115 | # os::text::print_green prints all input in green text 116 | function os::text::print_green() { 117 | os::text::green 118 | echo "${*}" 119 | os::text::reset 120 | } 121 | readonly -f os::text::print_green 122 | 123 | # os::text::print_green_bold prints all input in bold green text 124 | function os::text::print_green_bold() { 125 | os::text::green 126 | os::text::bold 127 | echo "${*}" 128 | os::text::reset 129 | } 130 | readonly -f os::text::print_green_bold 131 | 132 | # os::text::print_blue prints all input in blue text 133 | function os::text::print_blue() { 134 | os::text::blue 135 | echo "${*}" 136 | os::text::reset 137 | } 138 | readonly -f os::text::print_blue 139 | 140 | # os::text::print_blue_bold prints all input in bold blue text 141 | function os::text::print_blue_bold() { 142 | os::text::blue 143 | os::text::bold 144 | echo "${*}" 145 | os::text::reset 146 | } 147 | readonly -f os::text::print_blue_bold 148 | 149 | # os::text::print_yellow prints all input in yellow text 150 | function os::text::print_yellow() { 151 | os::text::yellow 152 | echo "${*}" 153 | os::text::reset 154 | } 155 | readonly -f os::text::print_yellow 156 | 157 | # os::text::print_yellow_bold prints all input in bold yellow text 158 | function os::text::print_yellow_bold() { 159 | os::text::yellow 160 | os::text::bold 161 | echo "${*}" 162 | os::text::reset 163 | } 164 | readonly -f os::text::print_yellow_bold 165 | -------------------------------------------------------------------------------- /hack/lib/util/trap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This library defines the trap handlers for the ERR and EXIT signals. Any new handler for these signals 4 | # must be added to these handlers and activated by the environment variable mechanism that the rest use. 5 | # These functions ensure that no handler can ever alter the exit code that was emitted by a command 6 | # in a test script. 7 | 8 | # os::util::trap::init_err initializes the privileged handler for the ERR signal if it hasn't 9 | # been registered already. 
This will overwrite any other handlers registered on the signal. 10 | # 11 | # Globals: 12 | # None 13 | # Arguments: 14 | # None 15 | # Returns: 16 | # None 17 | function os::util::trap::init_err() { 18 | if ! trap -p ERR | grep -q 'os::util::trap::err_handler'; then 19 | trap 'os::util::trap::err_handler;' ERR 20 | fi 21 | } 22 | readonly -f os::util::trap::init_err 23 | 24 | # os::util::trap::init_exit initializes the privileged handler for the EXIT signal if it hasn't 25 | # been registered already. This will overwrite any other handlers registered on the signal. 26 | # 27 | # Globals: 28 | # None 29 | # Arguments: 30 | # None 31 | # Returns: 32 | # None 33 | function os::util::trap::init_exit() { 34 | if ! trap -p EXIT | grep -q 'os::util::trap::exit_handler'; then 35 | trap 'os::util::trap::exit_handler;' EXIT 36 | fi 37 | } 38 | readonly -f os::util::trap::init_exit 39 | 40 | # os::util::trap::err_handler is the handler for the ERR signal. 41 | # 42 | # Globals: 43 | # - OS_TRAP_DEBUG 44 | # - OS_USE_STACKTRACE 45 | # Arguments: 46 | # None 47 | # Returns: 48 | # - returns original return code, allows privileged handler to exit if necessary 49 | function os::util::trap::err_handler() { 50 | local -r return_code=$? 51 | local -r last_command="${BASH_COMMAND}" 52 | 53 | if set +o | grep -q '\-o errexit'; then 54 | local -r errexit_set=true 55 | fi 56 | 57 | if [[ "${OS_TRAP_DEBUG:-}" = "true" ]]; then 58 | echo "[DEBUG] Error handler executing with return code \`${return_code}\`, last command \`${last_command}\`, and errexit set \`${errexit_set:-}\`" 59 | fi 60 | 61 | if [[ "${OS_USE_STACKTRACE:-}" = "true" ]]; then 62 | # the OpenShift stacktrace function is treated as a privileged handler for this signal 63 | # and is therefore allowed to run outside of a subshell in order to allow it to `exit` 64 | # if necessary 65 | os::log::stacktrace::print "${return_code}" "${last_command}" "${errexit_set:-}" 66 | fi 67 | 68 | return "${return_code}" 69 | } 70 | readonly -f os::util::trap::err_handler 71 | 72 | # os::util::trap::exit_handler is the handler for the EXIT signal. 73 | # 74 | # Globals: 75 | # - OS_TRAP_DEBUG 76 | # - OS_DESCRIBE_RETURN_CODE 77 | # Arguments: 78 | # None 79 | # Returns: 80 | # - original exit code of the script that exited 81 | function os::util::trap::exit_handler() { 82 | local -r return_code=$? 83 | 84 | # we do not want these traps to be able to trigger more errors, we can let them fail silently 85 | set +o errexit 86 | 87 | if [[ "${OS_TRAP_DEBUG:-}" = "true" ]]; then 88 | echo "[DEBUG] Exit handler executing with return code \`${return_code}\`" 89 | fi 90 | 91 | # the following envars selectively enable optional exit traps, all of which are run inside of 92 | # a subshell in order to sandbox them and not allow them to influence how this script will exit 93 | if [[ "${OS_DESCRIBE_RETURN_CODE:-}" = "true" ]]; then 94 | ( os::util::describe_return_code "${return_code}" ) 95 | fi 96 | 97 | exit "${return_code}" 98 | } 99 | readonly -f os::util::trap::exit_handler 100 | -------------------------------------------------------------------------------- /hack/util.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Provides simple utility functions 4 | 5 | # kill_all_processes function will kill all 6 | # all processes created by the test script. 
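# In outline, the function below walks the shell's own background jobs (`jobs -pr`),
# kills each job's children first (`pgrep -P <pid>`), then the job leader itself,
# prefixing the kill commands with sudo only when USE_SUDO is set.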
7 | function kill_all_processes() { 8 | local sudo="${USE_SUDO:+sudo}" 9 | 10 | pids=($(jobs -pr)) 11 | for i in ${pids[@]-}; do 12 | pgrep -P "${i}" | xargs $sudo kill &> /dev/null 13 | $sudo kill ${i} &> /dev/null 14 | done 15 | } 16 | readonly -f kill_all_processes 17 | 18 | # dump_container_logs writes container logs to $LOG_DIR 19 | function dump_container_logs() { 20 | if ! docker version >/dev/null 2>&1; then 21 | return 22 | fi 23 | 24 | mkdir -p ${LOG_DIR} 25 | 26 | os::log::info "Dumping container logs to ${LOG_DIR}" 27 | for container in $(docker ps -aq); do 28 | container_name=$(docker inspect -f "{{.Name}}" $container) 29 | # strip off leading / 30 | container_name=${container_name:1} 31 | if [[ "$container_name" =~ ^k8s_ ]]; then 32 | pod_name=$(echo $container_name | awk 'BEGIN { FS="[_.]+" }; { print $4 }') 33 | container_name=${pod_name}-$(echo $container_name | awk 'BEGIN { FS="[_.]+" }; { print $2 }') 34 | fi 35 | docker logs "$container" >&"${LOG_DIR}/container-${container_name}.log" 36 | done 37 | } 38 | readonly -f dump_container_logs 39 | 40 | # delete_empty_logs deletes empty logs 41 | function delete_empty_logs() { 42 | # Clean up zero byte log files 43 | find "${ARTIFACT_DIR}" "${LOG_DIR}" -type f -name '*.log' \( -empty \) -delete 44 | } 45 | readonly -f delete_empty_logs 46 | 47 | # truncate_large_logs truncates large logs 48 | function truncate_large_logs() { 49 | # Clean up large log files so they don't end up on jenkins 50 | local max_file_size="100M" 51 | local large_files=$(find "${ARTIFACT_DIR}" "${LOG_DIR}" -type f -name '*.log' \( -size +${max_file_size} \)) 52 | for file in ${large_files}; do 53 | mv "${file}" "${file}.tmp" 54 | echo "LOGFILE TOO LONG ($(du -h "${file}.tmp")), PREVIOUS BYTES TRUNCATED. LAST ${max_file_size} OF LOGFILE:" > "${file}" 55 | tail -c ${max_file_size} "${file}.tmp" >> "${file}" 56 | rm "${file}.tmp" 57 | done 58 | } 59 | readonly -f truncate_large_logs 60 | 61 | ###### 62 | # start of common functions for extended test group's run.sh scripts 63 | ###### 64 | 65 | # cleanup_openshift saves container logs, saves resources, and kills all processes and containers 66 | function cleanup_openshift() { 67 | LOG_DIR="${LOG_DIR:-${BASETMPDIR}/logs}" 68 | ARTIFACT_DIR="${ARTIFACT_DIR:-${LOG_DIR}}" 69 | API_HOST="${API_HOST:-127.0.0.1}" 70 | API_SCHEME="${API_SCHEME:-https}" 71 | ETCD_PORT="${ETCD_PORT:-4001}" 72 | 73 | set +e 74 | dump_container_logs 75 | 76 | # pull information out of the server log so that we can get failure management in jenkins to highlight it and 77 | # really have it smack people in their logs. This is a severe correctness problem 78 | grep -a5 "CACHE.*ALTERED" ${LOG_DIR}/openshift.log 79 | 80 | os::cleanup::dump_etcd 81 | 82 | if [[ -z "${SKIP_TEARDOWN-}" ]]; then 83 | os::log::info "Tearing down test" 84 | kill_all_processes 85 | 86 | if docker version >/dev/null 2>&1; then 87 | os::log::info "Stopping k8s docker containers"; docker ps | awk 'index($NF,"k8s_")==1 { print $1 }' | xargs -l -r docker stop -t 1 >/dev/null 88 | if [[ -z "${SKIP_IMAGE_CLEANUP-}" ]]; then 89 | os::log::info "Removing k8s docker containers"; docker ps -a | awk 'index($NF,"k8s_")==1 { print $1 }' | xargs -l -r docker rm -v >/dev/null 90 | fi 91 | fi 92 | 93 | os::log::info "Pruning etcd data directory..." 
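# Note on the idiom used just below: "${USE_SUDO:+sudo}" expands to the literal string
# "sudo" only when USE_SUDO is set and non-empty, and to an empty string otherwise, so the
# same command line works with or without privilege escalation, e.g.
#   USE_SUDO=true  ->  sudo rm -rf "${ETCD_DATA_DIR}"
#   USE_SUDO unset ->       rm -rf "${ETCD_DATA_DIR}"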
94 | local sudo="${USE_SUDO:+sudo}" 95 | ${sudo} rm -rf "${ETCD_DATA_DIR}" 96 | 97 | set -u 98 | fi 99 | 100 | if grep -q 'no Docker socket found' "${LOG_DIR}/openshift.log" && command -v journalctl >/dev/null 2>&1; then 101 | # the Docker daemon crashed, we need the logs 102 | journalctl --unit docker.service --since -4hours > "${LOG_DIR}/docker.log" 103 | fi 104 | 105 | delete_empty_logs 106 | truncate_large_logs 107 | 108 | os::log::info "Cleanup complete" 109 | set -e 110 | } 111 | readonly -f cleanup_openshift 112 | 113 | ###### 114 | # end of common functions for extended test group's run.sh scripts 115 | ###### 116 | 117 | function find_files() { 118 | find . -not \( \ 119 | \( \ 120 | -wholename './_output' \ 121 | -o -wholename './.*' \ 122 | -o -wholename './pkg/assets/bindata.go' \ 123 | -o -wholename './pkg/assets/*/bindata.go' \ 124 | -o -wholename './pkg/bootstrap/bindata.go' \ 125 | -o -wholename './openshift.local.*' \ 126 | -o -wholename '*/vendor/*' \ 127 | -o -wholename './assets/bower_components/*' \ 128 | \) -prune \ 129 | \) -name '*.go' | sort -u 130 | } 131 | readonly -f find_files 132 | -------------------------------------------------------------------------------- /image-inc.yaml: -------------------------------------------------------------------------------- 1 | schema_version: 1 2 | 3 | # The S2I bits are included to pick up the usage script and 4 | # to give a meaningful error on an attempt to install Spark in 5 | # a complete image via S2I 6 | 7 | version: 3.0 8 | release: community 9 | name: radanalyticsio/openshift-spark-inc 10 | from: centos:8 11 | labels: 12 | - name: maintainer 13 | value: Trevor McKay 14 | - name: "io.openshift.s2i.scripts-url" 15 | value: "image:///usr/libexec/s2i" 16 | 17 | modules: 18 | repositories: 19 | - path: modules 20 | 21 | install: 22 | - name: common 23 | - name: metrics 24 | - name: s2i 25 | packages: 26 | install: 27 | - java-11-openjdk 28 | - rsync 29 | run: 30 | user: 185 31 | entrypoint: 32 | - "/entrypoint" 33 | cmd: 34 | - "/usr/libexec/s2i/usage" 35 | -------------------------------------------------------------------------------- /image.yaml: -------------------------------------------------------------------------------- 1 | schema_version: 1 2 | 3 | # The S2I bits are included to pick up the usage script and 4 | # to give a meaningful error on an attempt to install Spark in 5 | # a complete image via S2I 6 | 7 | version: 3.0 8 | release: community 9 | name: radanalyticsio/openshift-spark 10 | from: centos:8 11 | labels: 12 | - name: maintainer 13 | value: Trevor McKay 14 | - name: sparkversion 15 | value: 3.0.1 16 | - name: "io.openshift.s2i.scripts-url" 17 | value: "image:///usr/libexec/s2i" 18 | 19 | packages: 20 | install: 21 | - java-11-openjdk 22 | # python36 added in the common module 23 | - python3-numpy 24 | 25 | modules: 26 | repositories: 27 | - path: modules 28 | install: 29 | - name: common 30 | - name: metrics 31 | - name: spark 32 | - name: s2i 33 | run: 34 | user: 185 35 | entrypoint: 36 | - "/entrypoint" 37 | cmd: 38 | - "/launch.sh" 39 | workdir: /tmp 40 | -------------------------------------------------------------------------------- /make-build-dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Regenerate the build directory based on image.*.yaml 4 | make clean-target 5 | make clean-context 6 | make -f Makefile.inc clean-context 7 | 8 | make context 9 | make -f Makefile.inc context 10 | 11 | make zero-tarballs 12 | make -f 
Makefile.inc zero-tarballs 13 | 14 | # Add any changes for a commit 15 | git add openshift-spark-build 16 | git add openshift-spark-build-inc 17 | -------------------------------------------------------------------------------- /modules/common/added/conf/agent-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | lowercaseOutputName: true 3 | lowercaseOutputLabelNames: true 4 | whitelistObjectNames: 5 | - 'metrics:*' 6 | 7 | rules: 8 | 9 | # These come from the master 10 | # Example: master.aliveWorkers 11 | - pattern: "metrics<>Value" 12 | name: spark_master_$1 13 | 14 | # These come from the worker 15 | # Example: worker.coresFree 16 | - pattern: "metrics<>Value" 17 | name: spark_worker_$1 18 | 19 | # These come from the application driver 20 | # Example: app-20160809000059-0000.driver.DAGScheduler.stage.failedStages 21 | - pattern: "metrics<>Value" 22 | name: spark_driver_$2_$3 23 | labels: 24 | app_id: "$1" 25 | 26 | # These come from the application driver if it's a streaming application 27 | # Example: app-20160809000059-0000.driver.com.example.ClassName.StreamingMetrics.streaming.lastCompletedBatch_schedulingDelay 28 | - pattern: "metrics<>Value" 29 | name: spark_streaming_driver_$3 30 | labels: 31 | app_id: "$1" 32 | app_name: "$2" 33 | 34 | # These come from the application driver if it's a structured streaming application 35 | # Example: app-20160809000059-0000.driver.spark.streaming.QueryName.inputRate-total 36 | - pattern: "metrics<>Value" 37 | name: spark_structured_streaming_driver_$3 38 | labels: 39 | app_id: "$1" 40 | query_name: "$2" 41 | 42 | # These come from the application executors 43 | # Example: app-20160809000059-0000.0.executor.threadpool.activeTasks 44 | - pattern: "metrics<>Value" 45 | name: spark_executor_$3 46 | labels: 47 | app_id: "$1" 48 | executor_id: "$2" 49 | 50 | # These come from the master 51 | # Example: application.com.example.ClassName.1470700859054.cores 52 | - pattern: "metrics<>Value" 53 | name: spark_application_$3 54 | labels: 55 | app_name: "$1" 56 | app_start_epoch: "$2" 57 | -------------------------------------------------------------------------------- /modules/common/added/conf/agent.properties: -------------------------------------------------------------------------------- 1 | jmx_exporter=7777:/opt/spark/conf/agent-config.yaml 2 | -------------------------------------------------------------------------------- /modules/common/added/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /modules/common/added/conf/metrics.properties: -------------------------------------------------------------------------------- 1 | *.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 2 | master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 3 | worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 4 | driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 5 | executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 6 | application.source.jvm.class=org.apache.spark.metrics.source.JvmSource 7 | -------------------------------------------------------------------------------- /modules/common/added/conf/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 
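# Anything set here is only a default: values passed to spark-submit via --conf, or set
# programmatically on SparkConf, take precedence over this file. For example (illustrative):
#   spark-submit --conf spark.ui.reverseProxy=false ...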
20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.ui.reverseProxy true 29 | spark.ui.reverseProxyUrl / 30 | -------------------------------------------------------------------------------- /modules/common/added/scripts/entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If we get an s2i command and it's anything but "run" just do it 4 | # Otherwise we'll turn it into a launch 5 | if [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == *"$STI_SCRIPTS_PATH"* ]]; then 6 | if ! [[ $@ == *"$STI_SCRIPTS_PATH"/run* ]]; then 7 | exec "$@" 8 | exit $? 9 | fi 10 | CMD=/launch.sh 11 | 12 | # allow just a simple "usage" command to print the usage script 13 | elif [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == "usage" ]]; then 14 | exec $STI_SCRIPTS_PATH/usage 15 | exit $? 16 | else 17 | CMD=$@ 18 | fi 19 | 20 | trap handle_term TERM INT 21 | 22 | function handle_term { 23 | echo Received a termination signal 24 | 25 | local cnt 26 | local killed=1 27 | if [ -n "$PID" ]; then 28 | echo "Stopping subprocess $PID" 29 | kill -TERM $PID 30 | for cnt in {1..10} 31 | do 32 | kill -0 $PID >/dev/null 2>&1 33 | if [ "$?" -ne 0 ]; then 34 | killed=0 35 | break 36 | else 37 | sleep 1 38 | fi 39 | done 40 | if [ "$killed" -ne 0 ]; then 41 | echo Process is still running 10 seconds after TERM, sending KILL 42 | kill -9 $PID 43 | fi 44 | wait $PID 45 | echo "Subprocess stopped" 46 | fi 47 | exit 0 48 | } 49 | 50 | function patch_uid { 51 | # Check whether there is a passwd entry for the container UID 52 | myuid=$(id -u) 53 | mygid=$(id -g) 54 | uidentry=$(getent passwd $myuid) 55 | 56 | # If there is no passwd entry for the container UID, attempt to create one 57 | if [ -z "$uidentry" ] ; then 58 | if [ -w /etc/passwd ] ; then 59 | echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd 60 | else 61 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 62 | fi 63 | fi 64 | } 65 | 66 | # If we receive a spark-on-kube command, hand it off to the 67 | # standard spark entrypoint 68 | case "$1" in 69 | driver | executor) 70 | $SPARK_INSTALL/entrypoint.sh $CMD & 71 | ;; 72 | *) 73 | patch_uid 74 | $CMD & 75 | ;; 76 | esac 77 | PID=$! 78 | wait $PID 79 | -------------------------------------------------------------------------------- /modules/common/added/scripts/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function check_reverse_proxy { 4 | grep -e "^spark\.ui\.reverseProxy" $SPARK_HOME/conf/spark-defaults.conf &> /dev/null 5 | if [ "$?" 
-ne 0 ]; then 6 | echo "Appending default reverse proxy config to spark-defaults.conf" 7 | echo "spark.ui.reverseProxy true" >> $SPARK_HOME/conf/spark-defaults.conf 8 | echo "spark.ui.reverseProxyUrl /" >> $SPARK_HOME/conf/spark-defaults.conf 9 | fi 10 | } 11 | 12 | # If the UPDATE_SPARK_CONF_DIR dir is non-empty, 13 | # copy the contents to $SPARK_HOME/conf 14 | if [ -d "$UPDATE_SPARK_CONF_DIR" ]; then 15 | sparkconfs=$(ls -1 $UPDATE_SPARK_CONF_DIR | wc -l) 16 | if [ "$sparkconfs" -ne "0" ]; then 17 | echo "Copying from $UPDATE_SPARK_CONF_DIR to $SPARK_HOME/conf" 18 | ls -1 $UPDATE_SPARK_CONF_DIR 19 | cp $UPDATE_SPARK_CONF_DIR/* $SPARK_HOME/conf 20 | fi 21 | elif [ -n "$UPDATE_SPARK_CONF_DIR" ]; then 22 | echo "Directory $UPDATE_SPARK_CONF_DIR does not exist, using default spark config" 23 | fi 24 | 25 | check_reverse_proxy 26 | 27 | if [ -z ${SPARK_METRICS_ON+_} ]; then 28 | JAVA_AGENT= 29 | metrics="" 30 | elif [ ${SPARK_METRICS_ON} == "prometheus" ]; then 31 | JAVA_AGENT=" -javaagent:/opt/metrics/agent-bond.jar=$SPARK_HOME/conf/agent.properties" 32 | metrics=" with prometheus metrics enabled" 33 | else 34 | JAVA_AGENT=" -javaagent:/opt/metrics/jolokia-jvm-1.3.6-agent.jar=port=7777,host=0.0.0.0" 35 | metrics=" with jolokia metrics enabled (deprecated, set SPARK_METRICS_ON to 'prometheus')" 36 | fi 37 | 38 | if [ -z ${SPARK_MASTER_ADDRESS+_} ]; then 39 | echo "Starting master$metrics" 40 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.master.Master 41 | else 42 | echo "Starting worker$metrics, will connect to: $SPARK_MASTER_ADDRESS" 43 | 44 | # spark://x.y.z:7077 -> x.y.z/7077 45 | _MASTER_HOST_AND_PORT=$(echo $SPARK_MASTER_ADDRESS | sed -r 's;.*//(.*):(.*);\1/\2;g') 46 | while true; do 47 | echo "Waiting for spark master to be available ..." 48 | timeout 1 sh -c "(</dev/tcp/$_MASTER_HOST_AND_PORT) &>/dev/null" 49 | if [ $? -eq 0 ]; then 50 | break 51 | fi 52 | sleep 1 53 | done 54 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.worker.Worker $SPARK_MASTER_ADDRESS 55 | fi 56 | -------------------------------------------------------------------------------- /modules/common/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | 8 | # Put entrypoint and launch.sh at the root 9 | cp $ADDED_DIR/scripts/* / 10 | 11 | # Set up a place for spark to go 12 | # We'll also stage our default spark config files here 13 | # so that when spark is installed they can be copied over 14 | # if the spark tarball itself does not include files of the same name 15 | # (ie, "copy if not overwrite") 16 | if ! [ -d $SPARK_INSTALL ]; then 17 | mkdir -p $SPARK_INSTALL 18 | mv $ADDED_DIR/conf $SPARK_INSTALL 19 | chown -R 185:0 $SPARK_INSTALL && chmod -R g+rwX $SPARK_INSTALL 20 | ln -sfn $SPARK_INSTALL/distro /opt/spark 21 | fi 22 | 23 | # Change the permissions on /etc/passwd so that anonymous user 24 | # can be added to satisfy Spark 25 | chgrp root /etc/passwd && chmod g+rw /etc/passwd 26 | 27 | # Make Python3 the default.
This is important because it seems 28 | # that Spark still wants to invoke python scripts with "python" 29 | # in the executors, as opposed to "python3" 30 | alternatives --set python /usr/bin/python3 31 | -------------------------------------------------------------------------------- /modules/common/module.yaml: -------------------------------------------------------------------------------- 1 | version: 1.0 2 | 3 | name: common 4 | envs: 5 | - name: SPARK_INSTALL 6 | value: /opt/spark-distro 7 | packages: 8 | install: 9 | - python36 10 | execute: 11 | - script: install 12 | -------------------------------------------------------------------------------- /modules/metrics/added/agent-bond.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/modules/metrics/added/agent-bond.jar -------------------------------------------------------------------------------- /modules/metrics/added/jolokia-jvm-1.3.6-agent.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar -------------------------------------------------------------------------------- /modules/metrics/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -u 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | mkdir -p /opt/metrics 8 | mv $ADDED_DIR/*.jar /opt/metrics 9 | 10 | chown -R 185:0 /opt/metrics && chmod g+rwX /opt/metrics 11 | -------------------------------------------------------------------------------- /modules/metrics/module.yaml: -------------------------------------------------------------------------------- 1 | version: 1.0 2 | 3 | name: metrics 4 | execute: 5 | - script: install 6 | -------------------------------------------------------------------------------- /modules/s2i/added/assemble: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source $STI_SCRIPTS_PATH/s2i-env-vars 4 | 5 | # Just a word about the directory structure 6 | # SPARK_HOME == /opt/spark 7 | # SPARK_INSTALL == /opt/spark-distro 8 | 9 | # Extra things like default configuration files and additional 10 | # boot scripts may be stored in SPARK_INSTALL 11 | 12 | # At runtime, /opt/spark is a symlink to /opt/spark-distro/distro 13 | # but /opt/spark-distro/distro does not actually exist yet 14 | 15 | # The Spark tarball will be expanded in /opt/spark-distro using 16 | # it's original name, for example /opt/spark-distro/spark-2.3.0-bin-hadoop2.7, 17 | # as a dev aid to tracking and version checking 18 | 19 | # Ultimately, /opt/spark-distro/distro is created as a symlink to the Spark root 20 | # directory. 
This double-hop from /opt/spark to the Spark root through symlinks 21 | # allows the Spark installation to be staged in the base image but completed in 22 | # the S2I build without expanding permissions 23 | 24 | function match_sum { 25 | local sumfile=$1 26 | local delim=$2 27 | local sha512=$3 28 | local initial=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" -f1 | tr [:upper:] [:lower:]) 29 | local rest=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" --complement -f1 | tr [:upper:] [:lower:]) 30 | if [ "$sha512" == "$initial" ] || [ "$sha512" == "$rest" ]; then 31 | return 0 32 | fi 33 | return 1 34 | } 35 | 36 | if [ -f $SPARK_HOME/bin/spark-submit ]; then 37 | echo "Spark is installed, nothing to do" 38 | exit 1 39 | else 40 | echo "Attempting to install Spark" 41 | # If a url has been specfified for spark use it 42 | if [ -n "$SPARK_URL" ]; then 43 | echo Downloading $SPARK_URL 44 | wget $SPARK_URL -P $S2I_SOURCE_DIR 45 | fi 46 | if [ -n "$SPARK_SHA512_URL" ]; then 47 | echo Downloading $SPARK_SHA512_URL 48 | wget $SPARK_SHA512_URL -P $S2I_SOURCE_DIR 49 | fi 50 | 51 | for spark in $(ls "$S2I_SOURCE_DIR"); do 52 | 53 | spark=$S2I_SOURCE_DIR/$spark 54 | echo Found $spark 55 | echo Checking for valid Spark archive 56 | 57 | # Is the file a directory? If it contains spark-submit, move it 58 | if [ -d "$spark" ]; then 59 | if ! [ -f $spark/bin/spark-submit ]; then 60 | echo Ignoring directory $spark, no spark-submit 61 | continue 62 | fi 63 | echo Installing from directory $spark 64 | sparkdir=$SPARK_INSTALL/$(basename $spark) 65 | mv $spark $SPARK_INSTALL 66 | else 67 | # If we can get the table of contents, it's a tar archive, otherwise ignore 68 | tar -tf $spark &> /dev/null 69 | if [ "$?" -ne 0 ]; then 70 | echo Ignoring $spark, not a tar archive 71 | continue 72 | fi 73 | echo Validating tar archive $spark 74 | 75 | # Does the tarball contain a spark-submit? 76 | name=$(tar -tzf $spark | grep -e "^[^/]*/bin/spark-submit$") 77 | if [ "$?" -ne 0 ]; then 78 | echo Ignoring tarball $spark, no spark-submit 79 | continue 80 | else 81 | echo Found valid tar archive, matching checksums 82 | # See if we have an sha512 file to match against 83 | if [ -f "$spark".sha512 ]; then 84 | calcvalue=$(sha512sum "$spark" | cut -d\ -f1) 85 | # split the sha512 file using a colon 86 | match_sum "$spark".sha512 \: $calcvalue 87 | matched="$?" 88 | if [ "$matched" -ne 0 ]; then 89 | # split the sha512 file using equals sign in case it's BSD 90 | match_sum "$spark".sha512 \= $calcvalue 91 | matched="$?" 
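# Note: the two attempts above are meant to cover the checksum layouts commonly published
# for Apache downloads (illustrative examples):
#   spark-3.0.1-bin-hadoop3.2.tgz: ABCD 1234 ...          (colon-delimited, gpg --print-md style)
#   SHA512 (spark-3.0.1-bin-hadoop3.2.tgz) = abcd1234...   (equals-delimited, BSD style)
# match_sum strips whitespace, splits on the given delimiter, lowercases both halves, and
# accepts the tarball if either half equals the locally computed sha512 digest.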
92 | fi 93 | if [ "$matched" -ne 0 ]; then 94 | echo Ignoring tarball $spark, sha512sum did not match 95 | continue 96 | fi 97 | fi 98 | 99 | # dname will be the intial directory from the path of spark-submit 100 | # we found in the tarball, ie the dir created by tar 101 | echo Installing from tarball $spark 102 | dname=$(dirname $name | cut -d/ -f 1) 103 | sparkdir=$SPARK_INSTALL/$dname 104 | tar -xzf $spark -C $SPARK_INSTALL 105 | fi 106 | fi 107 | 108 | ln -s $sparkdir $SPARK_INSTALL/distro 109 | 110 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 111 | entry=$(find $sparkdir/kubernetes -name entrypoint.sh) 112 | if [ -n "$entry" ]; then 113 | echo Installing spark native entrypoint for use with spark-on-k8s commands 114 | cp $entry $SPARK_INSTALL 115 | 116 | # We want to get rid of the tini invocation 117 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 118 | else 119 | echo No spark native entrypoint found for use with spark-on-k8s commands 120 | fi 121 | 122 | # Include the default spark configuration files 123 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 124 | 125 | # If someone included mods in a parallel directory, install them with rsync 126 | # Don't try to preserve permisions, owner, or group because we don't have 127 | # any control over how s2i uploaded the files, so there's no use preserving. 128 | if [ -x /usr/bin/rsync ] && [ -d "$S2I_SOURCE_DIR/modify-spark" ]; then 129 | echo Found a modify-spark directory, running rsync to install changes 130 | rsync -vrltD "$S2I_SOURCE_DIR/modify-spark/" $SPARK_HOME 131 | fi 132 | 133 | # Spark workers need to write to the spark directory to track apps 134 | chmod -R g+rwX $sparkdir 135 | 136 | # Can we run spark-submit? 137 | $SPARK_HOME/bin/spark-submit --version 138 | if [ "$?" -eq 0 ]; then 139 | echo Spark installed successfully 140 | exit 0 141 | else 142 | echo Cannot run spark-submit, Spark install failed 143 | fi 144 | 145 | # Just in case there is more than one tarball, clean up 146 | rm -rf $sparkdir 147 | done 148 | 149 | echo no valid Spark distribution found 150 | 151 | if [ -n "$DEBUG_ASSEMBLE" ]; then 152 | echo Looping forever so you can \'oc rsh\' 153 | while true; do 154 | sleep 5 155 | done 156 | fi 157 | exit 1 158 | fi 159 | -------------------------------------------------------------------------------- /modules/s2i/added/s2i-env-vars: -------------------------------------------------------------------------------- 1 | # Local vars setup with defaults 2 | S2I_DESTINATION=${S2I_DESTINATION:-/tmp} 3 | S2I_SOURCE_DIR="${S2I_DESTINATION}/src" 4 | -------------------------------------------------------------------------------- /modules/s2i/added/usage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f "$SPARK_HOME"/bin/spark-submit ]; then 3 | cat <> /etc/passwd 33 | else 34 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 35 | fi 36 | fi 37 | 38 | SPARK_K8S_CMD="$1" 39 | if [ -z "$SPARK_K8S_CMD" ]; then 40 | echo "No command to execute has been provided." 
1>&2 41 | exit 1 42 | fi 43 | shift 1 44 | 45 | SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*" 46 | env | grep SPARK_JAVA_OPT_ | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt 47 | readarray -t SPARK_JAVA_OPTS < /tmp/java_opts.txt 48 | if [ -n "$SPARK_MOUNTED_CLASSPATH" ]; then 49 | SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_MOUNTED_CLASSPATH" 50 | fi 51 | if [ -n "$SPARK_MOUNTED_FILES_DIR" ]; then 52 | cp -R "$SPARK_MOUNTED_FILES_DIR/." . 53 | fi 54 | 55 | case "$SPARK_K8S_CMD" in 56 | driver) 57 | CMD=( 58 | ${JAVA_HOME}/bin/java 59 | "${SPARK_JAVA_OPTS[@]}" 60 | -cp "$SPARK_CLASSPATH" 61 | -Xms$SPARK_DRIVER_MEMORY 62 | -Xmx$SPARK_DRIVER_MEMORY 63 | -Dspark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS 64 | $SPARK_DRIVER_CLASS 65 | $SPARK_DRIVER_ARGS 66 | ) 67 | ;; 68 | 69 | executor) 70 | CMD=( 71 | ${JAVA_HOME}/bin/java 72 | "${SPARK_JAVA_OPTS[@]}" 73 | -Xms$SPARK_EXECUTOR_MEMORY 74 | -Xmx$SPARK_EXECUTOR_MEMORY 75 | -cp "$SPARK_CLASSPATH" 76 | org.apache.spark.executor.CoarseGrainedExecutorBackend 77 | --driver-url $SPARK_DRIVER_URL 78 | --executor-id $SPARK_EXECUTOR_ID 79 | --cores $SPARK_EXECUTOR_CORES 80 | --app-id $SPARK_APPLICATION_ID 81 | --hostname $SPARK_EXECUTOR_POD_IP 82 | ) 83 | ;; 84 | 85 | init) 86 | CMD=( 87 | "$SPARK_HOME/bin/spark-class" 88 | "org.apache.spark.deploy.k8s.SparkPodInitContainer" 89 | "$@" 90 | ) 91 | ;; 92 | 93 | *) 94 | echo "Unknown command: $SPARK_K8S_CMD" 1>&2 95 | exit 1 96 | esac 97 | 98 | exec "${CMD[@]}" 99 | -------------------------------------------------------------------------------- /modules/spark/check_for_download: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo "checking length of file $1" 3 | if ! [ -s "$1" ]; then 4 | filename=$(basename $1) 5 | version=$(echo $filename | cut -d '-' -f2) 6 | wget https://archive.apache.org/dist/spark/spark-$version/$filename -O $1 7 | fi 8 | -------------------------------------------------------------------------------- /modules/spark/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | SCRIPT_DIR=$(dirname $0) 4 | ADDED_DIR=${SCRIPT_DIR}/added 5 | ARTIFACTS_DIR=/tmp/artifacts 6 | 7 | # If there is a zero-length spark tarball, find the verison in the 8 | # name and download from Apache 9 | fullname=$(find $ARTIFACTS_DIR -name spark-[0-9.]*\.tgz) 10 | /bin/sh -x $SCRIPT_DIR/check_for_download $fullname 11 | 12 | # Make a place for spark to go (dupe what's done in common in case we're standalone) 13 | if ! [ -d $SPARK_INSTALL ]; then 14 | mkdir -p $SPARK_INSTALL/conf 15 | ln -sfn $SPARK_INSTALL/distro $SPARK_HOME 16 | fi 17 | 18 | pushd $SPARK_INSTALL 19 | cp $fullname . 
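# Layout note: the tarball is unpacked under $SPARK_INSTALL and the "distro" symlink points
# at the versioned directory it creates, while $SPARK_HOME (/opt/spark) is itself a symlink
# to $SPARK_INSTALL/distro (see the common module). With the 3.0.1 artifact, for example,
# /opt/spark/bin/spark-submit resolves to
# /opt/spark-distro/spark-3.0.1-bin-hadoop3.2/bin/spark-submit.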
20 | tar -zxf $(basename $fullname) 21 | ln -s $(basename $fullname .tgz) distro 22 | rm $(basename $fullname) 23 | popd 24 | 25 | # Add in the configuration files (from the common module) if they don't already exist 26 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 27 | 28 | # Make everything under the spark directory accessible to the group 29 | chown 185:0 $SPARK_INSTALL/distro && chmod g+rwX $SPARK_INSTALL/distro 30 | 31 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 32 | entry=$(find $SPARK_HOME/kubernetes -name entrypoint.sh) 33 | if [ -n "$entry" ]; then 34 | cp $entry $SPARK_INSTALL 35 | 36 | # We want to get rid of the tini invocation 37 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 38 | fi 39 | -------------------------------------------------------------------------------- /modules/spark/module.yaml: -------------------------------------------------------------------------------- 1 | version: 1.0 2 | 3 | name: spark 4 | envs: 5 | - name: PATH 6 | value: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin 7 | - name: SPARK_HOME 8 | value: /opt/spark 9 | - name: SPARK_INSTALL 10 | value: /opt/spark-distro 11 | packages: 12 | install: 13 | - wget 14 | artifacts: 15 | - url: https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz 16 | md5: 31e019e35e75a4c55c7efa4464641bf1 17 | execute: 18 | - script: install 19 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Red Hat 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # ------------------------------------------------------------------------ 16 | # 17 | # This is a Dockerfile for the radanalyticsio/openshift-spark-inc:3.0 image. 
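# This Dockerfile is regenerated from image-inc.yaml and the modules/ sources (see
# make-build-dir.sh at the repository root and the io.cekit.version label below), so changes
# are normally made there rather than by editing this file. A local image can be built from
# this context with, for example (illustrative, assuming docker or podman is available):
#   docker build -t radanalyticsio/openshift-spark-inc:3.0 openshift-spark-build-inc/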
18 | 19 | 20 | ## START target image radanalyticsio/openshift-spark-inc:3.0 21 | ## \ 22 | FROM centos:8 23 | 24 | USER root 25 | 26 | ###### START module 'common:1.0' 27 | ###### \ 28 | # Copy 'common' module content 29 | COPY modules/common /tmp/scripts/common 30 | # Switch to 'root' user to install 'common' module defined packages 31 | USER root 32 | # Install packages defined in the 'common' module 33 | RUN yum --setopt=tsflags=nodocs install -y python36 \ 34 | && rpm -q python36 35 | # Set 'common' module defined environment variables 36 | ENV \ 37 | SPARK_INSTALL="/opt/spark-distro" 38 | # Custom scripts from 'common' module 39 | USER root 40 | RUN [ "sh", "-x", "/tmp/scripts/common/install" ] 41 | ###### / 42 | ###### END module 'common:1.0' 43 | 44 | ###### START module 'metrics:1.0' 45 | ###### \ 46 | # Copy 'metrics' module content 47 | COPY modules/metrics /tmp/scripts/metrics 48 | # Custom scripts from 'metrics' module 49 | USER root 50 | RUN [ "sh", "-x", "/tmp/scripts/metrics/install" ] 51 | ###### / 52 | ###### END module 'metrics:1.0' 53 | 54 | ###### START module 's2i:1.0' 55 | ###### \ 56 | # Copy 's2i' module content 57 | COPY modules/s2i /tmp/scripts/s2i 58 | # Switch to 'root' user to install 's2i' module defined packages 59 | USER root 60 | # Install packages defined in the 's2i' module 61 | RUN yum --setopt=tsflags=nodocs install -y wget \ 62 | && rpm -q wget 63 | # Set 's2i' module defined environment variables 64 | ENV \ 65 | PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin" \ 66 | SPARK_HOME="/opt/spark" \ 67 | SPARK_INSTALL="/opt/spark-distro" \ 68 | STI_SCRIPTS_PATH="/usr/libexec/s2i" 69 | # Custom scripts from 's2i' module 70 | USER root 71 | RUN [ "sh", "-x", "/tmp/scripts/s2i/install" ] 72 | ###### / 73 | ###### END module 's2i:1.0' 74 | 75 | ###### START image 'radanalyticsio/openshift-spark-inc:3.0' 76 | ###### \ 77 | # Switch to 'root' user to install 'radanalyticsio/openshift-spark-inc' image defined packages 78 | USER root 79 | # Install packages defined in the 'radanalyticsio/openshift-spark-inc' image 80 | RUN yum --setopt=tsflags=nodocs install -y java-11-openjdk rsync \ 81 | && rpm -q java-11-openjdk rsync 82 | # Set 'radanalyticsio/openshift-spark-inc' image defined environment variables 83 | ENV \ 84 | JBOSS_IMAGE_NAME="radanalyticsio/openshift-spark-inc" \ 85 | JBOSS_IMAGE_VERSION="3.0" 86 | # Set 'radanalyticsio/openshift-spark-inc' image defined labels 87 | LABEL \ 88 | io.cekit.version="3.6.0" \ 89 | io.openshift.s2i.scripts-url="image:///usr/libexec/s2i" \ 90 | maintainer="Trevor McKay " \ 91 | name="radanalyticsio/openshift-spark-inc" \ 92 | version="3.0" 93 | ###### / 94 | ###### END image 'radanalyticsio/openshift-spark-inc:3.0' 95 | 96 | 97 | # Switch to 'root' user and remove artifacts and modules 98 | USER root 99 | RUN [ ! -d /tmp/scripts ] || rm -rf /tmp/scripts 100 | RUN [ ! -d /tmp/artifacts ] || rm -rf /tmp/artifacts 101 | 102 | # Clear package manager metadata 103 | RUN yum clean all && [ ! 
-d /var/cache/yum ] || rm -rf /var/cache/yum 104 | 105 | # Define the user 106 | USER 185 107 | # Define entrypoint 108 | ENTRYPOINT ["/entrypoint"] 109 | # Define run cmd 110 | CMD ["/usr/libexec/s2i/usage"] 111 | ## / 112 | ## END target image -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/agent-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | lowercaseOutputName: true 3 | lowercaseOutputLabelNames: true 4 | whitelistObjectNames: 5 | - 'metrics:*' 6 | 7 | rules: 8 | 9 | # These come from the master 10 | # Example: master.aliveWorkers 11 | - pattern: "metrics<>Value" 12 | name: spark_master_$1 13 | 14 | # These come from the worker 15 | # Example: worker.coresFree 16 | - pattern: "metrics<>Value" 17 | name: spark_worker_$1 18 | 19 | # These come from the application driver 20 | # Example: app-20160809000059-0000.driver.DAGScheduler.stage.failedStages 21 | - pattern: "metrics<>Value" 22 | name: spark_driver_$2_$3 23 | labels: 24 | app_id: "$1" 25 | 26 | # These come from the application driver if it's a streaming application 27 | # Example: app-20160809000059-0000.driver.com.example.ClassName.StreamingMetrics.streaming.lastCompletedBatch_schedulingDelay 28 | - pattern: "metrics<>Value" 29 | name: spark_streaming_driver_$3 30 | labels: 31 | app_id: "$1" 32 | app_name: "$2" 33 | 34 | # These come from the application driver if it's a structured streaming application 35 | # Example: app-20160809000059-0000.driver.spark.streaming.QueryName.inputRate-total 36 | - pattern: "metrics<>Value" 37 | name: spark_structured_streaming_driver_$3 38 | labels: 39 | app_id: "$1" 40 | query_name: "$2" 41 | 42 | # These come from the application executors 43 | # Example: app-20160809000059-0000.0.executor.threadpool.activeTasks 44 | - pattern: "metrics<>Value" 45 | name: spark_executor_$3 46 | labels: 47 | app_id: "$1" 48 | executor_id: "$2" 49 | 50 | # These come from the master 51 | # Example: application.com.example.ClassName.1470700859054.cores 52 | - pattern: "metrics<>Value" 53 | name: spark_application_$3 54 | labels: 55 | app_name: "$1" 56 | app_start_epoch: "$2" 57 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/agent.properties: -------------------------------------------------------------------------------- 1 | jmx_exporter=7777:/opt/spark/conf/agent-config.yaml 2 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/metrics.properties: -------------------------------------------------------------------------------- 1 | *.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 2 | master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 3 | worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 4 | driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 5 | executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 6 | application.source.jvm.class=org.apache.spark.metrics.source.JvmSource 7 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/conf/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 
20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.ui.reverseProxy true 29 | spark.ui.reverseProxyUrl / 30 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/scripts/entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If we get an s2i command and it's anything but "run" just do it 4 | # Otherwise we'll turn it into a launch 5 | if [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == *"$STI_SCRIPTS_PATH"* ]]; then 6 | if ! [[ $@ == *"$STI_SCRIPTS_PATH"/run* ]]; then 7 | exec "$@" 8 | exit $? 9 | fi 10 | CMD=/launch.sh 11 | 12 | # allow just a simple "usage" command to print the usage script 13 | elif [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == "usage" ]]; then 14 | exec $STI_SCRIPTS_PATH/usage 15 | exit $? 16 | else 17 | CMD=$@ 18 | fi 19 | 20 | trap handle_term TERM INT 21 | 22 | function handle_term { 23 | echo Received a termination signal 24 | 25 | local cnt 26 | local killed=1 27 | if [ -n "$PID" ]; then 28 | echo "Stopping subprocess $PID" 29 | kill -TERM $PID 30 | for cnt in {1..10} 31 | do 32 | kill -0 $PID >/dev/null 2>&1 33 | if [ "$?" -ne 0 ]; then 34 | killed=0 35 | break 36 | else 37 | sleep 1 38 | fi 39 | done 40 | if [ "$killed" -ne 0 ]; then 41 | echo Process is still running 10 seconds after TERM, sending KILL 42 | kill -9 $PID 43 | fi 44 | wait $PID 45 | echo "Subprocess stopped" 46 | fi 47 | exit 0 48 | } 49 | 50 | function patch_uid { 51 | # Check whether there is a passwd entry for the container UID 52 | myuid=$(id -u) 53 | mygid=$(id -g) 54 | uidentry=$(getent passwd $myuid) 55 | 56 | # If there is no passwd entry for the container UID, attempt to create one 57 | if [ -z "$uidentry" ] ; then 58 | if [ -w /etc/passwd ] ; then 59 | echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd 60 | else 61 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 62 | fi 63 | fi 64 | } 65 | 66 | # If we receive a spark-on-kube command, hand it off to the 67 | # standard spark entrypoint 68 | case "$1" in 69 | driver | executor) 70 | $SPARK_INSTALL/entrypoint.sh $CMD & 71 | ;; 72 | *) 73 | patch_uid 74 | $CMD & 75 | ;; 76 | esac 77 | PID=$! 78 | wait $PID 79 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/added/scripts/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function check_reverse_proxy { 4 | grep -e "^spark\.ui\.reverseProxy" $SPARK_HOME/conf/spark-defaults.conf &> /dev/null 5 | if [ "$?" 
-ne 0 ]; then 6 | echo "Appending default reverse proxy config to spark-defaults.conf" 7 | echo "spark.ui.reverseProxy true" >> $SPARK_HOME/conf/spark-defaults.conf 8 | echo "spark.ui.reverseProxyUrl /" >> $SPARK_HOME/conf/spark-defaults.conf 9 | fi 10 | } 11 | 12 | # If the UPDATE_SPARK_CONF_DIR dir is non-empty, 13 | # copy the contents to $SPARK_HOME/conf 14 | if [ -d "$UPDATE_SPARK_CONF_DIR" ]; then 15 | sparkconfs=$(ls -1 $UPDATE_SPARK_CONF_DIR | wc -l) 16 | if [ "$sparkconfs" -ne "0" ]; then 17 | echo "Copying from $UPDATE_SPARK_CONF_DIR to $SPARK_HOME/conf" 18 | ls -1 $UPDATE_SPARK_CONF_DIR 19 | cp $UPDATE_SPARK_CONF_DIR/* $SPARK_HOME/conf 20 | fi 21 | elif [ -n "$UPDATE_SPARK_CONF_DIR" ]; then 22 | echo "Directory $UPDATE_SPARK_CONF_DIR does not exist, using default spark config" 23 | fi 24 | 25 | check_reverse_proxy 26 | 27 | if [ -z ${SPARK_METRICS_ON+_} ]; then 28 | JAVA_AGENT= 29 | metrics="" 30 | elif [ ${SPARK_METRICS_ON} == "prometheus" ]; then 31 | JAVA_AGENT=" -javaagent:/opt/metrics/agent-bond.jar=$SPARK_HOME/conf/agent.properties" 32 | metrics=" with prometheus metrics enabled" 33 | else 34 | JAVA_AGENT=" -javaagent:/opt/metrics/jolokia-jvm-1.3.6-agent.jar=port=7777,host=0.0.0.0" 35 | metrics=" with jolokia metrics enabled (deprecated, set SPARK_METRICS_ON to 'prometheus')" 36 | fi 37 | 38 | if [ -z ${SPARK_MASTER_ADDRESS+_} ]; then 39 | echo "Starting master$metrics" 40 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.master.Master 41 | else 42 | echo "Starting worker$metrics, will connect to: $SPARK_MASTER_ADDRESS" 43 | 44 | # spark://x.y.z:7077 -> x.y.z/7077 45 | _MASTER_HOST_AND_PORT=$(echo $SPARK_MASTER_ADDRESS | sed -r 's;.*//(.*):(.*);\1/\2;g') 46 | while true; do 47 | echo "Waiting for spark master to be available ..." 48 | timeout 1 sh -c "(/dev/null" 49 | if [ $? -eq 0 ]; then 50 | break 51 | fi 52 | sleep 1 53 | done 54 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.worker.Worker $SPARK_MASTER_ADDRESS 55 | fi 56 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | 8 | # Put entrypoint and launch.sh at the root 9 | cp $ADDED_DIR/scripts/* / 10 | 11 | # Set up a place for spark to go 12 | # We'll also stage our default spark config files here 13 | # so that when spark is installed they can be copied over 14 | # if the spark tarball itself does not include files of the same name 15 | # (ie, "copy if not overwrite") 16 | if ! [ -d $SPARK_INSTALL ]; then 17 | mkdir -p $SPARK_INSTALL 18 | mv $ADDED_DIR/conf $SPARK_INSTALL 19 | chown -R 185:0 $SPARK_INSTALL && chmod -R g+rwX $SPARK_INSTALL 20 | ln -sfn $SPARK_INSTALL/distro /opt/spark 21 | fi 22 | 23 | # Change the permissions on /etc/passwd so that anonymous user 24 | # can be added to satisfy Spark 25 | chgrp root /etc/passwd && chmod g+rw /etc/passwd 26 | 27 | # Make Python3 the default. 
This is important because it seems 28 | # that Spark still wants to invoke python scripts with "python" 29 | # in the executors, as opposed to "python3" 30 | alternatives --set python /usr/bin/python3 31 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/common/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: [] 2 | envs: 3 | - name: SPARK_INSTALL 4 | value: /opt/spark-distro 5 | execute: 6 | - directory: common 7 | module_name: common 8 | name: common/install 9 | script: install 10 | labels: [] 11 | modules: 12 | install: [] 13 | repositories: [] 14 | name: common 15 | osbs: 16 | configuration: {} 17 | repository: {} 18 | packages: 19 | install: 20 | - python36 21 | repositories: [] 22 | ports: [] 23 | version: 1.0 24 | volumes: [] 25 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/metrics/added/agent-bond.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build-inc/modules/metrics/added/agent-bond.jar -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build-inc/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/metrics/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -u 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | mkdir -p /opt/metrics 8 | mv $ADDED_DIR/*.jar /opt/metrics 9 | 10 | chown -R 185:0 /opt/metrics && chmod g+rwX /opt/metrics 11 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/metrics/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: [] 2 | envs: [] 3 | execute: 4 | - directory: metrics 5 | module_name: metrics 6 | name: metrics/install 7 | script: install 8 | labels: [] 9 | modules: 10 | install: [] 11 | repositories: [] 12 | name: metrics 13 | osbs: 14 | configuration: {} 15 | repository: {} 16 | packages: 17 | install: [] 18 | repositories: [] 19 | ports: [] 20 | version: 1.0 21 | volumes: [] 22 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/s2i/added/assemble: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source $STI_SCRIPTS_PATH/s2i-env-vars 4 | 5 | # Just a word about the directory structure 6 | # SPARK_HOME == /opt/spark 7 | # SPARK_INSTALL == /opt/spark-distro 8 | 9 | # Extra things like default configuration files and additional 10 | # boot scripts may be stored in SPARK_INSTALL 11 | 12 | # At runtime, /opt/spark is a symlink to /opt/spark-distro/distro 13 | # but /opt/spark-distro/distro does not actually exist yet 14 | 15 | # The Spark tarball will be expanded in /opt/spark-distro using 16 | # it's original name, for example 
/opt/spark-distro/spark-2.3.0-bin-hadoop2.7, 17 | # as a dev aid to tracking and version checking 18 | 19 | # Ultimately, /opt/spark-distro/distro is created as a symlink to the Spark root 20 | # directory. This double-hop from /opt/spark to the Spark root through symlinks 21 | # allows the Spark installation to be staged in the base image but completed in 22 | # the S2I build without expanding permissions 23 | 24 | function match_sum { 25 | local sumfile=$1 26 | local delim=$2 27 | local sha512=$3 28 | local initial=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" -f1 | tr [:upper:] [:lower:]) 29 | local rest=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" --complement -f1 | tr [:upper:] [:lower:]) 30 | if [ "$sha512" == "$initial" ] || [ "$sha512" == "$rest" ]; then 31 | return 0 32 | fi 33 | return 1 34 | } 35 | 36 | if [ -f $SPARK_HOME/bin/spark-submit ]; then 37 | echo "Spark is installed, nothing to do" 38 | exit 1 39 | else 40 | echo "Attempting to install Spark" 41 | # If a url has been specfified for spark use it 42 | if [ -n "$SPARK_URL" ]; then 43 | echo Downloading $SPARK_URL 44 | wget $SPARK_URL -P $S2I_SOURCE_DIR 45 | fi 46 | if [ -n "$SPARK_SHA512_URL" ]; then 47 | echo Downloading $SPARK_SHA512_URL 48 | wget $SPARK_SHA512_URL -P $S2I_SOURCE_DIR 49 | fi 50 | 51 | for spark in $(ls "$S2I_SOURCE_DIR"); do 52 | 53 | spark=$S2I_SOURCE_DIR/$spark 54 | echo Found $spark 55 | echo Checking for valid Spark archive 56 | 57 | # Is the file a directory? If it contains spark-submit, move it 58 | if [ -d "$spark" ]; then 59 | if ! [ -f $spark/bin/spark-submit ]; then 60 | echo Ignoring directory $spark, no spark-submit 61 | continue 62 | fi 63 | echo Installing from directory $spark 64 | sparkdir=$SPARK_INSTALL/$(basename $spark) 65 | mv $spark $SPARK_INSTALL 66 | else 67 | # If we can get the table of contents, it's a tar archive, otherwise ignore 68 | tar -tf $spark &> /dev/null 69 | if [ "$?" -ne 0 ]; then 70 | echo Ignoring $spark, not a tar archive 71 | continue 72 | fi 73 | echo Validating tar archive $spark 74 | 75 | # Does the tarball contain a spark-submit? 76 | name=$(tar -tzf $spark | grep -e "^[^/]*/bin/spark-submit$") 77 | if [ "$?" -ne 0 ]; then 78 | echo Ignoring tarball $spark, no spark-submit 79 | continue 80 | else 81 | echo Found valid tar archive, matching checksums 82 | # See if we have an sha512 file to match against 83 | if [ -f "$spark".sha512 ]; then 84 | calcvalue=$(sha512sum "$spark" | cut -d\ -f1) 85 | # split the sha512 file using a colon 86 | match_sum "$spark".sha512 \: $calcvalue 87 | matched="$?" 88 | if [ "$matched" -ne 0 ]; then 89 | # split the sha512 file using equals sign in case it's BSD 90 | match_sum "$spark".sha512 \= $calcvalue 91 | matched="$?" 
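# (matched is zero only if one of the two delimiter styles, ':' or '=', yielded a sha512 that agrees with the computed value)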
92 | fi 93 | if [ "$matched" -ne 0 ]; then 94 | echo Ignoring tarball $spark, sha512sum did not match 95 | continue 96 | fi 97 | fi 98 | 99 | # dname will be the intial directory from the path of spark-submit 100 | # we found in the tarball, ie the dir created by tar 101 | echo Installing from tarball $spark 102 | dname=$(dirname $name | cut -d/ -f 1) 103 | sparkdir=$SPARK_INSTALL/$dname 104 | tar -xzf $spark -C $SPARK_INSTALL 105 | fi 106 | fi 107 | 108 | ln -s $sparkdir $SPARK_INSTALL/distro 109 | 110 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 111 | entry=$(find $sparkdir/kubernetes -name entrypoint.sh) 112 | if [ -n "$entry" ]; then 113 | echo Installing spark native entrypoint for use with spark-on-k8s commands 114 | cp $entry $SPARK_INSTALL 115 | 116 | # We want to get rid of the tini invocation 117 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 118 | else 119 | echo No spark native entrypoint found for use with spark-on-k8s commands 120 | fi 121 | 122 | # Include the default spark configuration files 123 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 124 | 125 | # If someone included mods in a parallel directory, install them with rsync 126 | # Don't try to preserve permisions, owner, or group because we don't have 127 | # any control over how s2i uploaded the files, so there's no use preserving. 128 | if [ -x /usr/bin/rsync ] && [ -d "$S2I_SOURCE_DIR/modify-spark" ]; then 129 | echo Found a modify-spark directory, running rsync to install changes 130 | rsync -vrltD "$S2I_SOURCE_DIR/modify-spark/" $SPARK_HOME 131 | fi 132 | 133 | # Spark workers need to write to the spark directory to track apps 134 | chmod -R g+rwX $sparkdir 135 | 136 | # Can we run spark-submit? 137 | $SPARK_HOME/bin/spark-submit --version 138 | if [ "$?" 
-eq 0 ]; then 139 | echo Spark installed successfully 140 | exit 0 141 | else 142 | echo Cannot run spark-submit, Spark install failed 143 | fi 144 | 145 | # Just in case there is more than one tarball, clean up 146 | rm -rf $sparkdir 147 | done 148 | 149 | echo no valid Spark distribution found 150 | 151 | if [ -n "$DEBUG_ASSEMBLE" ]; then 152 | echo Looping forever so you can \'oc rsh\' 153 | while true; do 154 | sleep 5 155 | done 156 | fi 157 | exit 1 158 | fi 159 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/s2i/added/s2i-env-vars: -------------------------------------------------------------------------------- 1 | # Local vars setup with defaults 2 | S2I_DESTINATION=${S2I_DESTINATION:-/tmp} 3 | S2I_SOURCE_DIR="${S2I_DESTINATION}/src" 4 | -------------------------------------------------------------------------------- /openshift-spark-build-inc/modules/s2i/added/usage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f "$SPARK_HOME"/bin/spark-submit ]; then 3 | cat <<>Value" 12 | name: spark_master_$1 13 | 14 | # These come from the worker 15 | # Example: worker.coresFree 16 | - pattern: "metrics<>Value" 17 | name: spark_worker_$1 18 | 19 | # These come from the application driver 20 | # Example: app-20160809000059-0000.driver.DAGScheduler.stage.failedStages 21 | - pattern: "metrics<>Value" 22 | name: spark_driver_$2_$3 23 | labels: 24 | app_id: "$1" 25 | 26 | # These come from the application driver if it's a streaming application 27 | # Example: app-20160809000059-0000.driver.com.example.ClassName.StreamingMetrics.streaming.lastCompletedBatch_schedulingDelay 28 | - pattern: "metrics<>Value" 29 | name: spark_streaming_driver_$3 30 | labels: 31 | app_id: "$1" 32 | app_name: "$2" 33 | 34 | # These come from the application driver if it's a structured streaming application 35 | # Example: app-20160809000059-0000.driver.spark.streaming.QueryName.inputRate-total 36 | - pattern: "metrics<>Value" 37 | name: spark_structured_streaming_driver_$3 38 | labels: 39 | app_id: "$1" 40 | query_name: "$2" 41 | 42 | # These come from the application executors 43 | # Example: app-20160809000059-0000.0.executor.threadpool.activeTasks 44 | - pattern: "metrics<>Value" 45 | name: spark_executor_$3 46 | labels: 47 | app_id: "$1" 48 | executor_id: "$2" 49 | 50 | # These come from the master 51 | # Example: application.com.example.ClassName.1470700859054.cores 52 | - pattern: "metrics<>Value" 53 | name: spark_application_$3 54 | labels: 55 | app_name: "$1" 56 | app_start_epoch: "$2" 57 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/conf/agent.properties: -------------------------------------------------------------------------------- 1 | jmx_exporter=7777:/opt/spark/conf/agent-config.yaml 2 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 
5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/conf/metrics.properties: -------------------------------------------------------------------------------- 1 | *.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 2 | master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 3 | worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 4 | driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 5 | executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 6 | application.source.jvm.class=org.apache.spark.metrics.source.JvmSource 7 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/conf/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.ui.reverseProxy true 29 | spark.ui.reverseProxyUrl / 30 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/scripts/entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If we get an s2i command and it's anything but "run" just do it 4 | # Otherwise we'll turn it into a launch 5 | if [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == *"$STI_SCRIPTS_PATH"* ]]; then 6 | if ! [[ $@ == *"$STI_SCRIPTS_PATH"/run* ]]; then 7 | exec "$@" 8 | exit $? 9 | fi 10 | CMD=/launch.sh 11 | 12 | # allow just a simple "usage" command to print the usage script 13 | elif [[ -n "$STI_SCRIPTS_PATH" ]] && [[ $@ == "usage" ]]; then 14 | exec $STI_SCRIPTS_PATH/usage 15 | exit $? 16 | else 17 | CMD=$@ 18 | fi 19 | 20 | trap handle_term TERM INT 21 | 22 | function handle_term { 23 | echo Received a termination signal 24 | 25 | local cnt 26 | local killed=1 27 | if [ -n "$PID" ]; then 28 | echo "Stopping subprocess $PID" 29 | kill -TERM $PID 30 | for cnt in {1..10} 31 | do 32 | kill -0 $PID >/dev/null 2>&1 33 | if [ "$?" -ne 0 ]; then 34 | killed=0 35 | break 36 | else 37 | sleep 1 38 | fi 39 | done 40 | if [ "$killed" -ne 0 ]; then 41 | echo Process is still running 10 seconds after TERM, sending KILL 42 | kill -9 $PID 43 | fi 44 | wait $PID 45 | echo "Subprocess stopped" 46 | fi 47 | exit 0 48 | } 49 | 50 | function patch_uid { 51 | # Check whether there is a passwd entry for the container UID 52 | myuid=$(id -u) 53 | mygid=$(id -g) 54 | uidentry=$(getent passwd $myuid) 55 | 56 | # If there is no passwd entry for the container UID, attempt to create one 57 | if [ -z "$uidentry" ] ; then 58 | if [ -w /etc/passwd ] ; then 59 | echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd 60 | else 61 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 62 | fi 63 | fi 64 | } 65 | 66 | # If we receive a spark-on-kube command, hand it off to the 67 | # standard spark entrypoint 68 | case "$1" in 69 | driver | executor) 70 | $SPARK_INSTALL/entrypoint.sh $CMD & 71 | ;; 72 | *) 73 | patch_uid 74 | $CMD & 75 | ;; 76 | esac 77 | PID=$! 
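# Note: the command runs in the background and the shell waits on its PID so the
# TERM/INT trap above can fire and forward the signal to the subprocess.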
78 | wait $PID 79 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/added/scripts/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function check_reverse_proxy { 4 | grep -e "^spark\.ui\.reverseProxy" $SPARK_HOME/conf/spark-defaults.conf &> /dev/null 5 | if [ "$?" -ne 0 ]; then 6 | echo "Appending default reverse proxy config to spark-defaults.conf" 7 | echo "spark.ui.reverseProxy true" >> $SPARK_HOME/conf/spark-defaults.conf 8 | echo "spark.ui.reverseProxyUrl /" >> $SPARK_HOME/conf/spark-defaults.conf 9 | fi 10 | } 11 | 12 | # If the UPDATE_SPARK_CONF_DIR dir is non-empty, 13 | # copy the contents to $SPARK_HOME/conf 14 | if [ -d "$UPDATE_SPARK_CONF_DIR" ]; then 15 | sparkconfs=$(ls -1 $UPDATE_SPARK_CONF_DIR | wc -l) 16 | if [ "$sparkconfs" -ne "0" ]; then 17 | echo "Copying from $UPDATE_SPARK_CONF_DIR to $SPARK_HOME/conf" 18 | ls -1 $UPDATE_SPARK_CONF_DIR 19 | cp $UPDATE_SPARK_CONF_DIR/* $SPARK_HOME/conf 20 | fi 21 | elif [ -n "$UPDATE_SPARK_CONF_DIR" ]; then 22 | echo "Directory $UPDATE_SPARK_CONF_DIR does not exist, using default spark config" 23 | fi 24 | 25 | check_reverse_proxy 26 | 27 | if [ -z ${SPARK_METRICS_ON+_} ]; then 28 | JAVA_AGENT= 29 | metrics="" 30 | elif [ ${SPARK_METRICS_ON} == "prometheus" ]; then 31 | JAVA_AGENT=" -javaagent:/opt/metrics/agent-bond.jar=$SPARK_HOME/conf/agent.properties" 32 | metrics=" with prometheus metrics enabled" 33 | else 34 | JAVA_AGENT=" -javaagent:/opt/metrics/jolokia-jvm-1.3.6-agent.jar=port=7777,host=0.0.0.0" 35 | metrics=" with jolokia metrics enabled (deprecated, set SPARK_METRICS_ON to 'prometheus')" 36 | fi 37 | 38 | if [ -z ${SPARK_MASTER_ADDRESS+_} ]; then 39 | echo "Starting master$metrics" 40 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.master.Master 41 | else 42 | echo "Starting worker$metrics, will connect to: $SPARK_MASTER_ADDRESS" 43 | 44 | # spark://x.y.z:7077 -> x.y.z/7077 45 | _MASTER_HOST_AND_PORT=$(echo $SPARK_MASTER_ADDRESS | sed -r 's;.*//(.*):(.*);\1/\2;g') 46 | while true; do 47 | echo "Waiting for spark master to be available ..." 48 | timeout 1 sh -c "(/dev/null" 49 | if [ $? -eq 0 ]; then 50 | break 51 | fi 52 | sleep 1 53 | done 54 | exec $SPARK_HOME/bin/spark-class$JAVA_AGENT org.apache.spark.deploy.worker.Worker $SPARK_MASTER_ADDRESS 55 | fi 56 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | 8 | # Put entrypoint and launch.sh at the root 9 | cp $ADDED_DIR/scripts/* / 10 | 11 | # Set up a place for spark to go 12 | # We'll also stage our default spark config files here 13 | # so that when spark is installed they can be copied over 14 | # if the spark tarball itself does not include files of the same name 15 | # (ie, "copy if not overwrite") 16 | if ! [ -d $SPARK_INSTALL ]; then 17 | mkdir -p $SPARK_INSTALL 18 | mv $ADDED_DIR/conf $SPARK_INSTALL 19 | chown -R 185:0 $SPARK_INSTALL && chmod -R g+rwX $SPARK_INSTALL 20 | ln -sfn $SPARK_INSTALL/distro /opt/spark 21 | fi 22 | 23 | # Change the permissions on /etc/passwd so that anonymous user 24 | # can be added to satisfy Spark 25 | chgrp root /etc/passwd && chmod g+rw /etc/passwd 26 | 27 | # Make Python3 the default. 
This is important because it seems 28 | # that Spark still wants to invoke python scripts with "python" 29 | # in the executors, as opposed to "python3" 30 | alternatives --set python /usr/bin/python3 31 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/common/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: [] 2 | envs: 3 | - name: SPARK_INSTALL 4 | value: /opt/spark-distro 5 | execute: 6 | - directory: common 7 | module_name: common 8 | name: common/install 9 | script: install 10 | labels: [] 11 | modules: 12 | install: [] 13 | repositories: [] 14 | name: common 15 | osbs: 16 | configuration: {} 17 | repository: {} 18 | packages: 19 | install: 20 | - python36 21 | repositories: [] 22 | ports: [] 23 | version: 1.0 24 | volumes: [] 25 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/metrics/added/agent-bond.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build/modules/metrics/added/agent-bond.jar -------------------------------------------------------------------------------- /openshift-spark-build/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build/modules/metrics/added/jolokia-jvm-1.3.6-agent.jar -------------------------------------------------------------------------------- /openshift-spark-build/modules/metrics/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -u 3 | set -e 4 | 5 | SCRIPT_DIR=$(dirname $0) 6 | ADDED_DIR=${SCRIPT_DIR}/added 7 | mkdir -p /opt/metrics 8 | mv $ADDED_DIR/*.jar /opt/metrics 9 | 10 | chown -R 185:0 /opt/metrics && chmod g+rwX /opt/metrics 11 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/metrics/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: [] 2 | envs: [] 3 | execute: 4 | - directory: metrics 5 | module_name: metrics 6 | name: metrics/install 7 | script: install 8 | labels: [] 9 | modules: 10 | install: [] 11 | repositories: [] 12 | name: metrics 13 | osbs: 14 | configuration: {} 15 | repository: {} 16 | packages: 17 | install: [] 18 | repositories: [] 19 | ports: [] 20 | version: 1.0 21 | volumes: [] 22 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/s2i/added/assemble: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source $STI_SCRIPTS_PATH/s2i-env-vars 4 | 5 | # Just a word about the directory structure 6 | # SPARK_HOME == /opt/spark 7 | # SPARK_INSTALL == /opt/spark-distro 8 | 9 | # Extra things like default configuration files and additional 10 | # boot scripts may be stored in SPARK_INSTALL 11 | 12 | # At runtime, /opt/spark is a symlink to /opt/spark-distro/distro 13 | # but /opt/spark-distro/distro does not actually exist yet 14 | 15 | # The Spark tarball will be expanded in /opt/spark-distro using 16 | # it's original name, for example /opt/spark-distro/spark-2.3.0-bin-hadoop2.7, 17 | # as a 
dev aid to tracking and version checking 18 | 19 | # Ultimately, /opt/spark-distro/distro is created as a symlink to the Spark root 20 | # directory. This double-hop from /opt/spark to the Spark root through symlinks 21 | # allows the Spark installation to be staged in the base image but completed in 22 | # the S2I build without expanding permissions 23 | 24 | function match_sum { 25 | local sumfile=$1 26 | local delim=$2 27 | local sha512=$3 28 | local initial=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" -f1 | tr [:upper:] [:lower:]) 29 | local rest=$(cat $sumfile | tr -d [:space:] | cut -d"$delim" --complement -f1 | tr [:upper:] [:lower:]) 30 | if [ "$sha512" == "$initial" ] || [ "$sha512" == "$rest" ]; then 31 | return 0 32 | fi 33 | return 1 34 | } 35 | 36 | if [ -f $SPARK_HOME/bin/spark-submit ]; then 37 | echo "Spark is installed, nothing to do" 38 | exit 1 39 | else 40 | echo "Attempting to install Spark" 41 | # If a url has been specfified for spark use it 42 | if [ -n "$SPARK_URL" ]; then 43 | echo Downloading $SPARK_URL 44 | wget $SPARK_URL -P $S2I_SOURCE_DIR 45 | fi 46 | if [ -n "$SPARK_SHA512_URL" ]; then 47 | echo Downloading $SPARK_SHA512_URL 48 | wget $SPARK_SHA512_URL -P $S2I_SOURCE_DIR 49 | fi 50 | 51 | for spark in $(ls "$S2I_SOURCE_DIR"); do 52 | 53 | spark=$S2I_SOURCE_DIR/$spark 54 | echo Found $spark 55 | echo Checking for valid Spark archive 56 | 57 | # Is the file a directory? If it contains spark-submit, move it 58 | if [ -d "$spark" ]; then 59 | if ! [ -f $spark/bin/spark-submit ]; then 60 | echo Ignoring directory $spark, no spark-submit 61 | continue 62 | fi 63 | echo Installing from directory $spark 64 | sparkdir=$SPARK_INSTALL/$(basename $spark) 65 | mv $spark $SPARK_INSTALL 66 | else 67 | # If we can get the table of contents, it's a tar archive, otherwise ignore 68 | tar -tf $spark &> /dev/null 69 | if [ "$?" -ne 0 ]; then 70 | echo Ignoring $spark, not a tar archive 71 | continue 72 | fi 73 | echo Validating tar archive $spark 74 | 75 | # Does the tarball contain a spark-submit? 76 | name=$(tar -tzf $spark | grep -e "^[^/]*/bin/spark-submit$") 77 | if [ "$?" -ne 0 ]; then 78 | echo Ignoring tarball $spark, no spark-submit 79 | continue 80 | else 81 | echo Found valid tar archive, matching checksums 82 | # See if we have an sha512 file to match against 83 | if [ -f "$spark".sha512 ]; then 84 | calcvalue=$(sha512sum "$spark" | cut -d\ -f1) 85 | # split the sha512 file using a colon 86 | match_sum "$spark".sha512 \: $calcvalue 87 | matched="$?" 88 | if [ "$matched" -ne 0 ]; then 89 | # split the sha512 file using equals sign in case it's BSD 90 | match_sum "$spark".sha512 \= $calcvalue 91 | matched="$?" 
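# (matched is zero only if one of the two delimiter styles, ':' or '=', yielded a sha512 that agrees with the computed value)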
92 | fi 93 | if [ "$matched" -ne 0 ]; then 94 | echo Ignoring tarball $spark, sha512sum did not match 95 | continue 96 | fi 97 | fi 98 | 99 | # dname will be the intial directory from the path of spark-submit 100 | # we found in the tarball, ie the dir created by tar 101 | echo Installing from tarball $spark 102 | dname=$(dirname $name | cut -d/ -f 1) 103 | sparkdir=$SPARK_INSTALL/$dname 104 | tar -xzf $spark -C $SPARK_INSTALL 105 | fi 106 | fi 107 | 108 | ln -s $sparkdir $SPARK_INSTALL/distro 109 | 110 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 111 | entry=$(find $sparkdir/kubernetes -name entrypoint.sh) 112 | if [ -n "$entry" ]; then 113 | echo Installing spark native entrypoint for use with spark-on-k8s commands 114 | cp $entry $SPARK_INSTALL 115 | 116 | # We want to get rid of the tini invocation 117 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 118 | else 119 | echo No spark native entrypoint found for use with spark-on-k8s commands 120 | fi 121 | 122 | # Include the default spark configuration files 123 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 124 | 125 | # If someone included mods in a parallel directory, install them with rsync 126 | # Don't try to preserve permisions, owner, or group because we don't have 127 | # any control over how s2i uploaded the files, so there's no use preserving. 128 | if [ -x /usr/bin/rsync ] && [ -d "$S2I_SOURCE_DIR/modify-spark" ]; then 129 | echo Found a modify-spark directory, running rsync to install changes 130 | rsync -vrltD "$S2I_SOURCE_DIR/modify-spark/" $SPARK_HOME 131 | fi 132 | 133 | # Spark workers need to write to the spark directory to track apps 134 | chmod -R g+rwX $sparkdir 135 | 136 | # Can we run spark-submit? 137 | $SPARK_HOME/bin/spark-submit --version 138 | if [ "$?" -eq 0 ]; then 139 | echo Spark installed successfully 140 | exit 0 141 | else 142 | echo Cannot run spark-submit, Spark install failed 143 | fi 144 | 145 | # Just in case there is more than one tarball, clean up 146 | rm -rf $sparkdir 147 | done 148 | 149 | echo no valid Spark distribution found 150 | 151 | if [ -n "$DEBUG_ASSEMBLE" ]; then 152 | echo Looping forever so you can \'oc rsh\' 153 | while true; do 154 | sleep 5 155 | done 156 | fi 157 | exit 1 158 | fi 159 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/s2i/added/s2i-env-vars: -------------------------------------------------------------------------------- 1 | # Local vars setup with defaults 2 | S2I_DESTINATION=${S2I_DESTINATION:-/tmp} 3 | S2I_SOURCE_DIR="${S2I_DESTINATION}/src" 4 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/s2i/added/usage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f "$SPARK_HOME"/bin/spark-submit ]; then 3 | cat <> /etc/passwd 33 | else 34 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 35 | fi 36 | fi 37 | 38 | SPARK_K8S_CMD="$1" 39 | if [ -z "$SPARK_K8S_CMD" ]; then 40 | echo "No command to execute has been provided." 
1>&2 41 | exit 1 42 | fi 43 | shift 1 44 | 45 | SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*" 46 | env | grep SPARK_JAVA_OPT_ | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt 47 | readarray -t SPARK_JAVA_OPTS < /tmp/java_opts.txt 48 | if [ -n "$SPARK_MOUNTED_CLASSPATH" ]; then 49 | SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_MOUNTED_CLASSPATH" 50 | fi 51 | if [ -n "$SPARK_MOUNTED_FILES_DIR" ]; then 52 | cp -R "$SPARK_MOUNTED_FILES_DIR/." . 53 | fi 54 | 55 | case "$SPARK_K8S_CMD" in 56 | driver) 57 | CMD=( 58 | ${JAVA_HOME}/bin/java 59 | "${SPARK_JAVA_OPTS[@]}" 60 | -cp "$SPARK_CLASSPATH" 61 | -Xms$SPARK_DRIVER_MEMORY 62 | -Xmx$SPARK_DRIVER_MEMORY 63 | -Dspark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS 64 | $SPARK_DRIVER_CLASS 65 | $SPARK_DRIVER_ARGS 66 | ) 67 | ;; 68 | 69 | executor) 70 | CMD=( 71 | ${JAVA_HOME}/bin/java 72 | "${SPARK_JAVA_OPTS[@]}" 73 | -Xms$SPARK_EXECUTOR_MEMORY 74 | -Xmx$SPARK_EXECUTOR_MEMORY 75 | -cp "$SPARK_CLASSPATH" 76 | org.apache.spark.executor.CoarseGrainedExecutorBackend 77 | --driver-url $SPARK_DRIVER_URL 78 | --executor-id $SPARK_EXECUTOR_ID 79 | --cores $SPARK_EXECUTOR_CORES 80 | --app-id $SPARK_APPLICATION_ID 81 | --hostname $SPARK_EXECUTOR_POD_IP 82 | ) 83 | ;; 84 | 85 | init) 86 | CMD=( 87 | "$SPARK_HOME/bin/spark-class" 88 | "org.apache.spark.deploy.k8s.SparkPodInitContainer" 89 | "$@" 90 | ) 91 | ;; 92 | 93 | *) 94 | echo "Unknown command: $SPARK_K8S_CMD" 1>&2 95 | exit 1 96 | esac 97 | 98 | exec "${CMD[@]}" 99 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/spark/check_for_download: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo "checking length of file $1" 3 | if ! [ -s "$1" ]; then 4 | filename=$(basename $1) 5 | version=$(echo $filename | cut -d '-' -f2) 6 | wget https://archive.apache.org/dist/spark/spark-$version/$filename -O $1 7 | fi 8 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/spark/install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | SCRIPT_DIR=$(dirname $0) 4 | ADDED_DIR=${SCRIPT_DIR}/added 5 | ARTIFACTS_DIR=/tmp/artifacts 6 | 7 | # If there is a zero-length spark tarball, find the verison in the 8 | # name and download from Apache 9 | fullname=$(find $ARTIFACTS_DIR -name spark-[0-9.]*\.tgz) 10 | /bin/sh -x $SCRIPT_DIR/check_for_download $fullname 11 | 12 | # Make a place for spark to go (dupe what's done in common in case we're standalone) 13 | if ! [ -d $SPARK_INSTALL ]; then 14 | mkdir -p $SPARK_INSTALL/conf 15 | ln -sfn $SPARK_INSTALL/distro $SPARK_HOME 16 | fi 17 | 18 | pushd $SPARK_INSTALL 19 | cp $fullname . 
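# Unpack the copied tarball, point the 'distro' symlink at the extracted
# directory, then drop the tarball itself to keep the image small.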
20 | tar -zxf $(basename $fullname) 21 | ln -s $(basename $fullname .tgz) distro 22 | rm $(basename $fullname) 23 | popd 24 | 25 | # Add in the configuration files (from the common module) if they don't already exist 26 | mv --no-clobber "$SPARK_INSTALL"/conf/* "$SPARK_HOME"/conf/ 27 | 28 | # Make everything under the spark directory accessible to the group 29 | chown 185:0 $SPARK_INSTALL/distro && chmod g+rwX $SPARK_INSTALL/distro 30 | 31 | # Search for the spark entrypoint file and copy it to $SPARK_INSTALL 32 | entry=$(find $SPARK_HOME/kubernetes -name entrypoint.sh) 33 | if [ -n "$entry" ]; then 34 | cp $entry $SPARK_INSTALL 35 | 36 | # We want to get rid of the tini invocation 37 | sed -i "s@exec .*/tini -s --@exec@" $SPARK_INSTALL/entrypoint.sh 38 | fi 39 | -------------------------------------------------------------------------------- /openshift-spark-build/modules/spark/module.yaml: -------------------------------------------------------------------------------- 1 | artifacts: 2 | - md5: 31e019e35e75a4c55c7efa4464641bf1 3 | name: spark-3.0.1-bin-hadoop3.2.tgz 4 | target: spark-3.0.1-bin-hadoop3.2.tgz 5 | url: https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz 6 | envs: 7 | - name: PATH 8 | value: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin 9 | - name: SPARK_HOME 10 | value: /opt/spark 11 | - name: SPARK_INSTALL 12 | value: /opt/spark-distro 13 | execute: 14 | - directory: spark 15 | module_name: spark 16 | name: spark/install 17 | script: install 18 | labels: [] 19 | modules: 20 | install: [] 21 | repositories: [] 22 | name: spark 23 | osbs: 24 | configuration: {} 25 | repository: {} 26 | packages: 27 | install: 28 | - wget 29 | repositories: [] 30 | ports: [] 31 | version: 1.0 32 | volumes: [] 33 | -------------------------------------------------------------------------------- /openshift-spark-build/spark-3.0.1-bin-hadoop3.2.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radanalyticsio/openshift-spark/f360e822b4c4859e717791cf39cb9682dc242c5f/openshift-spark-build/spark-3.0.1-bin-hadoop3.2.tgz -------------------------------------------------------------------------------- /spark-metrics-template.yaml: -------------------------------------------------------------------------------- 1 | kind: Template 2 | apiVersion: v1 3 | template: spark 4 | metadata: 5 | name: spark 6 | objects: 7 | 8 | - kind: Service 9 | apiVersion: v1 10 | metadata: 11 | name: ${MASTER_NAME}-${SPARK_METRICS_ON} 12 | labels: 13 | name: ${MASTER_NAME} 14 | spec: 15 | ports: 16 | - protocol: TCP 17 | port: 7777 18 | targetPort: 7777 19 | selector: 20 | name: ${MASTER_NAME} 21 | 22 | - kind: Service 23 | apiVersion: v1 24 | metadata: 25 | name: ${MASTER_NAME} 26 | labels: 27 | name: ${MASTER_NAME} 28 | spec: 29 | ports: 30 | - protocol: TCP 31 | port: 7077 32 | targetPort: 7077 33 | selector: 34 | name: ${MASTER_NAME} 35 | 36 | - kind: Service 37 | apiVersion: v1 38 | metadata: 39 | name: ${MASTER_NAME}-webui 40 | labels: 41 | name: ${MASTER_NAME} 42 | spec: 43 | ports: 44 | - protocol: TCP 45 | port: 8080 46 | targetPort: 8080 47 | selector: 48 | name: ${MASTER_NAME} 49 | 50 | - kind: DeploymentConfig 51 | apiVersion: v1 52 | metadata: 53 | name: ${MASTER_NAME} 54 | spec: 55 | strategy: 56 | type: Rolling 57 | triggers: 58 | - type: ConfigChange 59 | replicas: 1 60 | selector: 61 | name: ${MASTER_NAME} 62 | template: 63 | metadata: 64 | labels: 65 | name: ${MASTER_NAME} 66 | 
spec: 67 | containers: 68 | - name: ${MASTER_NAME} 69 | image: ${SPARK_IMAGE} 70 | env: 71 | - name: SPARK_MASTER_PORT 72 | value: "7077" 73 | - name: SPARK_MASTER_WEBUI_PORT 74 | value: "8080" 75 | - name: SPARK_METRICS_ON 76 | value: ${SPARK_METRICS_ON} 77 | - name: SPARK_USER 78 | value: admin 79 | ports: 80 | - containerPort: 7077 81 | protocol: TCP 82 | - containerPort: 7777 83 | protocol: TCP 84 | - containerPort: 8080 85 | protocol: TCP 86 | 87 | - kind: DeploymentConfig 88 | apiVersion: v1 89 | metadata: 90 | name: ${WORKER_NAME} 91 | spec: 92 | strategy: 93 | type: Rolling 94 | triggers: 95 | - type: ConfigChange 96 | replicas: 3 97 | selector: 98 | name: ${WORKER_NAME} 99 | template: 100 | metadata: 101 | labels: 102 | name: ${WORKER_NAME} 103 | spec: 104 | containers: 105 | - name: ${WORKER_NAME} 106 | image: ${SPARK_IMAGE} 107 | env: 108 | - name: SPARK_METRICS_ON 109 | value: ${SPARK_METRICS_ON} 110 | - name: SPARK_MASTER_ADDRESS 111 | value: spark://${MASTER_NAME}:7077 112 | - name: SPARK_MASTER_UI_ADDRESS 113 | value: http://${MASTER_NAME}-webui:8080 114 | - name: SPARK_USER 115 | value: admin 116 | parameters: 117 | - name: SPARK_IMAGE 118 | description: Name of the Spark master/worker image 119 | value: radanalyticsio/openshift-spark:2.2-latest 120 | - name: MASTER_NAME 121 | description: Master name used as a service name and a selector 122 | generate: expression 123 | from: "spark-master-[a-z0-9]{4}" 124 | required: true 125 | - name: WORKER_NAME 126 | description: Worker name used as a selector 127 | generate: expression 128 | from: "spark-worker-[a-z0-9]{4}" 129 | required: true 130 | - name: SPARK_METRICS_ON 131 | description: Enable metrics services. The default value is "jolokia" (deprecated), consider setting to "prometheus" instead. 132 | value: "jolokia" 133 | required: true 134 | -------------------------------------------------------------------------------- /tag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage() { 4 | echo 5 | echo "Creates a new tag for the current repo based on the spark version specified in image.yaml" 6 | echo "and the latest tag." 7 | echo 8 | echo "Usage: tag.sh" 9 | echo 10 | echo "optional arguments:" 11 | echo 12 | echo " -h Show this message" 13 | } 14 | 15 | while getopts h opt; do 16 | case $opt in 17 | h) 18 | usage 19 | exit 0 20 | ;; 21 | \?) 22 | echo "Invalid option: -$OPTARG" >&2 23 | exit 1 24 | ;; 25 | esac 26 | done 27 | 28 | # Extract the current spark version from the image.yaml file 29 | # Works by parsing the line following "name: sparkversion" 30 | VER=$(sed -n '\@name: sparkversion@!b;n;p' image.yaml | tr -d '[:space:]' | cut -d':' -f2) 31 | 32 | echo Version from image.yaml is $VER 33 | 34 | TAG=$(git describe --abbrev=0 --tags) 35 | 36 | PREFIX=$(echo $TAG | cut -d'-' -f1) 37 | BUILD=$(echo $TAG | cut -d'-' -f2) 38 | 39 | # If we already have tags for Major.Minor version, just increment the build number 40 | # If we don't already have tags for Major.Minor, start with build 1 41 | newbranch=0 42 | if [ "$PREFIX" == "$VER" ]; then 43 | TAG="$PREFIX-$((BUILD+1))" 44 | else 45 | TAG="$VER-1" 46 | newbranch=1 47 | fi 48 | 49 | echo "Adding tag $TAG" 50 | git tag "$TAG" 51 | if [ "$?" -eq 0 ]; then 52 | echo Tag "$TAG" added, don\'t forget to push to upstream 53 | MAJORMINOR=$(echo $VER | cut -d'.' 
-f1,2) 54 | if [ "$newbranch" == 0 ]; then 55 | echo "Also, don't forget to rebase branch $MAJORMINOR on master if necessary" 56 | else 57 | echo "Also, looks like a new version of spark. Don't forget to create a $MAJORMINOR branch from master" 58 | fi 59 | else 60 | echo Addition of tag "$TAG" failed 61 | fi 62 | -------------------------------------------------------------------------------- /template.yaml: -------------------------------------------------------------------------------- 1 | kind: Template 2 | apiVersion: v1 3 | template: spark 4 | metadata: 5 | name: spark 6 | labels: 7 | app: sparkcluster 8 | objects: 9 | 10 | - kind: Service 11 | apiVersion: v1 12 | metadata: 13 | name: ${MASTER_NAME} 14 | labels: 15 | name: ${MASTER_NAME} 16 | spec: 17 | ports: 18 | - protocol: TCP 19 | port: 7077 20 | targetPort: 7077 21 | selector: 22 | name: ${MASTER_NAME} 23 | 24 | - kind: Service 25 | apiVersion: v1 26 | metadata: 27 | name: ${MASTER_NAME}-webui 28 | labels: 29 | name: ${MASTER_NAME} 30 | spec: 31 | ports: 32 | - protocol: TCP 33 | port: 8080 34 | targetPort: 8080 35 | selector: 36 | name: ${MASTER_NAME} 37 | 38 | - kind: DeploymentConfig 39 | apiVersion: v1 40 | metadata: 41 | name: ${MASTER_NAME} 42 | spec: 43 | strategy: 44 | type: Rolling 45 | triggers: 46 | - type: ConfigChange 47 | replicas: 1 48 | selector: 49 | name: ${MASTER_NAME} 50 | template: 51 | metadata: 52 | labels: 53 | name: ${MASTER_NAME} 54 | spec: 55 | containers: 56 | - name: ${MASTER_NAME} 57 | image: ${SPARK_IMAGE} 58 | env: 59 | - name: SPARK_MASTER_PORT 60 | value: "7077" 61 | - name: SPARK_MASTER_WEBUI_PORT 62 | value: "8080" 63 | ports: 64 | - containerPort: 7077 65 | protocol: TCP 66 | - containerPort: 8080 67 | protocol: TCP 68 | 69 | - kind: DeploymentConfig 70 | apiVersion: v1 71 | metadata: 72 | name: ${WORKER_NAME} 73 | spec: 74 | strategy: 75 | type: Rolling 76 | triggers: 77 | - type: ConfigChange 78 | replicas: 3 79 | selector: 80 | name: ${WORKER_NAME} 81 | template: 82 | metadata: 83 | labels: 84 | name: ${WORKER_NAME} 85 | spec: 86 | containers: 87 | - name: ${WORKER_NAME} 88 | image: ${SPARK_IMAGE} 89 | env: 90 | - name: SPARK_MASTER_ADDRESS 91 | value: spark://${MASTER_NAME}:7077 92 | - name: SPARK_MASTER_UI_ADDRESS 93 | value: http://${MASTER_NAME}-webui:8080 94 | 95 | parameters: 96 | - name: SPARK_IMAGE 97 | description: Name of the Spark master/worker image 98 | value: radanalyticsio/openshift-spark:2.4-latest 99 | - name: MASTER_NAME 100 | description: master name used as a service name and a selector 101 | generate: expression 102 | from: "spark-master-[a-z0-9]{4}" 103 | required: true 104 | - name: WORKER_NAME 105 | description: worker name used as a selector 106 | generate: expression 107 | from: "spark-worker-[a-z0-9]{4}" 108 | required: true 109 | -------------------------------------------------------------------------------- /test/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SPARK_TEST_IMAGE=${SPARK_TEST_IMAGE:-} 4 | 5 | SPARK_TEST_LOCAL_IMAGE=${SPARK_TEST_LOCAL_IMAGE:-true} 6 | 7 | # This is all for dealing with registries. 
External registry requires creds other than the current login 8 | SPARK_TEST_INTEGRATED_REGISTRY=${SPARK_TEST_INTEGRATED_REGISTRY:-} 9 | SPARK_TEST_EXTERNAL_REGISTRY=${SPARK_TEST_EXTERNAL_REGISTRY:-} 10 | SPARK_TEST_EXTERNAL_USER=${SPARK_TEST_EXTERNAL_USER:-} 11 | SPARK_TEST_EXTERNAL_PASSWORD=${SPARK_TEST_EXTERNAL_PASSWORD:-} 12 | 13 | if [ -z "$SPARK_TEST_IMAGE" ]; then 14 | if [ "$SPARK_TEST_LOCAL_IMAGE" == true ]; then 15 | SPARK_TEST_IMAGE=spark-testimage:latest 16 | else 17 | SPARK_TEST_IMAGE=docker.io/radanalyticsio/openshift-spark:latest 18 | fi 19 | fi 20 | 21 | function print_test_env { 22 | echo Using image $SPARK_TEST_IMAGE 23 | 24 | if [ "$SPARK_TEST_LOCAL_IMAGE" != true ]; then 25 | echo SPARK_TEST_LOCAL_IMAGE = $SPARK_TEST_LOCAL_IMAGE, spark image is external, ignoring registry env vars 26 | elif [ -n "$SPARK_TEST_EXTERNAL_REGISTRY" ]; then 27 | echo Using external registry $SPARK_TEST_EXTERNAL_REGISTRY 28 | if [ -z "$SPARK_TEST_EXTERNAL_USER" ]; then 29 | echo "Error: SPARK_TEST_EXTERNAL_USER not set!" 30 | exit 1 31 | else 32 | echo Using external registry user $SPARK_TEST_EXTERNAL_USER 33 | fi 34 | if [ -z "$SPARK_TEST_EXTERNAL_PASSWORD" ]; then 35 | echo "SPARK_TEST_EXTERNAL_PASSWORD not set, assuming current docker login" 36 | else 37 | echo External registry password set 38 | fi 39 | elif [ -n "$SPARK_TEST_INTEGRATED_REGISTRY" ]; then 40 | echo Using integrated registry $SPARK_TEST_INTEGRATED_REGISTRY 41 | else 42 | echo Not using external or integrated registry 43 | fi 44 | } 45 | print_test_env 46 | 47 | function make_image { 48 | # The ip address of an internal/external registry may be set to support running against 49 | # an openshift that is not "oc cluster up" when using images that have been built locally. 50 | # In the case of "oc cluster up", the docker on the host is available from openshift so 51 | # no special pushes of images have to be done. 52 | # In the case of a "normal" openshift cluster, a local image we'll use for build has to be 53 | # available from the designated registry. 54 | # If we're using an image already in an external registry, openshift can pull it from 55 | # there and we don't have to do anything. 56 | local user= 57 | local password= 58 | local pushproj= 59 | local pushimage= 60 | local registry= 61 | if [ "$SPARK_TEST_LOCAL_IMAGE" == true ]; then 62 | if [ -n "$SPARK_TEST_EXTERNAL_REGISTRY" ]; then 63 | user=$SPARK_TEST_EXTERNAL_USER 64 | password=$SPARK_TEST_EXTERNAL_PASSWORD 65 | pushproj=$user 66 | pushimage=scratch-openshift-spark 67 | registry=$SPARK_TEST_EXTERNAL_REGISTRY 68 | elif [ -n "$SPARK_TEST_INTEGRATED_REGISTRY" ]; then 69 | user=$(oc whoami) 70 | password=$(oc whoami -t) 71 | pushproj=$PROJECT 72 | pushimage=oshinko-webui 73 | registry=$SPARK_TEST_INTEGRATED_REGISTRY 74 | fi 75 | fi 76 | if [ -n "$registry" ]; then 77 | set +e 78 | docker login --help | grep email &> /dev/null 79 | res=$? 
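# res records whether this docker client still accepts the deprecated
# -e/--email flag on 'docker login'; the login call below branches on it.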
80 | set -e 81 | if [ -n "$password" ] && [ -n "$user" ]; then 82 | if [ "$res" -eq 0 ]; then 83 | docker login -u ${user} -e jack@jack.com -p ${password} ${registry} 84 | else 85 | docker login -u ${user} -p ${password} ${registry} 86 | fi 87 | fi 88 | docker tag ${SPARK_TEST_IMAGE} ${registry}/${pushproj}/${pushimage} 89 | docker push ${registry}/${pushproj}/${pushimage} 90 | SPARK_IMAGE=${registry}/${pushproj}/${pushimage} 91 | else 92 | SPARK_IMAGE=$SPARK_TEST_IMAGE 93 | fi 94 | } 95 | 96 | function cleanup_app { 97 | oc delete dc --all > /dev/null 98 | oc delete service --all > /dev/null 99 | oc delete route --all > /dev/null 100 | oc delete template --all > /dev/null 101 | oc delete pod --all > /dev/null 102 | os::cmd::try_until_text 'oc get pods' 'No resources found' 103 | } 104 | 105 | function make_configmap { 106 | set +e 107 | oc create configmap test-config --from-file=$RESOURCE_DIR/config 108 | set -e 109 | } 110 | 111 | function poll_binary_build() { 112 | local name 113 | local source 114 | local expect_fail 115 | local from_flag="" 116 | name=$1 117 | if [ "$#" -ge 2 ]; then 118 | source=$2 119 | # We'll pass a tarball directory to test from-archive and the ability 120 | # of the image to detect an unpacked directory. We'll use from-file 121 | # with a directory to test the ability of the image to handle a tarball 122 | if [[ "$source" == *".tgz" ]]; then 123 | from_flag="--from-archive=$source" 124 | else 125 | from_flag="--from-file=$source" 126 | fi 127 | fi 128 | if [ "$#" -eq 3 ]; then 129 | expect_fail=$3 130 | else 131 | expect_fail=false 132 | fi 133 | local tries=0 134 | local status 135 | local BUILDNUM 136 | 137 | echo "oc start-build $name $from_flag" 138 | oc start-build $name $from_flag 139 | 140 | 141 | while true; do 142 | BUILDNUM=$(oc get buildconfig $name --template='{{index .status "lastVersion"}}') 143 | if [ "$BUILDNUM" == "0" ]; then 144 | # Buildconfig is brand new, lastVersion hasn't been updated yet 145 | status="starting" 146 | else 147 | status=$(oc get build "$name"-$BUILDNUM --template="{{index .status \"phase\"}}") 148 | fi 149 | if [ "$status" == "starting" ]; then 150 | echo Build for $name is spinning up, waiting ... 151 | sleep 5 152 | elif [ "$status" != "Complete" -a "$status" != "Failed" -a "$status" != "Error" ]; then 153 | echo Build for $name-$BUILDNUM status is $status, waiting ... 154 | sleep 10 155 | elif [ "$status" == "Failed" -o "$status" == "Error" ]; then 156 | if [ "$expect_fail" == "true" ]; then 157 | return 158 | fi 159 | set +e 160 | oc log buildconfig/$name | grep "Pushing image" 161 | if [ "$?" -eq 0 ]; then 162 | tries=$((tries+1)) 163 | if [ "$tries" -lt 5 ]; then 164 | echo Build failed on push, retrying 165 | sleep 5 166 | oc start-build $name $from_flag 167 | continue 168 | fi 169 | fi 170 | oc log buildconfig/$name | tail -100 171 | set -e 172 | return 1 173 | else 174 | echo Build for $name-$BUILDNUM status is $status, returning 175 | break 176 | fi 177 | done 178 | } 179 | 180 | function get_cluster_pod() { 181 | local count 182 | count=0 183 | 184 | set +e 185 | while true; do 186 | POD=$(oc get pod -l deploymentconfig=$1 --template='{{index .items 0 "metadata" "name"}}') 187 | if [ "$?" 
-eq 0 ]; then 188 | break 189 | fi 190 | echo Getting cluster pod for $1 failed, trying again 191 | oc get pods 192 | sleep 0.5 193 | count=$((count + 1)) 194 | echo $count 195 | if [ "$count" -eq 120 ]; then 196 | return 1 197 | fi 198 | done 199 | set -e 200 | } 201 | -------------------------------------------------------------------------------- /test/completed/config-changes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 | os::test::junit::declare_suite_start "config-changes" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE"' 18 | 19 | os::cmd::try_until_text 'oc logs dc/master' 'Copying from /etc/config to /opt/spark/conf' 20 | 21 | os::cmd::try_until_text 'oc logs dc/worker' 'Copying from /etc/config to /opt/spark/conf' 22 | 23 | #test deletion 24 | os::cmd::try_until_success 'oc delete dc/worker' 25 | 26 | os::cmd::try_until_success 'oc delete dc/master' 27 | 28 | #check the pods have been deleted using a label 29 | os::cmd::try_until_text 'oc get pods' 'No resources found.' $((30*second)) 30 | 31 | cleanup_app 32 | 33 | os::test::junit::declare_suite_end 34 | -------------------------------------------------------------------------------- /test/completed/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 | os::test::junit::declare_suite_start "deploy" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE"' 18 | 19 | #check pods have been created 20 | os::cmd::try_until_text 'oc get pods' 'worker' 21 | 22 | os::cmd::try_until_text 'oc get pods' 'master' 23 | 24 | # expose the service 25 | os::cmd::expect_success 'oc expose service/master-webui' 26 | 27 | # parse the ip 28 | HOST=$(oc get route | grep master-webui | awk '{print $2;}') 29 | 30 | os::cmd::try_until_text 'curl --silent "$HOST" | grep "Alive Workers" | sed "s,[^0-9],\\ ,g" | tr -d "[:space:]"' "^1$" 31 | 32 | #test deletion 33 | os::cmd::try_until_success 'oc delete dc/worker' 34 | 35 | os::cmd::try_until_success 'oc delete dc/master' 36 | 37 | #check the pods have been deleted using a label 38 | os::cmd::try_until_text 'oc get pods -l deploymentconfig=master' 'No resources found.' $((25*second)) 39 | os::cmd::try_until_text 'oc get pods -l deploymentconfig=worker' 'No resources found.' 
$((25*second)) 40 | 41 | cleanup_app 42 | 43 | os::test::junit::declare_suite_end 44 | -------------------------------------------------------------------------------- /test/completed/deploy_jolokia.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 | os::test::junit::declare_suite_start "deploy_jolokia" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-spark-metrics-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE" -p SPARK_METRICS_ON=jolokia' 18 | 19 | # check the master has started the metrics 20 | os::cmd::try_until_text 'oc logs dc/master' 'Starting master with jolokia metrics enabled' 21 | 22 | # expose the service 23 | os::cmd::expect_success 'oc expose service/master-jolokia' 24 | 25 | # parse the ip 26 | HOST=$(oc get route | grep master-jolokia | awk '{print $2;}')/jolokia/ 27 | echo curling jolokia at $HOST 28 | 29 | # check it's up 30 | os::cmd::try_until_text 'curl --silent --output /dev/null --write-out %{http_code} "$HOST"' '^200$' $((60*second)) 31 | 32 | cleanup_app 33 | 34 | os::test::junit::declare_suite_end 35 | -------------------------------------------------------------------------------- /test/completed/deploy_prometheus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 | os::test::junit::declare_suite_start "deploy_prom" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-spark-metrics-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE" -p SPARK_METRICS_ON=prometheus' 18 | 19 | # check the master has started the metrics 20 | os::cmd::try_until_text 'oc logs dc/master' 'Starting master with prometheus metrics enabled' 21 | 22 | # expose the service 23 | os::cmd::expect_success 'oc expose service/master-prometheus' 24 | 25 | # parse the ip 26 | HOST=$(oc get route | grep master-prometheus | awk '{print $2;}')/metrics 27 | echo curling prometheus at $HOST 28 | 29 | # check it's up 30 | os::cmd::try_until_text 'curl --silent --output /dev/null --write-out %{http_code} "$HOST"' '^200$' $((60*second)) 31 | 32 | cleanup_app 33 | 34 | os::test::junit::declare_suite_end 35 | -------------------------------------------------------------------------------- /test/incomplete/app_fail.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | THIS=$(readlink -f `dirname "${BASH_SOURCE[0]}"`) 3 | TOP_DIR=$(echo $THIS | grep -o '.*/openshift-spark') 4 | 5 | source $TOP_DIR/hack/lib/init.sh 6 | trap os::test::junit::reconcile_output EXIT 7 | 8 | source $TOP_DIR/test/common.sh 9 | RESOURCE_DIR=$TOP_DIR/test/resources 10 | 11 |
os::test::junit::declare_suite_start "app_fail" 12 | 13 | # Handles registries, etc, and sets SPARK_IMAGE to the right value 14 | make_image 15 | make_configmap 16 | 17 | os::cmd::expect_success 'oc new-app --file=$RESOURCE_DIR/test-template.yaml -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE"' 18 | 19 | # If a user tries to use the image as a cluster image without completion, the usage script should run 20 | get_cluster_pod master 21 | os::cmd::try_until_text 'oc logs $POD' 'This is an incomplete openshift-spark image' 22 | 23 | get_cluster_pod worker 24 | os::cmd::try_until_text 'oc logs $POD' 'This is an incomplete openshift-spark image' 25 | 26 | cleanup_app 27 | 28 | os::test::junit::declare_suite_end 29 | -------------------------------------------------------------------------------- /test/localcomplete.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script creates completed versions of the incomplete images 4 | # and tags them into the local docker daemon so that the "completed" 5 | # suite of tests can be run on them (the same tests are run on the 6 | # "full" images as well) 7 | 8 | function poll_binary_build() { 9 | local name=$1 10 | local tries=0 11 | local status 12 | local BUILDNUM 13 | 14 | oc start-build $name --from-file=$RESOURCE_DIR/spark-inputs 15 | 16 | while true; do 17 | BUILDNUM=$(oc get buildconfig $name --template='{{index .status "lastVersion"}}') 18 | if [ "$BUILDNUM" == "0" ]; then 19 | # Buildconfig is brand new, lastVersion hasn't been updated yet 20 | status="starting" 21 | else 22 | status=$(oc get build "$name"-$BUILDNUM --template="{{index .status \"phase\"}}") 23 | fi 24 | if [ "$status" == "starting" ]; then 25 | echo Build for $name is spinning up, waiting ... 26 | sleep 5 27 | elif [ "$status" != "Complete" -a "$status" != "Failed" -a "$status" != "Error" ]; then 28 | echo Build for $name-$BUILDNUM status is $status, waiting ... 29 | sleep 10 30 | elif [ "$status" == "Failed" -o "$status" == "Error" ]; then 31 | set +e 32 | oc log buildconfig/$name | grep "Pushing image" 33 | if [ "$?" -eq 0 ]; then 34 | tries=$((tries+1)) 35 | if [ "$tries" -lt 5 ]; then 36 | echo Build failed on push, retrying 37 | sleep 5 38 | oc start-build $name --from-file=$RESOURCE_DIR/spark-inputs 39 | continue 40 | fi 41 | fi 42 | oc log buildconfig/$name | tail -100 43 | set -e 44 | return 1 45 | else 46 | echo Build for $name-$BUILDNUM status is $status, returning 47 | break 48 | fi 49 | done 50 | } 51 | 52 | RESOURCE_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"` | grep -o '.*/openshift-spark/test')/resources 53 | 54 | oc new-build --name=$2 --docker-image=$1 --binary 55 | 56 | poll_binary_build $2 57 | 58 | id=$(docker images | grep $2 | head -n1 | awk '{print $3}') 59 | echo docker tag "$id" "$2":latest 60 | docker tag "$id" $2:latest 61 | -------------------------------------------------------------------------------- /test/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copies oc binary out of official openshift origin image 4 | # Note: this expects the OPENSHIFT_VERSION env variable to be set. 
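# Illustration only (the value itself is supplied by the CI environment, not
# by this script); the caller is expected to have exported something like:
#   export OPENSHIFT_VERSION=v3.11.0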
5 | function download_openshift() { 6 | echo "Downloading oc binary for OPENSHIFT_VERSION=${OPENSHIFT_VERSION}" 7 | sudo docker cp $(docker create docker.io/openshift/origin:$OPENSHIFT_VERSION):/bin/oc /usr/local/bin/oc 8 | oc version 9 | } 10 | 11 | function setup_insecure_registry() { 12 | # add insecure-registry and restart docker 13 | sudo cat /etc/default/docker 14 | sudo service docker stop 15 | sudo sed -i -e 's/sock/sock --insecure-registry 172.30.0.0\/16/' /etc/default/docker 16 | sudo cat /etc/default/docker 17 | sudo service docker start 18 | sudo service docker status 19 | } 20 | 21 | function start_and_verify_openshift() { 22 | # Sometimes oc cluster up fails with a permission error and works when the test is relaunched. 23 | # See if a retry within the same test works 24 | set +e 25 | built=false 26 | while true; do 27 | oc cluster up --base-dir=/home/travis/gopath/src/github.com/radanalyticsio/origin 28 | if [ "$?" -eq 0 ]; then 29 | ./travis-check-pods.sh 30 | if [ "$?" -eq 0 ]; then 31 | built=true 32 | break 33 | fi 34 | fi 35 | echo "Retrying oc cluster up after failure" 36 | oc cluster down 37 | sleep 5 38 | done 39 | set -e 40 | if [ "$built" == false ]; then 41 | exit 1 42 | fi 43 | # travis-check-pods.sh left us in the default project 44 | oc project myproject 45 | } 46 | 47 | setup_insecure_registry 48 | download_openshift 49 | start_and_verify_openshift 50 | -------------------------------------------------------------------------------- /test/resources/config/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=DEBUG, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 
28 | log4j.logger.org.apache.spark.repl.Main=DEBUG 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=DEBUG 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=DEBUG 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=DEBUG 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=DEBUG 35 | log4j.logger.org.apache.parquet=DEBUG 36 | log4j.logger.parquet=DEBUG 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /test/resources/config/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 
20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.ui.reverseProxy false 29 | spark.ui.reverseProxyUrl / 30 | -------------------------------------------------------------------------------- /test/resources/test-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: configMap 3 | metadata: 4 | name: test-config 5 | namespace: default 6 | data: 7 | -------------------------------------------------------------------------------- /test/resources/test-spark-metrics-template.yaml: -------------------------------------------------------------------------------- 1 | kind: Template 2 | apiVersion: v1 3 | template: spark 4 | metadata: 5 | name: spark 6 | objects: 7 | 8 | - kind: Service 9 | apiVersion: v1 10 | metadata: 11 | name: ${MASTER_NAME}-${SPARK_METRICS_ON} 12 | labels: 13 | name: ${MASTER_NAME} 14 | spec: 15 | ports: 16 | - protocol: TCP 17 | port: 7777 18 | targetPort: 7777 19 | selector: 20 | name: ${MASTER_NAME} 21 | 22 | - kind: Service 23 | apiVersion: v1 24 | metadata: 25 | name: ${MASTER_NAME} 26 | labels: 27 | name: ${MASTER_NAME} 28 | spec: 29 | ports: 30 | - protocol: TCP 31 | port: 7077 32 | targetPort: 7077 33 | selector: 34 | name: ${MASTER_NAME} 35 | 36 | - kind: Service 37 | apiVersion: v1 38 | metadata: 39 | name: ${MASTER_NAME}-webui 40 | labels: 41 | name: ${MASTER_NAME} 42 | spec: 43 | ports: 44 | - protocol: TCP 45 | port: 8080 46 | targetPort: 8080 47 | selector: 48 | name: ${MASTER_NAME} 49 | 50 | - kind: DeploymentConfig 51 | apiVersion: v1 52 | metadata: 53 | name: ${MASTER_NAME} 54 | spec: 55 | strategy: 56 | type: Rolling 57 | triggers: 58 | - type: ConfigChange 59 | replicas: 1 60 | selector: 61 | name: ${MASTER_NAME} 62 | template: 63 | metadata: 64 | labels: 65 | name: ${MASTER_NAME} 66 | spec: 67 | containers: 68 | - name: ${MASTER_NAME} 69 | image: ${SPARK_IMAGE} 70 | imagePullPolicy: IfNotPresent 71 | env: 72 | - name: SPARK_MASTER_PORT 73 | value: "7077" 74 | - name: SPARK_MASTER_WEBUI_PORT 75 | value: "8080" 76 | - name: SPARK_METRICS_ON 77 | value: ${SPARK_METRICS_ON} 78 | - name: SPARK_USER 79 | value: admin 80 | ports: 81 | - containerPort: 7077 82 | protocol: TCP 83 | - containerPort: 7777 84 | protocol: TCP 85 | - containerPort: 8080 86 | protocol: TCP 87 | 88 | - kind: DeploymentConfig 89 | apiVersion: v1 90 | metadata: 91 | name: ${WORKER_NAME} 92 | spec: 93 | strategy: 94 | type: Rolling 95 | triggers: 96 | - type: ConfigChange 97 | replicas: 1 98 | selector: 99 | name: ${WORKER_NAME} 100 | template: 101 | metadata: 102 | labels: 103 | name: ${WORKER_NAME} 104 | spec: 105 | containers: 106 | - name: ${WORKER_NAME} 107 | image: ${SPARK_IMAGE} 108 | imagePullPolicy: IfNotPresent 109 | env: 110 | - name: SPARK_METRICS_ON 111 | value: ${SPARK_METRICS_ON} 112 | - name: SPARK_MASTER_ADDRESS 113 | value: spark://${MASTER_NAME}:7077 114 | - name: SPARK_MASTER_UI_ADDRESS 115 | value: http://${MASTER_NAME}-webui:8080 116 | - name: SPARK_USER 117 | value: admin 118 | parameters: 119 | - name: SPARK_IMAGE 120 | description: Name of the Spark master/worker image 121 | value: radanalyticsio/openshift-spark:2.2-latest 122 | - name: MASTER_NAME 123 | description: Master name used as a service 
name and a selector 124 | generate: expression 125 | from: "spark-master-[a-z0-9]{4}" 126 | required: true 127 | - name: WORKER_NAME 128 | description: Worker name used as a selector 129 | generate: expression 130 | from: "spark-worker-[a-z0-9]{4}" 131 | required: true 132 | - name: SPARK_METRICS_ON 133 | description: Enable metrics services. The default value is "jolokia" (deprecated), consider setting to "prometheus" instead. 134 | value: "jolokia" 135 | required: true 136 | -------------------------------------------------------------------------------- /test/resources/test-template.yaml: -------------------------------------------------------------------------------- 1 | kind: Template 2 | apiVersion: v1 3 | template: spark 4 | metadata: 5 | name: spark 6 | labels: 7 | app: sparkcluster 8 | objects: 9 | 10 | - kind: Service 11 | apiVersion: v1 12 | metadata: 13 | name: ${MASTER_NAME} 14 | labels: 15 | name: ${MASTER_NAME} 16 | spec: 17 | ports: 18 | - protocol: TCP 19 | port: 7077 20 | targetPort: 7077 21 | selector: 22 | name: ${MASTER_NAME} 23 | - kind: Service 24 | apiVersion: v1 25 | metadata: 26 | name: ${MASTER_NAME}-webui 27 | labels: 28 | name: ${MASTER_NAME} 29 | spec: 30 | ports: 31 | - protocol: TCP 32 | port: 8080 33 | targetPort: 8080 34 | selector: 35 | name: ${MASTER_NAME} 36 | 37 | - kind: DeploymentConfig 38 | apiVersion: v1 39 | metadata: 40 | name: ${MASTER_NAME} 41 | spec: 42 | strategy: 43 | type: Rolling 44 | triggers: 45 | - type: ConfigChange 46 | replicas: 1 47 | selector: 48 | name: ${MASTER_NAME} 49 | template: 50 | metadata: 51 | labels: 52 | name: ${MASTER_NAME} 53 | spec: 54 | containers: 55 | - name: ${MASTER_NAME} 56 | image: ${SPARK_IMAGE} 57 | imagePullPolicy: IfNotPresent 58 | env: 59 | - name: SPARK_MASTER_PORT 60 | value: "7077" 61 | - name: SPARK_MASTER_WEBUI_PORT 62 | value: "8080" 63 | - name: UPDATE_SPARK_CONF_DIR 64 | value: /etc/config 65 | ports: 66 | - containerPort: 7077 67 | protocol: TCP 68 | - containerPort: 8080 69 | protocol: TCP 70 | volumeMounts: 71 | - name: config-volume 72 | mountPath: /etc/config 73 | volumes: 74 | - name: config-volume 75 | configMap: 76 | name: test-config 77 | 78 | - kind: DeploymentConfig 79 | apiVersion: v1 80 | metadata: 81 | name: ${WORKER_NAME} 82 | spec: 83 | strategy: 84 | type: Rolling 85 | triggers: 86 | - type: ConfigChange 87 | replicas: 1 88 | selector: 89 | name: ${WORKER_NAME} 90 | template: 91 | metadata: 92 | labels: 93 | name: ${WORKER_NAME} 94 | spec: 95 | containers: 96 | - name: ${WORKER_NAME} 97 | image: ${SPARK_IMAGE} 98 | imagePullPolicy: IfNotPresent 99 | env: 100 | - name: SPARK_MASTER_ADDRESS 101 | value: spark://${MASTER_NAME}:7077 102 | - name: SPARK_MASTER_UI_ADDRESS 103 | value: http://${MASTER_NAME}-webui:8080 104 | - name: UPDATE_SPARK_CONF_DIR 105 | value: /etc/config 106 | volumeMounts: 107 | - name: config-volume 108 | mountPath: /etc/config 109 | volumes: 110 | - name: config-volume 111 | configMap: 112 | name: test-config 113 | parameters: 114 | - name: SPARK_IMAGE 115 | description: Name of the Spark master/worker image 116 | value: radanalyticsio/openshift-spark:2.2-latest 117 | - name: MASTER_NAME 118 | description: master name used as a service name and a selector 119 | generate: expression 120 | from: "spark-master-[a-z0-9]{4}" 121 | required: true 122 | - name: WORKER_NAME 123 | description: worker name used as a selector 124 | generate: expression 125 | from: "spark-worker-[a-z0-9]{4}" 126 | required: true 127 | 
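The two templates above are what the completed suites instantiate through make_image, make_configmap, and oc new-app (see test/common.sh and the scripts under test/completed). A minimal manual sketch of the same flow, assuming an image reference is already in SPARK_IMAGE and the repository root is the working directory:

    # test-template.yaml mounts a configmap named test-config at /etc/config
    oc create configmap test-config --from-file=test/resources/config
    # plain master/worker deployment
    oc new-app --file=test/resources/test-template.yaml \
       -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE"
    # or the metrics variant, selecting jolokia or prometheus
    oc new-app --file=test/resources/test-spark-metrics-template.yaml \
       -p MASTER_NAME=master -p WORKER_NAME=worker -p SPARK_IMAGE="$SPARK_IMAGE" \
       -p SPARK_METRICS_ON=prometheus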
-------------------------------------------------------------------------------- /test/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | STARTTIME=$(date +%s) 3 | 4 | # Sourcing common will source hack/lib/init.sh 5 | source "$(dirname "${BASH_SOURCE}")/../hack/lib/init.sh" 6 | 7 | os::util::environment::setup_time_vars 8 | 9 | function cleanup() 10 | { 11 | out=$? 12 | set +e 13 | 14 | pkill -P $$ 15 | kill_all_processes 16 | 17 | os::test::junit::reconcile_output 18 | 19 | ENDTIME=$(date +%s); echo "$0 took $(($ENDTIME - $STARTTIME)) seconds" 20 | os::log::info "Exiting with ${out}" 21 | exit $out 22 | } 23 | 24 | trap "exit" INT TERM 25 | trap "cleanup" EXIT 26 | 27 | function find_tests() { 28 | local test_regex="${2}" 29 | local full_test_list=() 30 | local selected_tests=() 31 | 32 | full_test_list=($(find "${1}" -maxdepth 1 -name '*.sh')) 33 | if [ "${#full_test_list[@]}" -eq 0 ]; then 34 | return 0 35 | fi 36 | for test in "${full_test_list[@]}"; do 37 | test_rel_path=${test#${test::1}*openshift-spark/test/cmd} 38 | if grep -q -E "${test_regex}" <<< "${test_rel_path}"; then 39 | selected_tests+=( "${test}" ) 40 | fi 41 | done 42 | 43 | if [ "${#selected_tests[@]}" -eq 0 ]; then 44 | os::log::info "No tests were selected by regex in "${1} 45 | return 1 46 | else 47 | echo "${selected_tests[@]}" 48 | fi 49 | } 50 | 51 | orig_project=$(oc project -q) 52 | failed_list="" 53 | failed=false 54 | 55 | dirs=($(find "${OS_ROOT}/test/" -mindepth 1 -type d -not -path "./resources*")) 56 | for dir in "${dirs[@]}"; do 57 | 58 | failed_dir=false 59 | 60 | # Get the list of test files in the current directory 61 | set +e 62 | output=$(find_tests $dir ${1:-.*}) 63 | res=$? 64 | set -e 65 | if [ "$res" -ne 0 ]; then 66 | echo $output 67 | continue 68 | fi 69 | 70 | # Turn the list of tests into an array and check the length, skip if zero 71 | tests=($(echo "$output")) 72 | if [ "${#tests[@]}" -eq 0 ]; then 73 | continue 74 | fi 75 | 76 | # Create the project here 77 | name=$(basename ${dir} .sh) 78 | set +e # For some reason the result here from head is not 0 even though we get the desired result 79 | namespace=${name}-$(date -Ins | md5sum | tr -dc 'a-z0-9' | fold -w 6 | head -n 1) 80 | set -e 81 | oc new-project $namespace &> /dev/null 82 | oc create sa oshinko &> /dev/null 83 | oc policy add-role-to-user admin system:serviceaccount:$namespace:oshinko &> /dev/null 84 | echo "++++++ ${dir}" 85 | echo Using project $namespace 86 | 87 | for test in "${tests[@]}"; do 88 | echo 89 | echo "++++ ${test}" 90 | if ! 
${test}; then 91 | echo "failed: ${test}" 92 | failed=true 93 | failed_dir=true 94 | failed_list=$failed_list'\n\t'$test 95 | fi 96 | done 97 | if [ "$failed_dir" == true -a ${S2I_SAVE_FAIL:-false} == true ]; then 98 | echo Leaving project $namespace because of failures 99 | else 100 | oc delete project $namespace 101 | fi 102 | done 103 | 104 | oc project $orig_project 105 | if [ "$failed" == true ]; then 106 | echo "One or more tests failed:" 107 | echo -e $failed_list'\n' 108 | exit 1 109 | fi 110 | -------------------------------------------------------------------------------- /test/sparkinputs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | TOP_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"` | grep -o '.*/openshift-spark/') 4 | BUILD_DIR=$TOP_DIR/openshift-spark-build 5 | 6 | # See what spark version the image build used 7 | fullname=$(find $BUILD_DIR -name "spark-[0-9.]*\.tgz") 8 | 9 | # Download the same version to use as a binary build input 10 | filename=$(basename $fullname) 11 | version=$(echo $filename | cut -d '-' -f2) 12 | mkdir -p $TOP_DIR/test/resources/spark-inputs 13 | pushd $TOP_DIR/test/resources/spark-inputs 14 | if ! [ -f "spark-$version-bin-hadoop2.7.tgz" ]; then 15 | wget https://archive.apache.org/dist/spark/spark-$version/spark-$version-bin-hadoop2.7.tgz 16 | fi 17 | if ! [ -f "spark-$version-bin-hadoop2.7.tgz.sha512" ]; then 18 | wget https://archive.apache.org/dist/spark/spark-$version/spark-$version-bin-hadoop2.7.tgz.sha512 19 | fi 20 | echo "spark-$version-bin-hadoop2.7.tgz: FF FF FF FF FF FF CA FE BE EF CA FE BE EF CA FE" > spark-$version-bin-hadoop2.7.tgz.bad 21 | popd 22 | 23 | # Make a fake tarball that is missing spark-submit 24 | mkdir -p $TOP_DIR/test/resources/spark-inputs-no-submit 25 | pushd $TOP_DIR/test/resources/spark-inputs-no-submit 26 | mkdir spark-$version-bin-hadoop2.7 27 | touch spark-$version-bin-hadoop2.7/foo 28 | tar -czf spark-$version-bin-hadoop2.7.tgz spark-$version-bin-hadoop2.7 29 | rm -rf spark-$version-bin-hadoop2.7 30 | popd 31 | 32 | # Make a fake tarball with a spark-submit that returns an error 33 | mkdir -p $TOP_DIR/test/resources/spark-inputs-bad-submit 34 | pushd $TOP_DIR/test/resources/spark-inputs-bad-submit 35 | mkdir -p spark-$version-bin-hadoop2.7/bin 36 | echo "#!/bin/bash" > spark-$version-bin-hadoop2.7/bin/spark-submit 37 | echo "exit 1" >> spark-$version-bin-hadoop2.7/bin/spark-submit 38 | chmod +x spark-$version-bin-hadoop2.7/bin/spark-submit 39 | tar -czf spark-$version-bin-hadoop2.7.tgz spark-$version-bin-hadoop2.7 40 | rm -rf spark-$version-bin-hadoop2.7 41 | popd 42 | 43 | # Make a fake tarball with a spark-submit that returns success 44 | # Also include some config files so we can test copy-if-not-overwrite 45 | mkdir -p $TOP_DIR/test/resources/spark-inputs-with-conf 46 | pushd $TOP_DIR/test/resources/spark-inputs-with-conf 47 | mkdir -p spark-$version-bin-hadoop2.7/bin 48 | echo "#!/bin/bash" > spark-$version-bin-hadoop2.7/bin/spark-submit 49 | echo "exit 0" >> spark-$version-bin-hadoop2.7/bin/spark-submit 50 | chmod +x spark-$version-bin-hadoop2.7/bin/spark-submit 51 | mkdir -p spark-$version-bin-hadoop2.7/conf 52 | touch spark-$version-bin-hadoop2.7/conf/spark-defaults.conf 53 | touch spark-$version-bin-hadoop2.7/conf/log4j.properties 54 | tar -czf spark-$version-bin-hadoop2.7.tgz spark-$version-bin-hadoop2.7 55 | rm -rf spark-$version-bin-hadoop2.7 56 | popd 57 | 
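sparkinputs.sh only stages fixtures; the incomplete-image tests hand them to binary builds through poll_binary_build from test/common.sh, which passes a .tgz path with --from-archive and a directory with --from-file. A rough sketch of how the fixtures might be exercised, assuming a binary buildconfig (hypothetically named spark-build) has already been created with oc new-build against one of the incomplete images:

    RESOURCE_DIR=test/resources
    # real tarball plus checksum files: the build should complete
    poll_binary_build spark-build "$RESOURCE_DIR/spark-inputs"
    # fixture whose spark-submit exits non-zero: the build is expected to fail
    poll_binary_build spark-build "$RESOURCE_DIR/spark-inputs-bad-submit" true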
-------------------------------------------------------------------------------- /travis-check-pods.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | oc login -u system:admin 4 | oc project default 5 | 6 | while true; do 7 | V=$(oc get dc docker-registry --template='{{index .status "latestVersion"}}') 8 | P=$(oc get pod docker-registry-$V-deploy --template='{{index .status "phase"}}') 9 | if [ "$?" -eq 0 ]; then 10 | echo phase is $P for docker-registry deploy $V 11 | if [ "$P" == "Failed" ]; then 12 | echo "registry deploy failed, try again" 13 | oc get pods 14 | oc rollout retry dc/docker-registry 15 | sleep 10 16 | continue 17 | fi 18 | fi 19 | REG=$(oc get pod -l deploymentconfig=docker-registry --template='{{index .items 0 "status" "phase"}}') 20 | if [ "$?" -eq 0 ]; then 21 | break 22 | fi 23 | oc get pods 24 | echo "Waiting for registry pod" 25 | sleep 10 26 | done 27 | 28 | while true; do 29 | REG=$(oc get pod -l deploymentconfig=docker-registry --template='{{index .items 0 "status" "phase"}}') 30 | if [ "$?" -ne 0 -o "$REG" == "Error" ]; then 31 | echo "Registry pod is in error state..." 32 | exit 1 33 | fi 34 | if [ "$REG" == "Running" ]; then 35 | break 36 | fi 37 | sleep 5 38 | done 39 | 40 | while true; do 41 | V=$(oc get dc router --template='{{index .status "latestVersion"}}') 42 | P=$(oc get pod router-$V-deploy --template='{{index .status "phase"}}') 43 | if [ "$?" -eq 0 ]; then 44 | echo phase is $P for router deploy $V 45 | if [ "$P" == "Failed" ]; then 46 | echo "router deploy failed, try again" 47 | oc get pods 48 | oc rollout retry dc/router 49 | sleep 10 50 | continue 51 | fi 52 | fi 53 | REG=$(oc get pod -l deploymentconfig=router --template='{{index .items 0 "status" "phase"}}') 54 | if [ "$?" -eq 0 ]; then 55 | break 56 | fi 57 | oc get pods 58 | echo "Waiting for router pod" 59 | sleep 10 60 | done 61 | 62 | 63 | while true; do 64 | REG=$(oc get pod -l deploymentconfig=router --template='{{index .items 0 "status" "phase"}}') 65 | if [ "$?" -ne 0 -o "$REG" == "Error" ]; then 66 | echo "Router pod is in error state..." 67 | exit 1 68 | fi 69 | if [ "$REG" == "Running" ]; then 70 | break 71 | fi 72 | sleep 5 73 | done 74 | echo "Registry and router pods are okay" 75 | --------------------------------------------------------------------------------
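Taken together, the pieces above compose into the test flow: prepare.sh brings the cluster up (on Travis), sparkinputs.sh stages the binary-build fixtures, and test/run.sh walks the test directories, creating a throwaway project per directory and running every script whose path matches an optional regex. A sketch of a local invocation, assuming oc is already logged in to a suitable cluster and any SPARK_TEST_* variables are set as expected by test/common.sh:

    ./test/sparkinputs.sh              # download/construct the spark tarball fixtures
    ./test/run.sh completed            # run only suites whose path matches "completed"
    S2I_SAVE_FAIL=true ./test/run.sh   # on failure, leave the test project in place for debugging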