├── tests └── project-a │ ├── project │ ├── datasets │ │ └── .gitignore │ ├── metadata │ │ ├── workflow-run-configuration │ │ │ └── classic.json │ │ └── pipeline-run-configuration │ │ │ └── classic.json │ ├── project-config.json │ └── pipelines-and-workflows │ │ ├── simple.hpl │ │ ├── check-db-connection.hwf │ │ └── main.hwf │ ├── jdbc-drivers │ ├── .gitignore │ └── place-your-jdbc-drivers-here.md │ ├── .gitignore │ ├── config │ ├── project-a-dev.json │ └── project-a-test.json │ └── .bash_history ├── .gitignore ├── examples └── kubernetes │ ├── create-gcp-k8s-hop-cluster.sh │ ├── hop-deployment.yaml │ └── hop-job.yaml ├── resources ├── run.sh ├── get-hop.sh └── load-and-execute.sh ├── Jenkinsfile ├── DEVELOPER-NOTES.md ├── Dockerfile └── README.md /tests/project-a/project/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/project-a/jdbc-drivers/.gitignore: -------------------------------------------------------------------------------- 1 | *.jar 2 | -------------------------------------------------------------------------------- /tests/project-a/jdbc-drivers/place-your-jdbc-drivers-here.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/project-a/.gitignore: -------------------------------------------------------------------------------- 1 | config/hop/.hoprc 2 | config/hop/config/.hoprc 3 | config/hop/config/history/ 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | config/hop/.hoprc 3 | config/hop/config/.hoprc 4 | config/hop/config/history/ 5 | resources/hop-assemblies-client* 6 | resources/latest_downloaded_version.info 7 | resources/hop/ 8 | 
-------------------------------------------------------------------------------- /tests/project-a/project/metadata/workflow-run-configuration/classic.json: -------------------------------------------------------------------------------- 1 | { 2 | "engineRunConfiguration": { 3 | "Local": { 4 | "safe_mode": false 5 | } 6 | }, 7 | "name": "classic", 8 | "description": "" 9 | } -------------------------------------------------------------------------------- /tests/project-a/config/project-a-dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "variables" : [ { 3 | "name" : "VAR_ENV_TEST1", 4 | "value" : "c", 5 | "description" : "" 6 | }, { 7 | "name" : "VAR_ENV_TEST2", 8 | "value" : "d", 9 | "description" : "" 10 | } ] 11 | } -------------------------------------------------------------------------------- /tests/project-a/config/project-a-test.json: -------------------------------------------------------------------------------- 1 | { 2 | "variables" : [ { 3 | "name" : "VAR_ENV_TEST1", 4 | "value" : "c", 5 | "description" : "" 6 | }, { 7 | "name" : "VAR_ENV_TEST2", 8 | "value" : "d", 9 | "description" : "" 10 | } ] 11 | } -------------------------------------------------------------------------------- /tests/project-a/project/metadata/pipeline-run-configuration/classic.json: -------------------------------------------------------------------------------- 1 | { 2 | "engineRunConfiguration": { 3 | "Local": { 4 | "feedback_size": "50000", 5 | "rowset_size": "10000", 6 | "safe_mode": false, 7 | "show_feedback": false, 8 | "topo_sort": false, 9 | "gather_metrics": false 10 | } 11 | }, 12 | "name": "classic", 13 | "configurationVariables": [], 14 | "description": "" 15 | } -------------------------------------------------------------------------------- /tests/project-a/project/project-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadataBaseFolder" : 
"${PROJECT_HOME}/metadata", 3 | "unitTestsBasePath" : "${PROJECT_HOME}", 4 | "dataSetsCsvFolder" : "${PROJECT_HOME}/datasets", 5 | "enforcingExecutionInHome" : true, 6 | "config" : { 7 | "variables" : [ { 8 | "name" : "VAR_PROJECT_TEST1", 9 | "value" : "a" 10 | }, { 11 | "name" : "VAR_PROJECT_TEST2", 12 | "value" : "b" 13 | } ] 14 | } 15 | } -------------------------------------------------------------------------------- /examples/kubernetes/create-gcp-k8s-hop-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # create project 3 | gcloud projects create k8s-project-hop 4 | # OR if project already exist, set project id 5 | gcloud config set project k8s-project-hop 6 | # set compute zone 7 | # list of available zones 8 | # https://cloud.google.com/compute/docs/regions-zones/#available 9 | gcloud config set compute/zone us-west1-a 10 | # create kubernetes engine cluster 11 | # running command again after enabling API 12 | gcloud container clusters create project-hop-cluster \ 13 | --machine-type=n1-standard-2 \ 14 | --num-nodes=1 15 | # get authentication credentials to interact with cluster 16 | gcloud container clusters get-credentials project-hop-cluster 17 | 18 | gcloud container clusters list 19 | kubectl get nodes 20 | 21 | ## PROJECT HOP SETUP -- START ## 22 | kubectl apply -f hop-job.yaml 23 | kubectl apply -f hop-deployment.yaml -------------------------------------------------------------------------------- /resources/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # 5 | # When stopping the running hop-server container with 'docker stop' it ended up with an timeout 6 | # and an exitcode > 0 because signals are not catched correctly. 7 | # It was not possible to end the container gracefully with Ctrl-C when it was started without -d 8 | # option which might be annoying on a gratefull exit. 
# Therefore this script ( run.sh )
# that catches signals coming from the docker host is introduced.
#

#######################################
# Print a message prefixed with the current timestamp.
# Arguments: $1 - message text
# Outputs:   "<YYYY/MM/DD HH:MM:SS> - <message>" on stdout
#######################################
log() {
  printf '%s - %s\n' "$(date '+%Y/%m/%d %H:%M:%S')" "${1}"
}

#######################################
# Run a command in the background, forward SIGINT / SIGTERM to it as
# SIGTERM, and wait until it has finished.
# Arguments: the command to run followed by its arguments
# Outputs:   log lines on stdout
# Returns:   the exit status of the command, so that a failing entrypoint
#            is visible to the caller (the original loop always returned 0)
#######################################
trapper() {
  local pid
  local rc=0

  "$@" &
  pid="$!"
  log "Running the entrypoint script with PID ${pid}"
  # $pid is expanded now, while the trap is being defined, on purpose.
  trap "log 'Stopping entrypoint script with $pid'; kill -SIGTERM $pid" SIGINT SIGTERM

  # 'wait' returns early (status > 128) when a trapped signal arrives, so
  # keep waiting for as long as the child process still exists.
  while :; do
    rc=0
    wait "${pid}" || rc=$?
    kill -0 "${pid}" > /dev/null 2>&1 || break
  done

  return "${rc}"
}

# Last command of the script: its status becomes the container exit code.
trapper /opt/project-hop/load-and-execute.sh "$@"
#!/bin/bash
# Script used to fetch the latest snapshot version of project hop
#
# Environment:
#   BRANCH_NAME      "master" selects the snapshots repository, any other
#                    value (or none) selects the releases repository
#   DEPLOYMENT_PATH  directory the archive is downloaded to and unpacked in

set -ex

# Fail early instead of downloading to / deleting from "/hop.zip".
: "${DEPLOYMENT_PATH:?DEPLOYMENT_PATH must be set}"

#Branch name variable
echo "Branch Parameter: ${BRANCH_NAME}"

# Artifactory location
SERVER=https://repository.apache.org/content/repositories

# Use Snapshot when branch is master else latest release
if [[ "${BRANCH_NAME}" = "master" ]]; then
  REPO=snapshots
else
  REPO=releases
fi

# Maven artifact location
NAME=hop-client
ARTIFACT=org/apache/hop/${NAME}
URL_PATH=${SERVER}/${REPO}/${ARTIFACT}

# Version: content of the <version> element of the artifact metadata.
# If several versions are listed, the last one is used
# (NOTE(review): presumably the newest - confirm against the repository).
VERSION=$(
  curl -s "${URL_PATH}/maven-metadata.xml" -o - \
    | grep '<version>' \
    | tail -n 1 \
    | sed 's/.*<version>\([^<]*\)<\/version>.*/\1/'
)
echo "version: ${VERSION}"

# Build: first <value> element of the version metadata (snapshot builds).
BUILD=$(
  curl -s "${URL_PATH}/${VERSION}/maven-metadata.xml" \
    | grep '<value>' \
    | head -1 \
    | sed 's/.*<value>\([^<]*\)<\/value>.*/\1/'
)
echo "build: ${BUILD}"

#If build is empty then use version (release)
# (the original tested and assigned lowercase $build, which nothing reads)
if [ -z "${BUILD}" ]; then
  BUILD=${VERSION}
fi

ZIP=${NAME}-${BUILD}.zip
URL=${URL_PATH}/${VERSION}/${ZIP}

# Download; --fail turns an HTTP error into a non-zero exit instead of
# saving the error page as hop.zip
echo "${URL}"
curl -q -N --fail "${URL}" -o "${DEPLOYMENT_PATH}/hop.zip"

# Unzip
unzip -q "${DEPLOYMENT_PATH}/hop.zip" -d "${DEPLOYMENT_PATH}"
chmod -R 700 "${DEPLOYMENT_PATH}/hop"

# Cleanup
rm -- "${DEPLOYMENT_PATH}/hop.zip"
"project-a-dev" 37 | - name: HOP_RUN_CONFIG 38 | value: "classic" 39 | - name: HOP_RUN_PARAMETERS 40 | value: "PARAM_LOG_MESSAGE=Hello,PARAM_WAIT_FOR_X_MINUTES=2" 41 | volumes: 42 | - name: git-repo-volume 43 | emptyDir: {} -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | node { 2 | properties([ 3 | [$class: 'BuildDiscarderProperty', 4 | strategy: [ 5 | $class: 'LogRotator', 6 | artifactNumToKeepStr: '5', 7 | daysToKeepStr: '30'] 8 | ], 9 | disableConcurrentBuilds(), 10 | rateLimitBuilds([count: 1, durationName: 'minute', userBoost: false]), 11 | pipelineTriggers([upstream(upstreamProjects: 'hop', threshold: hudson.model.Result.SUCCESS)]), 12 | parameters([ 13 | string(name: 'PRM_BRANCHNAME', defaultValue: "master"), 14 | string(name: 'PRM_BUILD_NUMBER', defaultValue: "0"), 15 | ]), 16 | ]) 17 | 18 | try{ 19 | stage('Checkout') { 20 | checkout scm 21 | } 22 | 23 | stage('Upstream Variables') { 24 | echo "upstream Branch: ${params.PRM_BRANCHNAME}" 25 | echo "upstream Build Number: ${params.PRM_BUILD_NUMBER}" 26 | } 27 | 28 | 29 | stage('Build image') { 30 | docker.withRegistry('', 'dockerhub') { 31 | if("${params.PRM_BRANCHNAME}" == "master"){ 32 | // Create container with snapshot tag 33 | def customImage = docker.build("projecthop/hop:snapshot" , "--build-arg BRANCH_NAME=${params.PRM_BRANCHNAME} .") 34 | customImage.push() 35 | } else 36 | { 37 | // create container with release tag 38 | def customImage = docker.build("projecthop/hop:${params.PRM_BRANCHNAME}", "--build-arg BRANCH_NAME=${params.PRM_BRANCHNAME} .") 39 | customImage.push() 40 | } 41 | 42 | /* Push the container to the custom Registry */ 43 | 44 | } 45 | } 46 | 47 | stage('Cleanup'){ 48 | if("${params.PRM_BRANCHNAME}" == "master"){ 49 | 50 | sh 'docker rmi projecthop/hop:snapshot' 51 | } 52 | else 53 | { 54 | sh "docker rmi projecthop/hop:${params.PRM_BRANCHNAME}" 55 | } 
#!/bin/bash
#######################################################################
# Registers the Hop project and environment, then either starts a
# hop-server or runs a single workflow / pipeline.
#
# Environment variables read by this script (because of 'set -u' every
# one without a default must be defined, even if empty):
#   DEPLOYMENT_PATH                          directory containing hop/
#   HOP_PROJECT_NAME                         passed as --project
#   HOP_PROJECT_DIRECTORY                    passed as --project-home
#   HOP_PROJECT_CONFIG_FILE_NAME             passed as --project-config-file
#   HOP_ENVIRONMENT_NAME                     passed as --environment
#   HOP_ENVIRONMENT_CONFIG_FILE_NAME_PATHS   passed as --environment-config-files
#   HOP_FILE_PATH                            empty: start hop-server,
#                                            otherwise: file given to hop-run.sh
#   HOP_RUN_CONFIG, HOP_LOG_LEVEL,
#   HOP_RUN_PARAMETERS                       passed to hop-run.sh
#   HOP_LOG_PATH                             file the hop-run output is copied to
#   HOP_SERVER_USER, HOP_SERVER_PASS,
#   HOP_SERVER_MASTER                        optional, default cluster / cluster / Y
#######################################################################

set -Eeuo pipefail

#######################################
# Print a message prefixed with the current timestamp.
# Arguments: $1 - message text
# Outputs:   "<YYYY/MM/DD HH:MM:SS> - <message>" on stdout
#######################################
log() {
  printf '%s - %s\n' "$(date '+%Y/%m/%d %H:%M:%S')" "${1}"
}

#######################################
# write the hop-server config to a configuration file
# to avoid the password of the server being shown in ps
#
# bind the server to 0.0.0.0 to be able to expose the port
# out of the docker container
#
# Globals: HOP_SERVER_USER, HOP_SERVER_PASS, HOP_SERVER_MASTER
#          (read; set to their defaults when unset or empty)
# Outputs: writes /tmp/hopserver.xml
#######################################
write_server_config() {
  HOP_SERVER_USER=${HOP_SERVER_USER:-cluster}
  HOP_SERVER_PASS=${HOP_SERVER_PASS:-cluster}
  HOP_SERVER_MASTER=${HOP_SERVER_MASTER:-Y}

  log "Writing a hop-server config file to /tmp/hopserver.xml"

  # NOTE(review): the element names were lost in this copy of the file and
  # are reconstructed here from the value order (name, hostname, port,
  # master, username, password) - confirm against the hop-server docs.
  # The values are inserted without XML escaping.
  # The file contains the password, so it is created owner-readable only.
  (
    umask 077
    printf '%s\n' "<hop-server-config><hop-server><name>master</name><hostname>0.0.0.0</hostname><port>8080</port><master>${HOP_SERVER_MASTER}</master><username>${HOP_SERVER_USER}</username><password>${HOP_SERVER_PASS}</password></hop-server></hop-server-config>" \
      > /tmp/hopserver.xml
  )
}

# retrieve files from volume
# ... done via Dockerfile via specifying a volume ...

log "Registering project config with Hop"
log "${DEPLOYMENT_PATH}/hop/hop-conf.sh --project=${HOP_PROJECT_NAME} --project-create --project-home='${HOP_PROJECT_DIRECTORY}' --project-config-file='${HOP_PROJECT_CONFIG_FILE_NAME}'"

"${DEPLOYMENT_PATH}/hop/hop-conf.sh" \
  --project="${HOP_PROJECT_NAME}" \
  --project-create \
  --project-home="${HOP_PROJECT_DIRECTORY}" \
  --project-config-file="${HOP_PROJECT_CONFIG_FILE_NAME}"

log "Registering environment config with Hop"
log "${DEPLOYMENT_PATH}/hop/hop-conf.sh --environment-create --environment=${HOP_ENVIRONMENT_NAME} --environment-project=${HOP_PROJECT_NAME} --environment-config-files='${HOP_ENVIRONMENT_CONFIG_FILE_NAME_PATHS}'"

"${DEPLOYMENT_PATH}/hop/hop-conf.sh" \
  --environment="${HOP_ENVIRONMENT_NAME}" \
  --environment-create \
  --environment-project="${HOP_PROJECT_NAME}" \
  --environment-config-files="${HOP_ENVIRONMENT_CONFIG_FILE_NAME_PATHS}"

if [[ -z "${HOP_FILE_PATH}" ]]; then
  # No file given: long-lived container running a hop-server.
  write_server_config
  log "Starting a hop-server on port 8080"
  "${DEPLOYMENT_PATH}/hop/hop-server.sh" /tmp/hopserver.xml
else
  # File given: short-lived container. 'pipefail' keeps a hop-run failure
  # visible even though the output is piped through tee.
  log "Running a single hop workflow / pipeline (${HOP_FILE_PATH})"
  "${DEPLOYMENT_PATH}/hop/hop-run.sh" \
    --file="${HOP_FILE_PATH}" \
    --project="${HOP_PROJECT_NAME}" \
    --environment="${HOP_ENVIRONMENT_NAME}" \
    --runconfig="${HOP_RUN_CONFIG}" \
    --level="${HOP_LOG_LEVEL}" \
    --parameters="${HOP_RUN_PARAMETERS}" \
    2>&1 | tee "${HOP_LOG_PATH}"
fi
-------------------------------------------------------------------------------- /DEVELOPER-NOTES.md: -------------------------------------------------------------------------------- 1 | # Local Development 2 | 3 | ## Environments 4 | 5 | 6 | Environment | Purpose 7 | --- |---- 8 | `project-a-dev` | Run processes locally without the Docker container. Requires Hop to be installed locally. 9 | `project-a-test` | Run processes within the Docker container 10 | 11 | 12 | ## How to run the workflow locally 13 | 14 | To just test the workflow locally without Docker, follow the steps outlined below. **Amend paths** to your local setup. 15 | 16 | 17 | Define where you'd like to store the **Hop config**: 18 | 19 | ``` 20 | # Define location of the global Hop config 21 | # workaround due to bug, see https://project-hop.atlassian.net/browse/HOP-463 22 | 23 | export HOP_CONFIG_DIRECTORY=~/config/hop 24 | echo "{}" > ${HOP_CONFIG_DIRECTORY}/hop-config.json 25 | ``` 26 | 27 | Register the `project-a-dev` with your **Hop config**: 28 | 29 | > **Note**: This should point to the `tests/project-a` folder within the `hop-docker` repo. 
30 | 31 | 32 | ``` 33 | # Create Hop project 34 | ./hop-conf.sh \ 35 | --project=project-a \ 36 | --project-create \ 37 | --project-home="/Users/diethardsteiner/git/hop-docker/tests/project-a" \ 38 | --project-config-file=project-config.json \ 39 | --project-metadata-base='${PROJECT_HOME}/metadata' \ 40 | --project-datasets-base='${PROJECT_HOME}/datasets' \ 41 | --project-unit-tests-base='${PROJECT_HOME}' \ 42 | --project-variables=VAR_PROJECT_TEST1=a,VAR_PROJECT_TEST2=b \ 43 | --project-enforce-execution=true 44 | 45 | 46 | # Create Hop environment 47 | 48 | ## -- OPEN -- USE git repo config file instead --- ## 49 | 50 | ./hop-conf.sh \ 51 | --environment=project-a-dev \ 52 | --environment-create \ 53 | --environment-project=project-a \ 54 | --environment-purpose=development \ 55 | --environment-config-files="/Users/diethardsteiner/config/project-a/project-a-dev.json" 56 | 57 | # Set variables for the env config 58 | ./hop-conf.sh \ 59 | --config-file="/Users/diethardsteiner/config/project-a/project-a-dev.json" \ 60 | --config-file-set-variables=VAR_ENV_TEST1=c,VAR_ENV_TEST2=d 61 | ```` 62 | 63 | Now you are ready to run the test processes: 64 | 65 | ``` 66 | ./hop-run.sh \ 67 | --file='${PROJECT_HOME}/pipelines-and-workflows/simple.hpl' \ 68 | --project=project-a \ 69 | --environment=project-a-dev \ 70 | --runconfig=classic 71 | 72 | ./hop-run.sh \ 73 | --file='${PROJECT_HOME}/pipelines-and-workflows/main.hwf' \ 74 | --project=project-a \ 75 | --environment=project-a-dev \ 76 | --runconfig=classic \ 77 | --parameters=PARAM_LOG_MESSAGE=Hello,PARAM_WAIT_FOR_X_MINUTES=1 78 | ``` 79 | 80 | 81 | ## How to run the workflow within the Docker container 82 | 83 | If you spin up a docker container with the hop server running: 84 | 85 | ``` 86 | ./hop-run.sh --file=/home/hop/hop-docker/project-a/pipelines-and-workflows/main.hwf \ 87 | --project=project-a \ 88 | --environment=project-a-test \ 89 | --runconfig=classic \ 90 | 
--parameters=PARAM_LOG_MESSAGE=Hello,PARAM_WAIT_FOR_X_MINUTES=1 91 | ``` 92 | 93 | ## Tests 94 | 95 | ### JDBC Drivers 96 | 97 | There is currently one test for loading external JDBC drivers. The test example relies on a **PostgreSQL** database being available. Place the drivers in `hop-docker/tests/project-a/jdbc-drivers`. -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8-alpine 2 | MAINTAINER Project Hop Team 3 | # Argument Branch name, used to download correct version 4 | ARG BRANCH_NAME 5 | ENV BRANCH_NAME=$BRANCH_NAME 6 | # path to where the artefacts should be deployed to 7 | ENV DEPLOYMENT_PATH=/opt/project-hop 8 | # volume mount point 9 | ENV VOLUME_MOUNT_POINT=/files 10 | # parent directory in which the hop config artefacts live 11 | # ENV HOP_HOME= ... 12 | # specify the hop log level 13 | ENV HOP_LOG_LEVEL=Basic 14 | # path to hop workflow or pipeline e.g. ~/project/main.hwf 15 | ENV HOP_FILE_PATH= 16 | # file path to hop log file, e.g. ~/hop.err.log 17 | ENV HOP_LOG_PATH=$DEPLOYMENT_PATH/hop.err.log 18 | # path to hop config directory 19 | # ENV /files/project= DISABLED for now 20 | # path to jdbc drivers 21 | ENV HOP_SHARED_JDBC_DIRECTORY= 22 | # name of the Hop project to use 23 | ENV HOP_PROJECT_NAME= 24 | # path to the home of the hop project. should start with `/files`. 25 | ENV HOP_PROJECT_DIRECTORY= 26 | # name of the project config file including file extension 27 | ENV HOP_PROJECT_CONFIG_FILE_NAME=project-config.json 28 | # environment to use with hop run 29 | ENV HOP_ENVIRONMENT_NAME= 30 | # comma separated list of paths to environment config files (including filename and file extension). paths should start with `/files`. 
31 | ENV HOP_ENVIRONMENT_CONFIG_FILE_NAME_PATHS= 32 | # hop run configuration to use 33 | ENV HOP_RUN_CONFIG= 34 | # parameters that should be passed on to the hop-run command 35 | # specify as comma separated list, e.g. PARAM_1=aaa,PARAM_2=bbb 36 | ENV HOP_RUN_PARAMETERS= 37 | # any JRE settings you want to pass on 38 | # The “-XX:+AggressiveHeap” tells the container to use all memory assigned to the container. 39 | # this removed the need to calculate the necessary heap Xmx 40 | ENV HOP_OPTIONS=-XX:+AggressiveHeap 41 | 42 | # Define en_US. 43 | # ENV LANGUAGE en_US.UTF-8 44 | # ENV LANG en_US.UTF-8 45 | # ENV LC_ALL en_US.UTF-8 46 | # ENV LC_CTYPE en_US.UTF-8 47 | # ENV LC_MESSAGES en_US.UTF-8 48 | 49 | # INSTALL REQUIRED PACKAGES AND ADJUST LOCALE 50 | # procps: The package includes the programs ps, top, vmstat, w, kill, free, slabtop, and skill 51 | 52 | RUN apk update \ 53 | && apk add --no-cache bash curl procps \ 54 | && rm -rf /var/cache/apk/* \ 55 | && mkdir ${DEPLOYMENT_PATH} \ 56 | && mkdir ${VOLUME_MOUNT_POINT} \ 57 | && adduser -D -s /bin/bash -h /home/hop hop \ 58 | && chown hop:hop ${DEPLOYMENT_PATH} \ 59 | && chown hop:hop ${VOLUME_MOUNT_POINT} 60 | # && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \ 61 | # && locale-gen \ 62 | # && update-locale LANG=${LANG} LC_ALL={LC_ALL} 63 | 64 | # copy the hop package from the local resources folder to the container image directory 65 | COPY --chown=hop:hop ./resources/get-hop.sh ${DEPLOYMENT_PATH}/get-hop.sh 66 | COPY --chown=hop:hop ./resources/run.sh ${DEPLOYMENT_PATH}/run.sh 67 | COPY --chown=hop:hop ./resources/load-and-execute.sh ${DEPLOYMENT_PATH}/load-and-execute.sh 68 | 69 | 70 | # Fetch the specified hop version 71 | RUN ${DEPLOYMENT_PATH}/get-hop.sh \ 72 | && chown -R hop:hop ${DEPLOYMENT_PATH}/hop \ 73 | && chmod 700 ${DEPLOYMENT_PATH}/hop/*.sh 74 | 75 | EXPOSE 8080 76 | 77 | # make volume available so that hop pipeline and workflow files can be provided easily 78 | VOLUME 
["/files"] 79 | USER hop 80 | ENV PATH=$PATH:${DEPLOYMENT_PATH}/hop 81 | WORKDIR /home/hop 82 | # CMD ["/bin/bash"] 83 | ENTRYPOINT ["/bin/bash", "/opt/project-hop/run.sh"] 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # hop-docker 3 | 4 | A **Hop Docker image** supporting both **short-lived** and **long-lived** setups. 5 | 6 | 7 | ## Container Folder Structure 8 | 9 | 10 | Directory | Description 11 | --- |--- 12 | `/opt/project-hop` | location of the hop package 13 | `/files` | here you should mount a directory that contains the **hop and project config** as well as the **workflows and pipelines**. 14 | 15 | ## Environment Variables 16 | 17 | You can provide values for the following environment variables: 18 | 19 | 20 | Environment Variable | Required | Description 21 | --- |---- |--- 22 | `HOP_LOG_LEVEL` | No | Specify the log level. Default: `Basic`. Optional. 23 | `HOP_FILE_PATH` | Yes | Path to hop workflow or pipeline 24 | `HOP_LOG_PATH` | No | File path to hop log file 25 | `HOP_CONFIG_DIRECTORY` | No | Path to the Hop config folder. DISABLED for now. 26 | `HOP_PROJECT_NAME` | Yes | Name of the Hop project to use 27 | `HOP_PROJECT_DIRECTORY` | Yes | Path to the home of the hop project. Should start with `/files`. 28 | `HOP_PROJECT_CONFIG_FILE_NAME` | No | Name of the project config file including file extension. Defaults to `project-config.json`. 29 | `HOP_ENVIRONMENT_NAME` | Yes | Name of the Hop run environment to use 30 | `HOP_ENVIRONMENT_CONFIG_FILE_NAME_PATHS` | Yes | comma separated list of paths to environment config files (including filename and file extension). paths should start with `/files`. 31 | `HOP_RUN_CONFIG` | Yes | Name of the Hop run configuration to use 32 | `HOP_RUN_PARAMETERS` | No | Parameters that should be passed on to the hop-run command. Specify as comma separated list, e.g. 
`PARAM_1=aaa,PARAM_2=bbb`. Optional. 33 | `HOP_OPTIONS` | No | Any JRE options you want to set 34 | `HOP_SHARED_JDBC_DIRECTORY` | No | Path to the directory where the JDBC drivers are located 35 | `HOP_SERVER_USER` | No | Username for hop-server, only valid in long-lived containers. Default `cluster` 36 | `HOP_SERVER_PASS` | No | Password for hop-server user, only valid in long-lived containers. Default `cluster` 37 | 38 | The `Required` column relates to running a short-lived container. 39 | 40 | ## How to run the Container 41 | 42 | The most common use case will be that you run a **short-lived container** to just complete one Hop workflow or pipeline. 43 | 44 | Example for running a **workflow**: 45 | 46 | ```bash 47 | docker run -it --rm \ 48 | --env HOP_LOG_LEVEL=Basic \ 49 | --env HOP_FILE_PATH='${PROJECT_HOME}/pipelines-and-workflows/main.hwf' \ 50 | --env HOP_PROJECT_DIRECTORY=/files/project \ 51 | --env HOP_PROJECT_NAME=project-a \ 52 | --env HOP_ENVIRONMENT_NAME=project-a-test \ 53 | --env HOP_ENVIRONMENT_CONFIG_FILE_NAME_PATHS=/files/config/project-a-test.json \ 54 | --env HOP_RUN_CONFIG=classic \ 55 | --env HOP_RUN_PARAMETERS=PARAM_LOG_MESSAGE=Hello,PARAM_WAIT_FOR_X_MINUTES=1 \ 56 | -v /path/to/local/dir:/files \ 57 | --name my-simple-hop-container \ 58 | projecthop/hop:<tag> 59 | ``` 60 | 61 | If you need a **long-lived container**, this option is also available. 
Run this command e.g.: 62 | 63 | ```bash 64 | docker run -it --rm \ 65 | --env HOP_LOG_LEVEL=Basic \ 66 | --env HOP_PROJECT_DIRECTORY=/files/project \ 67 | --env HOP_PROJECT_NAME=project-a \ 68 | --env HOP_ENVIRONMENT_NAME=project-a-test \ 69 | --env HOP_ENVIRONMENT_CONFIG_FILE_NAME_PATHS=/files/config/project-a-test.json \ 70 | --env HOP_SERVER_USER=admin \ 71 | --env HOP_SERVER_PASS=admin \ 72 | -p 8080:8080 \ 73 | -v /path/to/local/dir:/files \ 74 | --name my-simple-hop-container \ 75 | projecthop/hop:<tag> 76 | ``` 77 | 78 | You can then access the hop-server UI from your dockerhost at `http://localhost:8080` 79 | 80 | # Shortcomings 81 | 82 | Currently the `hop-server` support is minimal. 83 | 84 | -------------------------------------------------------------------------------- /tests/project-a/project/pipelines-and-workflows/check-db-connection.hwf: -------------------------------------------------------------------------------- 1 | 2 | 3 | check-db-connection 4 | Y 5 | 6 | 7 | 8 | 0 9 | - 10 | 2020/06/05 18:20:49.203 11 | - 12 | 2020/06/05 18:20:49.203 13 | 14 | 15 | PARAM_POSTGRESQL_DB_CONNECTION_DATABASE 16 | test 17 | 18 | 19 | 20 | PARAM_POSTGRESQL_DB_CONNECTION_HOSTNAME 21 | localhost 22 | 23 | 24 | 25 | PARAM_POSTGRESQL_DB_CONNECTION_PASSWORD 26 | 27 | 28 | 29 | 30 | PARAM_POSTGRESQL_DB_CONNECTION_PORT 31 | 5432 32 | 33 | 34 | 35 | PARAM_POSTGRESQL_DB_CONNECTION_USERNAME 36 | diethardsteiner 37 | 38 | 39 | 40 | N 41 | 42 | 43 | START 44 | 45 | SPECIAL 46 | 47 | Y 48 | N 49 | N 50 | 0 51 | 0 52 | 60 53 | 12 54 | 0 55 | 1 56 | 1 57 | N 58 | 0 59 | 318 60 | 100 61 | 62 | 63 | 64 | Check DB connections 65 | 66 | CHECK_DB_CONNECTIONS 67 | 68 | 69 | 70 | postgresql-connection 71 | 0 72 | millisecond 73 | 74 | 75 | N 76 | 0 77 | 320 78 | 208 79 | 80 | 81 | 82 | Success 83 | 84 | SUCCESS 85 | 86 | N 87 | 0 88 | 323 89 | 325 90 | 91 | 92 | 93 | Abort workflow 94 | 95 | ABORT 96 | 97 | 98 | N 99 | 0 100 | 512 101 | 208 102 | 103 | 104 | 105 | 106 | 107 | 
START 108 | Check DB connections 109 | 0 110 | 0 111 | Y 112 | Y 113 | Y 114 | 115 | 116 | Check DB connections 117 | Success 118 | 0 119 | 0 120 | Y 121 | Y 122 | N 123 | 124 | 125 | Check DB connections 126 | Abort workflow 127 | 0 128 | 0 129 | Y 130 | N 131 | N 132 | 133 | 134 | 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /tests/project-a/project/pipelines-and-workflows/main.hwf: -------------------------------------------------------------------------------- 1 | 2 | 3 | New workflow 4 | 5 | 6 | 7 | 0 8 | - 9 | 2020/04/16 17:20:54.976 10 | - 11 | 2020/04/16 17:20:54.976 12 | 13 | 14 | PARAM_LOG_MESSAGE 15 | 16 | 17 | 18 | 19 | PARAM_WAIT_FOR_X_MINUTES 20 | 21 | 22 | 23 | 24 | N 25 | 26 | 27 | START 28 | 29 | SPECIAL 30 | 31 | Y 32 | N 33 | N 34 | 0 35 | 0 36 | 60 37 | 12 38 | 0 39 | 1 40 | 1 41 | N 42 | 0 43 | 144 44 | 64 45 | 46 | 47 | 48 | Write to log 49 | 50 | WRITE_TO_LOG 51 | 52 | ==================== 53 | 54 | I am just writing something to the log. 55 | So you know I did actually some work ... 
56 | 57 | Ok, let's at least print out the value 58 | of a parameters: 59 | 60 | PARAM_TEST: ${PARAM_LOG_MESSAGE} 61 | PROP_SAY_SOMETHING: ${PROP_SAY_SOMETHING} 62 | 63 | ==================== 64 | Basic 65 | 66 | N 67 | 0 68 | 144 69 | 192 70 | 71 | 72 | 73 | Success 74 | 75 | SUCCESS 76 | 77 | N 78 | 0 79 | 144 80 | 432 81 | 82 | 83 | 84 | Wait for 85 | 86 | DELAY 87 | 88 | ${PARAM_WAIT_FOR_X_MINUTES} 89 | 1 90 | N 91 | 0 92 | 144 93 | 320 94 | 95 | 96 | 97 | 98 | 99 | START 100 | Write to log 101 | 0 102 | 0 103 | Y 104 | Y 105 | Y 106 | 107 | 108 | Write to log 109 | Wait for 110 | 0 111 | 0 112 | Y 113 | Y 114 | N 115 | 116 | 117 | Wait for 118 | Success 119 | 0 120 | 0 121 | Y 122 | Y 123 | N 124 | 125 | 126 | 127 | 128 | Added to easily help debugging the k8s job container 129 | 208 130 | 320 131 | 333 132 | 26 133 | .AppleSystemUIFont 134 | 13 135 | N 136 | N 137 | 0 138 | 0 139 | 0 140 | 255 141 | 205 142 | 112 143 | 100 144 | 100 145 | 100 146 | 147 | 148 | 149 | 150 | --------------------------------------------------------------------------------