├── .dockerignore
├── .gitignore
├── Dockerfile
├── Makefile
├── README.md
├── entrypoint.sh
├── kettle.properties
├── sample
│   ├── dummy.kjb
│   └── dummy.ktr
└── spoon
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
data/
sample/
docker-compose.yml
README.md
.gitignore
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
data/
.swt/
.java/
.pentaho/
.Xauthority
.ipynb_checkpoints/
pdi-ce-7.1.0.0-12.zip
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM openjdk:8
MAINTAINER Andre Pereira andrespp@gmail.com

# Set environment variables
ENV PDI_VERSION=7.1 PDI_BUILD=7.1.0.0-12 \
    PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/data-integration \
    KETTLE_HOME=/data-integration

# Download PDI
RUN wget --progress=dot:giga http://downloads.sourceforge.net/project/pentaho/Data%20Integration/${PDI_VERSION}/pdi-ce-${PDI_BUILD}.zip \
    && unzip -q *.zip \
    && rm -f *.zip \
    && mkdir /jobs

# Additional drivers
WORKDIR $KETTLE_HOME

RUN wget https://downloads.sourceforge.net/project/jtds/jtds/1.3.1/jtds-1.3.1-dist.zip \
    && unzip jtds-1.3.1-dist.zip -d lib/ \
    && rm jtds-1.3.1-dist.zip \
    && wget https://github.com/FirebirdSQL/jaybird/releases/download/v3.0.4/Jaybird-3.0.4-JDK_1.8.zip \
    && unzip Jaybird-3.0.4-JDK_1.8.zip -d lib \
    && rm -rf lib/docs/ Jaybird-3.0.4-JDK_1.8.zip

# First-time run (initialises PDI and checks the install)
RUN pan.sh -file ./plugins/platform-utils-plugin/samples/showPlatformVersion.ktr \
    && kitchen.sh -file samples/transformations/files/test-job.kjb

# Install xauth (needed to run Spoon against the host X server)
RUN apt-get update && apt-get install -y xauth

#VOLUME /jobs

COPY entrypoint.sh /
ENTRYPOINT ["/entrypoint.sh"]
CMD ["help"]
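The Dockerfile above bundles only the jTDS and Jaybird JDBC drivers. To add another driver without rebuilding from scratch, one option is a small derived image — a sketch only, with a hypothetical local jar name:

```dockerfile
# Sketch: extend the published image with an extra JDBC driver.
# "my-extra-driver.jar" is a placeholder for a jar you provide locally.
FROM andrespp/pdi
COPY my-extra-driver.jar /data-integration/lib/
```

PDI picks up jars from its `lib/` directory at startup, which is why the bundled drivers above are also unzipped there.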
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
IMAGE=pdi
APP=spoon

.PHONY: help
help:
	@echo "Usage: make [target]"
	@echo
	@echo "Targets:"
	@echo " help\t\tPrint this help"
	@echo " test\t\tCheck for the docker, docker-compose and xauth binaries"
	@echo " setup\t\tBuild the docker image"
	@echo " run [app]\tRun app defined in '\$$APP' (spoon by default)"
	@echo ""
	@echo "Example: make run APP=spoon"

.PHONY: test
test:
	@which docker
	@which docker-compose
	@which xauth

.PHONY: setup
setup: Dockerfile
	docker image build -t $(IMAGE) .

.PHONY: run
run:
	@echo $(APP)
	docker run -it --rm -v /tmp/.X11-unix/:/tmp/.X11-unix/:ro \
		-v $$(pwd):/root/data \
		-e XAUTH=$$(xauth list|grep `uname -n` | cut -d ' ' -f5) -e "DISPLAY" \
		$(IMAGE) $(APP)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Docker Pentaho Data Integration
===============================

# Introduction

Dockerfile for [Pentaho Data Integration](https://sourceforge.net/projects/pentaho/) (a.k.a. Kettle / PDI).

This image is intended to allow execution of PDI transformations and jobs through the command line, as well as running PDI's UI (`Spoon`). The PDI server (`Carte`) is also available in this image.

# Quick start

## Basic Syntax

```
$ docker container run --rm andrespp/pdi

Usage: /entrypoint.sh COMMAND

Pentaho Data Integration (PDI)

Options:
  runj filename    Run job file
  runt filename    Run transformation file
  spoon            Run spoon (GUI)
  help             Print this help

```

## Running Transformations

```
$ docker container run --rm -v $(pwd):/jobs andrespp/pdi runt sample/dummy.ktr
```

## Running Jobs

```
$ docker container run --rm -v $(pwd):/jobs andrespp/pdi runj sample/dummy.kjb
```

## Running Spoon (UI)

### Using `docker run`

```
$ docker run -it --rm -v /tmp/.X11-unix/:/tmp/.X11-unix/:ro \
    -v $(pwd):/jobs \
    -e XAUTH=$(xauth list|grep `uname -n` | cut -d ' ' -f5) -e "DISPLAY" \
    --name spoon \
    andrespp/pdi spoon
```

### Using the startup script (installing)

In order to run the container as if the application were installed locally, download the `spoon` script to a directory in your $PATH, for example:

```bash
$ sudo curl -fsSL https://raw.githubusercontent.com/andrespp/docker-pdi/master/spoon \
    -o /usr/local/bin/spoon
$ sudo chmod +x /usr/local/bin/spoon
```

Then you'll be able to run Spoon in the current directory simply by calling `spoon`:

```bash
$ spoon
```


## Custom `kettle.properties`

In order to use a custom `kettle.properties`, make the file available at `/jobs/kettle.properties`:

```bash
$ # Custom properties in $(pwd)/kettle.properties
$ docker container run --rm -v $(pwd):/jobs andrespp/pdi runj sample/dummy.kjb
```

# Environment variables

This image uses a few environment variables to control its behavior:

| Environment variable | Default value     | Note                             |
| -------------------- | ----------------- | -------------------------------- |
| PDI\_VERSION         | 7.1               | PDI release line                 |
| PDI\_BUILD           | 7.1.0.0-12        | Full build number for download   |
| KETTLE\_HOME         | /data-integration | Location of the PDI installation |

# Issues

If you have any problems with or questions about this image, please contact me
through a [GitHub issue](https://github.com/andrespp/docker-pdi/issues).
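Both the Makefile's `test` target and `.dockerignore` reference `docker-compose`, but no compose file appears in this listing. A minimal sketch of what one could look like for running a job — the service name, mount and command here are assumptions, not the author's actual file:

```yaml
# Hypothetical docker-compose.yml sketch, not part of the repository.
version: "3"
services:
  pdi:
    image: andrespp/pdi
    volumes:
      - ./:/jobs          # expose the current directory to the entrypoint
    command: runj sample/dummy.kjb
```

With such a file in place, `docker-compose run --rm pdi` would execute the configured job once and remove the container.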
--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Exit immediately if any command fails.
set -e

set_xauth() {
    echo xauth add $DISPLAY . $XAUTH
    touch /.Xauthority
    xauth add $DISPLAY . $XAUTH
}

custom_properties() {
    # Use a custom kettle.properties if one was mounted into /jobs
    if [ -f /jobs/kettle.properties ] ; then
        cp /jobs/kettle.properties $KETTLE_HOME
    fi
}

run_pan() {
    custom_properties
    echo pan.sh -file /jobs/$@
    pan.sh -file /jobs/$@
}

run_kitchen() {
    custom_properties
    echo kitchen.sh -file /jobs/$@
    kitchen.sh -file /jobs/$@
}

run_spoon() {
    custom_properties
    set_xauth
    echo /data-integration/spoon.sh
    /data-integration/spoon.sh
}

print_usage() {
    echo "

Usage: $0 COMMAND

Pentaho Data Integration (PDI)

Options:
  runj filename    Run job file
  runt filename    Run transformation file
  spoon            Run spoon (GUI)
  help             Print this help
"
}

case "$1" in
    help)
        print_usage
        ;;
    runt)
        shift 1
        run_pan "$@"
        ;;
    runj)
        shift 1
        run_kitchen "$@"
        ;;
    spoon)
        run_spoon
        ;;
    *)
        exec "$@"
esac
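Because the catch-all `*)` branch simply `exec`s its arguments, any command available in the image can be run directly through the entrypoint — for example, a throwaway shell for debugging (a usage sketch, not from the original docs):

```bash
$ docker container run --rm -it andrespp/pdi bash
```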
--------------------------------------------------------------------------------
/kettle.properties:
--------------------------------------------------------------------------------
# Here are a few examples of variables to set:
#
# PRODUCTION_SERVER = hercules
# TEST_SERVER = zeus
# DEVELOPMENT_SERVER = thor
#
# Note: lines like these with a # in front of it are comments
#
--------------------------------------------------------------------------------
/sample/dummy.kjb:
--------------------------------------------------------------------------------
[Kettle job XML; the markup was lost in extraction. Recoverable details: a job
named "dummy" (created/modified 2018/07/22) with the standard job, job-entry and
channel logging field definitions, two SPECIAL entries (START and DUMMY) and a
single hop from START to DUMMY.]
--------------------------------------------------------------------------------
/sample/dummy.ktr:
--------------------------------------------------------------------------------
[Kettle transformation XML; the markup was lost in extraction. Recoverable
details: a transformation named "dummy" (created/modified 2018/07/22) with the
standard transformation, step, performance, channel and metrics logging field
definitions and a single "Dummy (do nothing)" step.]
--------------------------------------------------------------------------------
/spoon:
--------------------------------------------------------------------------------
#!/bin/bash
#set -x

IMAGE=andrespp/pdi
CONTAINER_NAME=spoon

is_running(){
    # Echoes the CONTAINER ID if the container exists and is running,
    # and a zero-length string otherwise
    id=$(docker ps -q -f status=running -f name=$CONTAINER_NAME)
    echo $id
}

## main()

# Check container status
CONTAINER_ID=$(is_running)

if [ -z "$CONTAINER_ID" ] ; then
    # Container not running, start it
    docker run -it --rm -v /tmp/.X11-unix/:/tmp/.X11-unix/:ro \
        -v $(pwd):/jobs \
        -e XAUTH=$(xauth list|grep `uname -n` | cut -d ' ' -f5) -e "DISPLAY" \
        --name $CONTAINER_NAME \
        $IMAGE spoon
else
    echo Spoon already running \($CONTAINER_ID\).
fi
--------------------------------------------------------------------------------
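Once the wrapper is installed as described in the README, it mounts the current working directory at `/jobs`, so a typical session is simply (the project path below is hypothetical):

```bash
$ cd ~/my-etl-project   # hypothetical directory containing your .ktr/.kjb files
$ spoon
```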