├── .dockerignore
├── .gitignore
├── Dockerfile
├── Makefile
├── README.md
├── entrypoint.sh
├── kettle.properties
├── sample
│   ├── dummy.kjb
│   └── dummy.ktr
└── spoon
/.dockerignore:
--------------------------------------------------------------------------------
1 | data/
2 | sample/
3 | docker-compose.yml
4 | README.md
5 | .gitignore
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 | .swt/
3 | .java/
4 | .pentaho/
5 | .Xauthority
6 | .ipynb_checkpoints/
7 | pdi-ce-7.1.0.0-12.zip
8 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM openjdk:8
2 | LABEL maintainer="Andre Pereira <andrespp@gmail.com>"
3 |
4 | # Set Environment Variables
5 | ENV PDI_VERSION=7.1 PDI_BUILD=7.1.0.0-12 \
6 | PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/data-integration \
7 | KETTLE_HOME=/data-integration
8 |
9 | # Download PDI
10 | RUN wget --progress=dot:giga http://downloads.sourceforge.net/project/pentaho/Data%20Integration/${PDI_VERSION}/pdi-ce-${PDI_BUILD}.zip \
11 | && unzip -q *.zip \
12 | && rm -f *.zip \
13 | && mkdir /jobs
14 |
15 | # Additional Drivers
16 | WORKDIR $KETTLE_HOME
17 |
18 | RUN wget https://downloads.sourceforge.net/project/jtds/jtds/1.3.1/jtds-1.3.1-dist.zip \
19 | && unzip jtds-1.3.1-dist.zip -d lib/ \
20 | && rm jtds-1.3.1-dist.zip \
21 | && wget https://github.com/FirebirdSQL/jaybird/releases/download/v3.0.4/Jaybird-3.0.4-JDK_1.8.zip \
22 | && unzip Jaybird-3.0.4-JDK_1.8.zip -d lib \
23 | && rm -rf lib/docs/ Jaybird-3.0.4-JDK_1.8.zip
24 |
25 | # First time run
26 | RUN pan.sh -file ./plugins/platform-utils-plugin/samples/showPlatformVersion.ktr \
27 | && kitchen.sh -file samples/transformations/files/test-job.kjb
28 |
29 | # Install xauth
30 | RUN apt-get update && apt-get install -y xauth && rm -rf /var/lib/apt/lists/*
31 |
32 | #VOLUME /jobs
33 |
34 | COPY entrypoint.sh /
35 | ENTRYPOINT ["/entrypoint.sh"]
36 | CMD ["help"]
37 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | IMAGE=pdi
2 | APP=spoon
3 |
4 | .PHONY: help
5 | help:
6 | @echo "Usage: make [target]"
7 | @echo
8 | @echo "Targets:"
9 | @echo " help\t\tPrint this help"
10 | @echo " test\t\tLookup for docker and docker-compose binaries"
11 | @echo " setup\t\tBuild docker images"
12 | @echo " run [app]\tRun app defined in '\$$APP' (spoon by default)"
13 | @echo ""
14 | @echo "Example: make run APP=spoon"
15 |
16 | .PHONY: test
17 | test:
18 | @which docker
19 | @which docker-compose
20 | @which xauth
21 |
22 | .PHONY: setup
23 | setup: Dockerfile
24 | docker image build -t $(IMAGE) .
25 |
26 | .PHONY: run
27 | run:
28 | @echo $(APP)
29 | docker run -it --rm -v /tmp/.X11-unix/:/tmp/.X11-unix/:ro \
30 | -v $$(pwd):/root/data \
31 | -e XAUTH=$$(xauth list|grep `uname -n` | cut -d ' ' -f5) -e "DISPLAY" \
32 | $(IMAGE) $(APP)
33 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Docker Pentaho Data Integration
2 | ===============================
3 |
4 | # Introduction
5 |
6 | Dockerfile for [Pentaho Data Integration](https://sourceforge.net/projects/pentaho/) (a.k.a. Kettle / PDI)
7 |
8 | This image is intended to allow execution of PDI transformations and jobs through the command line and to run PDI's UI (`Spoon`). The PDI server (`Carte`) is also available in this image.
9 |
10 | # Quick start
11 |
12 | ## Basic Syntax
13 |
14 | ```
15 | $ docker container run --rm andrespp/pdi
16 |
17 | Usage: /entrypoint.sh COMMAND
18 |
19 | Pentaho Data Integration (PDI)
20 |
21 | Options:
22 | runj filename Run job file
23 | runt filename Run transformation file
24 | spoon Run spoon (GUI)
25 | help Print this help
26 |
27 | ```
28 |
29 | ## Running Transformations
30 |
31 | ```
32 | $ docker container run --rm -v $(pwd):/jobs andrespp/pdi runt sample/dummy.ktr
33 | ```
34 |
35 | ## Running Jobs
36 |
37 | ```
38 | $ docker container run --rm -v $(pwd):/jobs andrespp/pdi runj sample/dummy.kjb
39 | ```
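
Any additional arguments after the file name are passed straight through to `kitchen.sh` (see `run_kitchen` in `entrypoint.sh`, where `$@` is expanded after the `/jobs/` prefix), so standard PDI command-line switches can be appended. A minimal sketch, assuming the stock Kitchen `-level` option:

```bash
# hypothetical: run the sample job with a more verbose log level
$ docker container run --rm -v $(pwd):/jobs andrespp/pdi runj sample/dummy.kjb -level=Detailed
```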
40 |
41 | ## Running Spoon (UI)
42 |
43 | ### Using `docker run`
44 |
45 | ```
46 | $ docker run -it --rm -v /tmp/.X11-unix/:/tmp/.X11-unix/:ro \
47 | -v $(pwd):/jobs \
48 | -e XAUTH=$(xauth list|grep `uname -n` | cut -d ' ' -f5) -e "DISPLAY" \
49 | --name spoon \
50 | andrespp/pdi spoon
51 | ```
52 |
53 | ### Using the startup script (installing)
54 |
55 | In order to run the container as if the application were installed locally, download the `spoon` script to a directory in your `$PATH`, for example:
56 |
57 | ```bash
58 | $ sudo curl -fsSL https://raw.githubusercontent.com/andrespp/docker-pdi/master/spoon \
59 | -o /usr/local/bin/spoon
60 | $ sudo chmod +x /usr/local/bin/spoon
61 | ```
62 |
63 | Then you'll be able to run Spoon in the current directory simply by calling `spoon`:
64 |
65 | ```bash
66 | $ spoon
67 | ```
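
The startup script mounts the current working directory at `/jobs` inside the container (see the `spoon` script at the end of this repository), so the jobs and transformations you are working on show up in Spoon's file dialogs. For example:

```bash
# hypothetical project directory
$ cd ~/my-etl-project
$ spoon   # Spoon starts with ~/my-etl-project mounted at /jobs
```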
68 |
69 |
70 | ## Custom `kettle.properties`
71 |
72 | In order to use a custom `kettle.properties`, make the file available at `/jobs/kettle.properties` (i.e. place it in the directory mounted at `/jobs`); the entrypoint copies it into `$KETTLE_HOME` before running.
73 |
74 | ```bash
75 | $ # Custom properties in $(pwd)/kettle.properties
76 | $ docker container run --rm -v $(pwd):/jobs andrespp/pdi runj sample/dummy.kjb
77 | ```
78 |
79 | # Environment variables
80 |
81 | This image uses several environment variables to control its behavior; some of them (e.g. `DISPLAY` and `XAUTH` when running Spoon) may be required at runtime.
82 |
83 | | Environment variable | Default value | Note |
84 | | -------------------- | ------------- | -----|
85 | | PDI\_VERSION | 7.1 | PDI release installed in the image |
86 | | PDI\_BUILD | 7.1.0.0-12 | Full build number used to download the PDI distribution |
87 |
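Since the entrypoint `exec`s any command it does not recognize (see the `*)` case in `entrypoint.sh`), the values baked into the image can be inspected directly:

```bash
$ docker container run --rm andrespp/pdi env | grep -E 'PDI|KETTLE'
# expected output, matching the ENV lines in the Dockerfile:
# PDI_VERSION=7.1
# PDI_BUILD=7.1.0.0-12
# KETTLE_HOME=/data-integration
```
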
88 | # Issues
89 |
90 | If you have any problems with or questions about this image, please contact me
91 | through a [GitHub issue](https://github.com/andrespp/docker-pdi/issues).
92 |
93 |
--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Sets script to fail if any command fails.
4 | set -e
5 |
6 | set_xauth() {
7 | echo xauth add $DISPLAY . $XAUTH
8 | touch /.Xauthority
9 | xauth add $DISPLAY . $XAUTH
10 | }
11 |
12 | custom_properties() {
13 | if [ -f /jobs/kettle.properties ] ; then
14 | cp /jobs/kettle.properties $KETTLE_HOME
15 | fi
16 | }
17 |
18 | run_pan() {
19 | custom_properties
20 | echo pan.sh -file /jobs/$@
21 | pan.sh -file /jobs/$@
22 | }
23 |
24 | run_kitchen() {
25 | custom_properties
26 | echo kitchen.sh -file /jobs/$@
27 | kitchen.sh -file /jobs/$@
28 | }
29 |
30 | run_spoon() {
31 | custom_properties
32 | set_xauth
33 | echo /data-integration/spoon.sh
34 | /data-integration/spoon.sh
35 | }
36 |
37 | print_usage() {
38 | echo "
39 |
40 | Usage: $0 COMMAND
41 |
42 | Pentaho Data Integration (PDI)
43 |
44 | Options:
45 | runj filename Run job file
46 | runt filename Run transformation file
47 | spoon Run spoon (GUI)
48 | help Print this help
49 | "
50 | }
51 |
52 | case "$1" in
53 | help)
54 | print_usage
55 | ;;
56 | runt)
57 | shift 1
58 | run_pan "$@"
59 | ;;
60 | runj)
61 | shift 1
62 | run_kitchen "$@"
63 | ;;
64 | spoon)
65 | run_spoon
66 | ;;
67 | *)
68 | exec "$@"
69 | esac
70 |
--------------------------------------------------------------------------------
/kettle.properties:
--------------------------------------------------------------------------------
1 | # Here are a few examples of variables to set:
2 | #
3 | # PRODUCTION_SERVER = hercules
4 | # TEST_SERVER = zeus
5 | # DEVELOPMENT_SERVER = thor
6 | #
7 | # Note: lines like these with a # in front of them are comments
8 | #
9 |
--------------------------------------------------------------------------------
/sample/dummy.kjb:
--------------------------------------------------------------------------------
1 |
2 |
3 | dummy
4 |
5 |
6 |
7 | 0
8 | /
9 | -
10 | 2018/07/22 19:16:17.847
11 | -
12 | 2018/07/22 19:16:17.847
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | ID_JOB
26 | Y
27 | ID_JOB
28 |
29 |
30 | CHANNEL_ID
31 | Y
32 | CHANNEL_ID
33 |
34 |
35 | JOBNAME
36 | Y
37 | JOBNAME
38 |
39 |
40 | STATUS
41 | Y
42 | STATUS
43 |
44 |
45 | LINES_READ
46 | Y
47 | LINES_READ
48 |
49 |
50 | LINES_WRITTEN
51 | Y
52 | LINES_WRITTEN
53 |
54 |
55 | LINES_UPDATED
56 | Y
57 | LINES_UPDATED
58 |
59 |
60 | LINES_INPUT
61 | Y
62 | LINES_INPUT
63 |
64 |
65 | LINES_OUTPUT
66 | Y
67 | LINES_OUTPUT
68 |
69 |
70 | LINES_REJECTED
71 | Y
72 | LINES_REJECTED
73 |
74 |
75 | ERRORS
76 | Y
77 | ERRORS
78 |
79 |
80 | STARTDATE
81 | Y
82 | STARTDATE
83 |
84 |
85 | ENDDATE
86 | Y
87 | ENDDATE
88 |
89 |
90 | LOGDATE
91 | Y
92 | LOGDATE
93 |
94 |
95 | DEPDATE
96 | Y
97 | DEPDATE
98 |
99 |
100 | REPLAYDATE
101 | Y
102 | REPLAYDATE
103 |
104 |
105 | LOG_FIELD
106 | Y
107 | LOG_FIELD
108 |
109 |
110 | EXECUTING_SERVER
111 | N
112 | EXECUTING_SERVER
113 |
114 |
115 | EXECUTING_USER
116 | N
117 | EXECUTING_USER
118 |
119 |
120 | START_JOB_ENTRY
121 | N
122 | START_JOB_ENTRY
123 |
124 |
125 | CLIENT
126 | N
127 | CLIENT
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 | ID_BATCH
137 | Y
138 | ID_BATCH
139 |
140 |
141 | CHANNEL_ID
142 | Y
143 | CHANNEL_ID
144 |
145 |
146 | LOG_DATE
147 | Y
148 | LOG_DATE
149 |
150 |
151 | JOBNAME
152 | Y
153 | TRANSNAME
154 |
155 |
156 | JOBENTRYNAME
157 | Y
158 | STEPNAME
159 |
160 |
161 | LINES_READ
162 | Y
163 | LINES_READ
164 |
165 |
166 | LINES_WRITTEN
167 | Y
168 | LINES_WRITTEN
169 |
170 |
171 | LINES_UPDATED
172 | Y
173 | LINES_UPDATED
174 |
175 |
176 | LINES_INPUT
177 | Y
178 | LINES_INPUT
179 |
180 |
181 | LINES_OUTPUT
182 | Y
183 | LINES_OUTPUT
184 |
185 |
186 | LINES_REJECTED
187 | Y
188 | LINES_REJECTED
189 |
190 |
191 | ERRORS
192 | Y
193 | ERRORS
194 |
195 |
196 | RESULT
197 | Y
198 | RESULT
199 |
200 |
201 | NR_RESULT_ROWS
202 | Y
203 | NR_RESULT_ROWS
204 |
205 |
206 | NR_RESULT_FILES
207 | Y
208 | NR_RESULT_FILES
209 |
210 |
211 | LOG_FIELD
212 | N
213 | LOG_FIELD
214 |
215 |
216 | COPY_NR
217 | N
218 | COPY_NR
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 | ID_BATCH
228 | Y
229 | ID_BATCH
230 |
231 |
232 | CHANNEL_ID
233 | Y
234 | CHANNEL_ID
235 |
236 |
237 | LOG_DATE
238 | Y
239 | LOG_DATE
240 |
241 |
242 | LOGGING_OBJECT_TYPE
243 | Y
244 | LOGGING_OBJECT_TYPE
245 |
246 |
247 | OBJECT_NAME
248 | Y
249 | OBJECT_NAME
250 |
251 |
252 | OBJECT_COPY
253 | Y
254 | OBJECT_COPY
255 |
256 |
257 | REPOSITORY_DIRECTORY
258 | Y
259 | REPOSITORY_DIRECTORY
260 |
261 |
262 | FILENAME
263 | Y
264 | FILENAME
265 |
266 |
267 | OBJECT_ID
268 | Y
269 | OBJECT_ID
270 |
271 |
272 | OBJECT_REVISION
273 | Y
274 | OBJECT_REVISION
275 |
276 |
277 | PARENT_CHANNEL_ID
278 | Y
279 | PARENT_CHANNEL_ID
280 |
281 |
282 | ROOT_CHANNEL_ID
283 | Y
284 | ROOT_CHANNEL_ID
285 |
286 |
287 | N
288 |
289 |
290 |
291 | DUMMY
292 |
293 | SPECIAL
294 | N
295 | Y
296 | N
297 | 0
298 | 0
299 | 60
300 | 12
301 | 0
302 | 1
303 | 1
304 | N
305 | Y
306 | 0
307 | 208
308 | 48
309 |
310 |
311 | START
312 |
313 | SPECIAL
314 | Y
315 | N
316 | N
317 | 0
318 | 0
319 | 60
320 | 12
321 | 0
322 | 1
323 | 1
324 | N
325 | Y
326 | 0
327 | 32
328 | 48
329 |
330 |
331 |
332 |
333 | START
334 | DUMMY
335 | 0
336 | 0
337 | Y
338 | Y
339 | Y
340 |
341 |
342 |
343 |
344 |
345 |
--------------------------------------------------------------------------------
/sample/dummy.ktr:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | dummy
5 |
6 |
7 |
8 | Normal
9 | /
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | ID_BATCH
22 | Y
23 | ID_BATCH
24 |
25 |
26 | CHANNEL_ID
27 | Y
28 | CHANNEL_ID
29 |
30 |
31 | TRANSNAME
32 | Y
33 | TRANSNAME
34 |
35 |
36 | STATUS
37 | Y
38 | STATUS
39 |
40 |
41 | LINES_READ
42 | Y
43 | LINES_READ
44 |
45 |
46 |
47 | LINES_WRITTEN
48 | Y
49 | LINES_WRITTEN
50 |
51 |
52 |
53 | LINES_UPDATED
54 | Y
55 | LINES_UPDATED
56 |
57 |
58 |
59 | LINES_INPUT
60 | Y
61 | LINES_INPUT
62 |
63 |
64 |
65 | LINES_OUTPUT
66 | Y
67 | LINES_OUTPUT
68 |
69 |
70 |
71 | LINES_REJECTED
72 | Y
73 | LINES_REJECTED
74 |
75 |
76 |
77 | ERRORS
78 | Y
79 | ERRORS
80 |
81 |
82 | STARTDATE
83 | Y
84 | STARTDATE
85 |
86 |
87 | ENDDATE
88 | Y
89 | ENDDATE
90 |
91 |
92 | LOGDATE
93 | Y
94 | LOGDATE
95 |
96 |
97 | DEPDATE
98 | Y
99 | DEPDATE
100 |
101 |
102 | REPLAYDATE
103 | Y
104 | REPLAYDATE
105 |
106 |
107 | LOG_FIELD
108 | Y
109 | LOG_FIELD
110 |
111 |
112 | EXECUTING_SERVER
113 | N
114 | EXECUTING_SERVER
115 |
116 |
117 | EXECUTING_USER
118 | N
119 | EXECUTING_USER
120 |
121 |
122 | CLIENT
123 | N
124 | CLIENT
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 | ID_BATCH
135 | Y
136 | ID_BATCH
137 |
138 |
139 | SEQ_NR
140 | Y
141 | SEQ_NR
142 |
143 |
144 | LOGDATE
145 | Y
146 | LOGDATE
147 |
148 |
149 | TRANSNAME
150 | Y
151 | TRANSNAME
152 |
153 |
154 | STEPNAME
155 | Y
156 | STEPNAME
157 |
158 |
159 | STEP_COPY
160 | Y
161 | STEP_COPY
162 |
163 |
164 | LINES_READ
165 | Y
166 | LINES_READ
167 |
168 |
169 | LINES_WRITTEN
170 | Y
171 | LINES_WRITTEN
172 |
173 |
174 | LINES_UPDATED
175 | Y
176 | LINES_UPDATED
177 |
178 |
179 | LINES_INPUT
180 | Y
181 | LINES_INPUT
182 |
183 |
184 | LINES_OUTPUT
185 | Y
186 | LINES_OUTPUT
187 |
188 |
189 | LINES_REJECTED
190 | Y
191 | LINES_REJECTED
192 |
193 |
194 | ERRORS
195 | Y
196 | ERRORS
197 |
198 |
199 | INPUT_BUFFER_ROWS
200 | Y
201 | INPUT_BUFFER_ROWS
202 |
203 |
204 | OUTPUT_BUFFER_ROWS
205 | Y
206 | OUTPUT_BUFFER_ROWS
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 | ID_BATCH
216 | Y
217 | ID_BATCH
218 |
219 |
220 | CHANNEL_ID
221 | Y
222 | CHANNEL_ID
223 |
224 |
225 | LOG_DATE
226 | Y
227 | LOG_DATE
228 |
229 |
230 | LOGGING_OBJECT_TYPE
231 | Y
232 | LOGGING_OBJECT_TYPE
233 |
234 |
235 | OBJECT_NAME
236 | Y
237 | OBJECT_NAME
238 |
239 |
240 | OBJECT_COPY
241 | Y
242 | OBJECT_COPY
243 |
244 |
245 | REPOSITORY_DIRECTORY
246 | Y
247 | REPOSITORY_DIRECTORY
248 |
249 |
250 | FILENAME
251 | Y
252 | FILENAME
253 |
254 |
255 | OBJECT_ID
256 | Y
257 | OBJECT_ID
258 |
259 |
260 | OBJECT_REVISION
261 | Y
262 | OBJECT_REVISION
263 |
264 |
265 | PARENT_CHANNEL_ID
266 | Y
267 | PARENT_CHANNEL_ID
268 |
269 |
270 | ROOT_CHANNEL_ID
271 | Y
272 | ROOT_CHANNEL_ID
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 | ID_BATCH
282 | Y
283 | ID_BATCH
284 |
285 |
286 | CHANNEL_ID
287 | Y
288 | CHANNEL_ID
289 |
290 |
291 | LOG_DATE
292 | Y
293 | LOG_DATE
294 |
295 |
296 | TRANSNAME
297 | Y
298 | TRANSNAME
299 |
300 |
301 | STEPNAME
302 | Y
303 | STEPNAME
304 |
305 |
306 | STEP_COPY
307 | Y
308 | STEP_COPY
309 |
310 |
311 | LINES_READ
312 | Y
313 | LINES_READ
314 |
315 |
316 | LINES_WRITTEN
317 | Y
318 | LINES_WRITTEN
319 |
320 |
321 | LINES_UPDATED
322 | Y
323 | LINES_UPDATED
324 |
325 |
326 | LINES_INPUT
327 | Y
328 | LINES_INPUT
329 |
330 |
331 | LINES_OUTPUT
332 | Y
333 | LINES_OUTPUT
334 |
335 |
336 | LINES_REJECTED
337 | Y
338 | LINES_REJECTED
339 |
340 |
341 | ERRORS
342 | Y
343 | ERRORS
344 |
345 |
346 | LOG_FIELD
347 | N
348 | LOG_FIELD
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 | ID_BATCH
358 | Y
359 | ID_BATCH
360 |
361 |
362 | CHANNEL_ID
363 | Y
364 | CHANNEL_ID
365 |
366 |
367 | LOG_DATE
368 | Y
369 | LOG_DATE
370 |
371 |
372 | METRICS_DATE
373 | Y
374 | METRICS_DATE
375 |
376 |
377 | METRICS_CODE
378 | Y
379 | METRICS_CODE
380 |
381 |
382 | METRICS_DESCRIPTION
383 | Y
384 | METRICS_DESCRIPTION
385 |
386 |
387 | METRICS_SUBJECT
388 | Y
389 | METRICS_SUBJECT
390 |
391 |
392 | METRICS_TYPE
393 | Y
394 | METRICS_TYPE
395 |
396 |
397 | METRICS_VALUE
398 | Y
399 | METRICS_VALUE
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 | 0.0
408 | 0.0
409 |
410 | 10000
411 | 50
412 | 50
413 | N
414 | Y
415 | 50000
416 | Y
417 |
418 | N
419 | 1000
420 | 100
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 | -
430 | 2018/07/22 19:15:58.364
431 | -
432 | 2018/07/22 19:15:58.364
433 |
434 | N
435 |
436 |
437 |
438 |
439 |
440 |
441 | Dummy (do nothing)
442 | Dummy
443 |
444 | Y
445 |
446 | 1
447 |
448 | none
449 |
450 |
451 |
452 |
453 |
454 |
455 |
457 |
458 |
459 | 160
460 | 96
461 | Y
462 |
463 |
464 |
465 |
466 |
467 |
468 | N
469 |
470 |
--------------------------------------------------------------------------------
/spoon:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #set -x
3 |
4 | IMAGE=andrespp/pdi
5 | CONTAINER_NAME=spoon
6 |
7 | is_running(){
8 | # Returns (echo) CONTAINER ID if container exists and is running, and
9 | # zero length string otherwise
10 | id=$(docker ps -q -f status=running -f name=$CONTAINER_NAME)
11 | echo $id
12 | }
13 |
14 | ## main()
15 |
16 | # Check container status
17 | CONTAINER_ID=$(is_running)
18 |
19 | if [ -z "$CONTAINER_ID" ] ; then
20 | # Container not running, start it
21 | docker run -it --rm -v /tmp/.X11-unix/:/tmp/.X11-unix/:ro \
22 | -v $(pwd):/jobs \
23 | -e XAUTH=$(xauth list|grep `uname -n` | cut -d ' ' -f5) -e "DISPLAY" \
24 | --name $CONTAINER_NAME \
25 | $IMAGE spoon
26 | else
27 | echo Spoon already running \($CONTAINER_ID\).
28 | fi
29 |
--------------------------------------------------------------------------------