├── .gitignore
├── .pre-commit-config.yaml
├── .secrets.baseline
├── LICENSE
├── README.md
├── RemoteDataPlane
│   ├── README.md
│   └── deploy_operator.sh
├── RemoteEngine
│   ├── LICENSE
│   ├── README.md
│   ├── docker
│   │   ├── README.md
│   │   └── dsengine.sh
│   └── kubernetes
│       ├── README.md
│       └── launch.sh
├── Resources
│   └── datastage.png
├── dsjob
│   ├── Readme.md
│   ├── blogs
│   │   ├── GitIntegration.md
│   │   ├── ParamSet.md
│   │   ├── SkipOnReplace.md
│   │   ├── StoragePath.md
│   │   ├── StoragePath.mov
│   │   ├── conn.zip
│   │   ├── export-import.md
│   │   ├── file1
│   │   ├── file2
│   │   ├── gitapi.png
│   │   ├── gitbulkcommit.png
│   │   ├── gitbulkcommit2.png
│   │   ├── gitbulkcommit3.png
│   │   ├── gitcommitpr.png
│   │   ├── gitcommitstatus.png
│   │   ├── gitconfiguration.png
│   │   ├── gitcontextcommit.png
│   │   ├── gitpull.png
│   │   ├── gitpull2.png
│   │   ├── gitpull3.png
│   │   ├── gitrepo.png
│   │   ├── gitrepo2.png
│   │   ├── gitstatus.png
│   │   ├── migrateConnection.md
│   │   ├── paramset.zip
│   │   └── sequencer.md
│   ├── changelog.md
│   ├── dsjob.4.6.2.md
│   ├── dsjob.4.6.4.md
│   ├── dsjob.4.6.6.md
│   ├── dsjob.4.7.0.md
│   ├── dsjob.4.7.1.md
│   ├── dsjob.4.7.2.md
│   ├── dsjob.4.7.3.md
│   ├── dsjob.4.7.4.md
│   ├── dsjob.4.8.0.md
│   ├── dsjob.4.8.1.md
│   ├── dsjob.4.8.2.md
│   ├── dsjob.4.8.3.md
│   ├── dsjob.4.8.4.md
│   ├── dsjob.4.8.5.md
│   ├── dsjob.5.0.0.md
│   ├── dsjob.5.0.1.md
│   ├── dsjob.5.0.2.md
│   ├── dsjob.5.0.3.md
│   ├── dsjob.5.1.0.md
│   ├── dsjob.5.1.1.md
│   ├── dsjob.5.1.2.md
│   ├── dsjob.5.1.3.md
│   ├── export-import.md
│   └── incoming-changes.md
└── utils
    └── getstacks
        ├── README.md
        └── getstacks.tar.gz
/.gitignore:
--------------------------------------------------------------------------------
1 | *.env
2 |
3 | #docker
4 | docker/*.env
5 | docker/*.json
6 |
7 | # Logs
8 | *.log
9 | logs
10 | npm-debug.log*
11 |
12 | # Runtime data
13 | *.pid
14 | *.seed
15 | pids
16 |
17 | .history
18 |
19 | # Directory for instrumented libs generated by jscoverage/JSCover
20 | lib-cov
21 |
22 | # Coverage directory used by tools like istanbul
23 | coverage
24 |
25 | # nyc test coverage
26 | .nyc_output
27 |
28 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
29 | .grunt
30 |
31 | # node-waf configuration
32 | .lock-wscript
33 |
34 | # Compiled binary addons (http://nodejs.org/api/addons.html)
35 | .build
36 | build/
37 | build/Release
38 |
39 | # Local Config
40 | /config/app-local-cloud.json
41 | /config/app-local.json
42 | /config/keys/*.pem
43 | /config/runtime.json
44 | config/app-local-icp4d.json
45 | config/app-local-ugi.json
46 | config/app-local.json
47 |
48 | # Dependency directories
49 | jspm_packages
50 | node_modules
51 |
52 | # Optional npm cache directory
53 | .npm
54 |
55 | # Optional REPL history
56 | !/scripts/build
57 | .DS_Store
58 | .cache
59 | .cache-main
60 | .cache-tests
61 | .classpath
62 | .gradle
63 | .idea
64 | .node_repl_history
65 | .project
66 | .settings
67 | .tmpBin
68 | .vscode
69 | /.build
70 | /.idea
71 | /.npmrc
72 | /.sass-cache
73 | /__test__reports__
74 | /boilerplate-node-ui-react.sublime-workspace
75 | /coverage
76 | /package-lock.json
77 | /portal-datalake.sublime-workspace
78 | /temp
79 | _sprites.scss
80 | bin/
81 | fingerprints*.json
82 | fingerprints-en-webpack.json
83 | i18n/messages_en.json
84 | local-deploy.json
85 | package-lock.json
86 | target/
87 | test-report.xml
88 |
89 | # Ignore Gradle GUI config
90 | gradle-app.setting
91 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
92 | !gradle-wrapper.jar
93 | # Cache of project
94 | .gradletasknamecache
95 | # # Work around https://youtrack.jetbrains.com/issue/IDEA-116898
96 | # gradle/wrapper/gradle-wrapper.properties
97 |
98 | /app_names_routes.txt
99 | /k8s/Secrets.json
100 | /k8s/Secrets.yaml
101 | /k8s/Secrets_ypqa.yaml
102 | /k8s/build
103 | /k8s/fixDeploy.sh
104 | /k8s/updateSecrets.sh
105 |
106 | #Ignore version.json file created at time of Docker build
107 | version.json
108 | cpd-dev.sh
109 |
110 | # docker history scan
111 | docker-history.txt
112 | .docker.secrets.local
113 | diff_file.txt
114 | tmpfile1.txt
115 | tmpfile2.txt
116 |
117 | # env files
118 | /templates/*.env
119 | /tempates/*.env.bak
120 | /tempates/*.bak
121 | /templates/*.json
122 | /tempates/*.json.bak
123 |
124 |
125 | execute_*.sh
126 | input-file-*.txt
127 | loop.sh
128 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # This is an example configuration to enable detect-secrets in the pre-commit hook.
2 | # Add this file to the root folder of your repository.
3 | #
4 | # Read pre-commit hook framework https://pre-commit.com/ for more details about the structure of config yaml file and how git pre-commit would invoke each hook.
5 | #
6 | # This line indicates we will use the hook from ibm/detect-secrets to run the scan during the commit phase.
7 | # Whitewater/whitewater-detect-secrets would sync code to ibm/detect-secrets upon merge.
8 | repos:
9 | - repo: https://github.com/ibm/detect-secrets
10 | # If you desire to use a specific version of detect-secrets, you can replace `master` with other git revisions such as branch, tag or commit sha.
11 | # You are encouraged to use static refs such as tags, instead of branch name
12 | #
13 | # Running "pre-commit autoupdate" automatically updates rev to the latest tag
14 | rev: 0.13.1+ibm.60.dss
15 | hooks:
16 | - id: detect-secrets # pragma: whitelist secret
17 | # Add options for detect-secrets-hook binary. You can run `detect-secrets-hook --help` to list out all possible options.
18 | # You may also run `pre-commit run detect-secrets` to preview the scan result.
19 | # when "--baseline" is used without "--use-all-plugins", pre-commit scans with just the plugins in the baseline file
20 | # when "--baseline" is used with "--use-all-plugins", pre-commit scans with all available plugins
21 | # add "--fail-on-non-audited" to fail pre-commit for unaudited potential secrets
22 | args: [--baseline, .secrets.baseline, --use-all-plugins ]
23 |
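24 | # Example usage (a sketch, assuming the pre-commit framework is installed locally):
25 | #   pre-commit install                         # register the git pre-commit hook
26 | #   pre-commit run detect-secrets --all-files  # preview the scan across the repository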
--------------------------------------------------------------------------------
/.secrets.baseline:
--------------------------------------------------------------------------------
1 | {
2 | "exclude": {
3 | "files": "^.secrets.baseline$",
4 | "lines": null
5 | },
6 | "generated_at": "2024-07-15T23:41:46Z",
7 | "plugins_used": [
8 | {
9 | "name": "AWSKeyDetector"
10 | },
11 | {
12 | "name": "ArtifactoryDetector"
13 | },
14 | {
15 | "name": "AzureStorageKeyDetector"
16 | },
17 | {
18 | "base64_limit": 4.5,
19 | "name": "Base64HighEntropyString"
20 | },
21 | {
22 | "name": "BasicAuthDetector"
23 | },
24 | {
25 | "name": "BoxDetector"
26 | },
27 | {
28 | "name": "CloudantDetector"
29 | },
30 | {
31 | "ghe_instance": "github.ibm.com",
32 | "name": "GheDetector"
33 | },
34 | {
35 | "name": "GitHubTokenDetector"
36 | },
37 | {
38 | "hex_limit": 3,
39 | "name": "HexHighEntropyString"
40 | },
41 | {
42 | "name": "IbmCloudIamDetector"
43 | },
44 | {
45 | "name": "IbmCosHmacDetector"
46 | },
47 | {
48 | "name": "JwtTokenDetector"
49 | },
50 | {
51 | "keyword_exclude": null,
52 | "name": "KeywordDetector"
53 | },
54 | {
55 | "name": "MailchimpDetector"
56 | },
57 | {
58 | "name": "NpmDetector"
59 | },
60 | {
61 | "name": "PrivateKeyDetector"
62 | },
63 | {
64 | "name": "SlackDetector"
65 | },
66 | {
67 | "name": "SoftlayerDetector"
68 | },
69 | {
70 | "name": "SquareOAuthDetector"
71 | },
72 | {
73 | "name": "StripeDetector"
74 | },
75 | {
76 | "name": "TwilioKeyDetector"
77 | }
78 | ],
79 | "results": {
80 | "RemoteEngine/docker/dsengine.sh": [
81 | {
82 | "hashed_secret": "bb589d0621e5472f470fa3425a234c74b1e202e8",
83 | "is_secret": false,
84 | "is_verified": false,
85 | "line_number": 62,
86 | "type": "Secret Keyword",
87 | "verified_result": null
88 | },
89 | {
90 | "hashed_secret": "1c2b0d17c738509518ecc6efa233ee6c10e724f2",
91 | "is_secret": false,
92 | "is_verified": false,
93 | "line_number": 75,
94 | "type": "Basic Auth Credentials",
95 | "verified_result": null
96 | },
97 | {
98 | "hashed_secret": "2ed27655a74e3ca714894fe3c62bed0a21ac3b47",
99 | "is_secret": false,
100 | "is_verified": false,
101 | "line_number": 916,
102 | "type": "Secret Keyword",
103 | "verified_result": null
104 | },
105 | {
106 | "hashed_secret": "1be9d971d8e1386599bbf14604affce386b4763a",
107 | "is_secret": false,
108 | "is_verified": false,
109 | "line_number": 1664,
110 | "type": "Secret Keyword",
111 | "verified_result": null
112 | }
113 | ],
114 | "RemoteEngine/kubernetes/README.md": [
115 | {
116 | "hashed_secret": "e25ae4043adfb28a7d1d9ef353e5bf40210163b1",
117 | "is_secret": false,
118 | "is_verified": false,
119 | "line_number": 99,
120 | "type": "Secret Keyword",
121 | "verified_result": null
122 | },
123 | {
124 | "hashed_secret": "4711f50a26d6945585138f0b3104b2fd45f15585",
125 | "is_secret": false,
126 | "is_verified": false,
127 | "line_number": 102,
128 | "type": "Secret Keyword",
129 | "verified_result": null
130 | }
131 | ],
132 | "RemoteEngine/kubernetes/launch.sh": [
133 | {
134 | "hashed_secret": "226c9e7c2c21dabbe187f357a1e7434650c59bc6",
135 | "is_secret": false,
136 | "is_verified": false,
137 | "line_number": 24,
138 | "type": "Secret Keyword",
139 | "verified_result": null
140 | },
141 | {
142 | "hashed_secret": "890fd9944e838c997f832023ca59eacf30a31191",
143 | "is_secret": false,
144 | "is_verified": false,
145 | "line_number": 25,
146 | "type": "Secret Keyword",
147 | "verified_result": null
148 | },
149 | {
150 | "hashed_secret": "bbccdf2efb33b52e6c9d0a14dd70b2d415fbea6e",
151 | "is_secret": false,
152 | "is_verified": false,
153 | "line_number": 661,
154 | "type": "Secret Keyword",
155 | "verified_result": null
156 | },
157 | {
158 | "hashed_secret": "c2df5d3d760ff42f33fb38e2534d4c1b7ddde3ab",
159 | "is_secret": false,
160 | "is_verified": false,
161 | "line_number": 719,
162 | "type": "Secret Keyword",
163 | "verified_result": null
164 | },
165 | {
166 | "hashed_secret": "3f81e91d69a8a61ffbf19297eb0791ad54ce5690",
167 | "is_secret": false,
168 | "is_verified": false,
169 | "line_number": 782,
170 | "type": "Secret Keyword",
171 | "verified_result": null
172 | },
173 | {
174 | "hashed_secret": "d394018477f2d57205d31c3a1c89c83c3d4cdae1",
175 | "is_secret": false,
176 | "is_verified": false,
177 | "line_number": 789,
178 | "type": "Secret Keyword",
179 | "verified_result": null
180 | },
181 | {
182 | "hashed_secret": "b41e7e6d5bc9c3ea603575859ef1ace590c9dd15",
183 | "is_secret": false,
184 | "is_verified": false,
185 | "line_number": 805,
186 | "type": "Secret Keyword",
187 | "verified_result": null
188 | }
189 | ],
190 | "dsjob/blogs/SkipOnReplace.md": [
191 | {
192 | "hashed_secret": "829c3804401b0727f70f73d4415e162400cbe57b",
193 | "is_secret": false,
194 | "is_verified": false,
195 | "line_number": 397,
196 | "type": "Secret Keyword",
197 | "verified_result": null
198 | }
199 | ],
200 | "dsjob/dsjob.4.6.2.md": [
201 | {
202 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
203 | "is_secret": false,
204 | "is_verified": false,
205 | "line_number": 55,
206 | "type": "Secret Keyword",
207 | "verified_result": null
208 | }
209 | ],
210 | "dsjob/dsjob.4.6.4.md": [
211 | {
212 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
213 | "is_secret": false,
214 | "is_verified": false,
215 | "line_number": 56,
216 | "type": "Secret Keyword",
217 | "verified_result": null
218 | }
219 | ],
220 | "dsjob/dsjob.4.6.6.md": [
221 | {
222 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
223 | "is_secret": false,
224 | "is_verified": false,
225 | "line_number": 58,
226 | "type": "Secret Keyword",
227 | "verified_result": null
228 | }
229 | ],
230 | "dsjob/dsjob.4.7.0.md": [
231 | {
232 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
233 | "is_secret": false,
234 | "is_verified": false,
235 | "line_number": 62,
236 | "type": "Secret Keyword",
237 | "verified_result": null
238 | }
239 | ],
240 | "dsjob/dsjob.4.7.1.md": [
241 | {
242 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
243 | "is_secret": false,
244 | "is_verified": false,
245 | "line_number": 62,
246 | "type": "Secret Keyword",
247 | "verified_result": null
248 | }
249 | ],
250 | "dsjob/dsjob.4.7.2.md": [
251 | {
252 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
253 | "is_secret": false,
254 | "is_verified": false,
255 | "line_number": 62,
256 | "type": "Secret Keyword",
257 | "verified_result": null
258 | }
259 | ],
260 | "dsjob/dsjob.4.7.3.md": [
261 | {
262 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
263 | "is_secret": false,
264 | "is_verified": false,
265 | "line_number": 62,
266 | "type": "Secret Keyword",
267 | "verified_result": null
268 | }
269 | ],
270 | "dsjob/dsjob.4.7.4.md": [
271 | {
272 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
273 | "is_secret": false,
274 | "is_verified": false,
275 | "line_number": 62,
276 | "type": "Secret Keyword",
277 | "verified_result": null
278 | }
279 | ],
280 | "dsjob/dsjob.4.8.0.md": [
281 | {
282 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
283 | "is_secret": false,
284 | "is_verified": false,
285 | "line_number": 65,
286 | "type": "Secret Keyword",
287 | "verified_result": null
288 | }
289 | ],
290 | "dsjob/dsjob.4.8.1.md": [
291 | {
292 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
293 | "is_secret": false,
294 | "is_verified": false,
295 | "line_number": 65,
296 | "type": "Secret Keyword",
297 | "verified_result": null
298 | }
299 | ],
300 | "dsjob/dsjob.4.8.2.md": [
301 | {
302 | "hashed_secret": "0d665c0bf6fcb516c8087552f2460572afd7ada7",
303 | "is_secret": false,
304 | "is_verified": false,
305 | "line_number": 65,
306 | "type": "Secret Keyword",
307 | "verified_result": null
308 | }
309 | ],
310 | "dsjob/dsjob.4.8.3.md": [
311 | {
312 | "hashed_secret": "d1da57683505716a1a8716658c4432742355360a",
313 | "is_secret": false,
314 | "is_verified": false,
315 | "line_number": 65,
316 | "type": "Secret Keyword",
317 | "verified_result": null
318 | }
319 | ],
320 | "dsjob/dsjob.4.8.4.md": [
321 | {
322 | "hashed_secret": "d1da57683505716a1a8716658c4432742355360a",
323 | "is_secret": false,
324 | "is_verified": false,
325 | "line_number": 65,
326 | "type": "Secret Keyword",
327 | "verified_result": null
328 | }
329 | ],
330 | "dsjob/dsjob.4.8.5.md": [
331 | {
332 | "hashed_secret": "d1da57683505716a1a8716658c4432742355360a",
333 | "is_secret": false,
334 | "is_verified": false,
335 | "line_number": 67,
336 | "type": "Secret Keyword",
337 | "verified_result": null
338 | }
339 | ],
340 | "dsjob/dsjob.5.0.0.md": [
341 | {
342 | "hashed_secret": "d1da57683505716a1a8716658c4432742355360a",
343 | "is_secret": false,
344 | "is_verified": false,
345 | "line_number": 67,
346 | "type": "Secret Keyword",
347 | "verified_result": null
348 | }
349 | ]
350 | },
351 | "version": "0.13.1+ibm.60.dss",
352 | "word_list": {
353 | "file": null,
354 | "hash": null
355 | }
356 | }
357 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DataStage
2 |
3 |
4 |
5 | **DataStage** is a data integration tool that allows users to seamlessly connect to source data wherever it resides, transform the data with a robust selection of pre-built, no-code stages, and load the processed data into a target location.
6 |
7 | * Learn more about DataStage on IBM Cloud at https://cloud.ibm.com/services/datastage
8 |
9 | * Learn more about DataStage on IBM Cloud Pak for Data at https://www.ibm.com/docs/en/cloud-paks/cp-data/5.1.x?topic=data-transforming-datastage
10 |
11 | This repository contains samples, scripts, and various resources for use with DataStage.
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/RemoteDataPlane/README.md:
--------------------------------------------------------------------------------
1 | # DataStage PXRuntime on Remote Data Plane
2 |
3 | To support deploying DataStage PXRuntime on a remote data plane, the DataStage operator needs to be deployed to the management namespace of the physical location associated with the remote data plane.
4 |
5 | ## Requirements
6 |
7 | - Deploy the physical location and associate it with a [remote data plane](https://www.ibm.com/docs/en/cloud-paks/cp-data/5.0.x?topic=instances-deploying-remote-data-plane)
8 |
9 | - Configure the [global pull secret](https://www.ibm.com/docs/en/cloud-paks/cp-data/5.0.x?topic=cluster-updating-global-image-pull-secret)
10 |
11 | Note: If using a private registry, an [image content source policy](https://www.ibm.com/docs/en/cloud-paks/cp-data/5.0.x?topic=registry-configuring-image-content-source-policy) will need to be configured. [Image mirroring](https://www.ibm.com/docs/en/cloud-paks/cp-data/5.0.x?topic=registry-mirroring-images-directly-private-container) will also be needed if the DataStage images have not been mirrored to this private registry.
12 |
13 | ## Deploying the DataStage operator
14 |
15 | To deploy the operator on your physical location, log in to the cluster via `oc` with the cluster-admin role and run the command below. The latest operator will be deployed when the version is omitted.
16 |
17 | ```
18 | ./deploy_operator.sh --namespace <management namespace> [--version <version>]
19 | ```
20 |
21 | ## Using PXRuntime on a Remote Data Plane
22 | To use a PXRuntime instance on a remote data plane with a project, a runtime environment must be created for that instance and that runtime environment must be selected as the project default. All resources needed at runtime will be created on the PXRuntime instance. As a result, the jobs in this project may not run on other PXRuntime instances.
23 |
24 | Create a PXRuntime instance on the remote data plane:
25 | 1. On the `Instance details` page of the `New service instance` wizard for DataStage PX Runtime, select `Data Plane` instead of `Namespace`
26 | 2. Select the data plane with the physical location where the DataStage operator has been deployed
27 |
28 | Note: Since this is not the default instance, only users who have been granted access to this instance will be able to run DataStage jobs on it.
29 |
30 | Creating runtime environment:
31 | 1. From the project's `Manage` tab, select `Environments`
32 | 2. On the Environments page, select the `Templates` tab and click on `New template`
33 | 3. On the `New environment` dialog, select `DataStage` as the type and select the PXRuntime instance from the remote data plane for the hardware configuration.
34 |
35 | Setting project default:
36 | 1. From the project's `Manage` tab, select `DataStage`
37 | 2. Select the runtime environment created previously as the default
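38 | 
39 | For reference, here is a minimal sketch of the operator deployment described in the section above; the token, server URL, namespace, and version shown are placeholders rather than values from this repository:
40 | 
41 | ```
42 | # log in with a cluster-admin account, then deploy the operator
43 | # into the management namespace of the physical location
44 | oc login --token=<token> --server=<cluster api url>
45 | ./deploy_operator.sh --namespace <management namespace> --version 5.1.3
46 | ```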
--------------------------------------------------------------------------------
/RemoteDataPlane/deploy_operator.sh:
--------------------------------------------------------------------------------
1 | OPERATOR_REGISTRY="icr.io/cpopen"
2 | OPERATOR_DIGEST="sha256:c0af884eca4c68411f53a00dfb4bd486092c99977e161ef47ac1ed0602fb5e20"
3 | kubernetesCLI="oc"
4 |
5 | supportedVersions="5.0.0 5.0.1 5.0.2 5.0.3 5.1.0 5.1.1 5.1.2 5.1.3 5.2.0"
6 | assetVersions="500 501 502 503 510 511 512 513 520"
7 | imageDigests="sha256:c0af884eca4c68411f53a00dfb4bd486092c99977e161ef47ac1ed0602fb5e20 sha256:e21e3503e6f7e212109d104a4ef5a505ee0ca172d77eda9f65174bb104b8be07 sha256:c2c27cf0826e1f96aa523ec68374680ce1f7f8f4cc5512c28962933b22aabbfe sha256:0797ab7ed8d6c9aa644a6ca9468b279248d8deaf8afdf905464d44f4dd1824c3 sha256:07327f8ce59d24990a00b45ea1b2393b64b1d299130372855b9de4ed69e608e2 sha256:be24dd5fb73e40177810a0ff71ee885ddf0883ab3f8b790a6620a705848406c5 sha256:f6c7e12cd8d0cd981becb0f5f9abb6b1d833a10beb71a00d33e270d2f7fa2da8 sha256:4a53892a469c6b9b751a4cc2449378bfb0b15bfe1f3c0dd5056eeaf1587c82a4 sha256:06d91aac99dee5359ad21cc004c7f8f5999da1845c0a5dbdfbcab9b921a2f797"
8 | version="5.0.0"
9 |
10 | verify_args() {
11 | # check if oc cli available
12 | which oc > /dev/null
13 | if [ $? -ne 0 ]; then
14 | echo "Unable to locate oc cli"
15 | exit 3
16 | fi
17 |
18 | # check if the specified namespace exists and is a management namespace
19 | oc get namespace $namespace &> /dev/null
20 | if [ $? -ne 0 ]; then
21 | echo "Namespace $namespace not found."
22 | exit 3
23 | fi
24 | oc -n $namespace get cm physical-location-info-cm &> /dev/null
25 | if [ $? -ne 0 ]; then
26 | echo "The specified namespace $namespace is not a management namespace. Unable to locate the configmap physical-location-info-cm."
27 | exit 3
28 | fi
29 |
30 | # TODO set digest based on version in subsequent release
31 | if [[ ! $supportedVersions =~ (^|[[:space:]])$version($|[[:space:]]) ]]; then
32 | echo "Unsupported version ${version}. Supported versions: ${supportedVersions}"
33 | exit 3
34 | fi
35 | }
36 |
37 | check_version() {
38 | if [ -z $skipVersionCheck ]; then
39 | hub_url=`oc -n $namespace get cm physical-location-info-cm -o jsonpath='{.data.CPD_HUB_URL}'`
40 | if [ -z $hub_url ]; then
41 | echo "Unable to retrieve version from control plane. Defaulting version to ${version}".
42 | return 0
43 | fi
44 | asset_version=`curl -ks https://${hub_url}/data_intg/v3/assets/version`
45 |
46 | versionsArray=(${supportedVersions})
47 | assetVersionsArray=(${assetVersions})
48 | digestsArray=(${imageDigests})
49 |
50 | if [ ${#versionsArray[@]} -ne ${#assetVersionsArray[@]} ]; then
51 | echo "Mismatch size for '${supportedVersions}' and '${assetVersions}'"
52 | exit 1
53 | fi
54 | arraylength=${#versionsArray[@]}
55 |
56 | for (( i=0; i<${arraylength}; i++ ));
57 | do
58 | assetVersion="${assetVersionsArray[$i]}\.[0-9]+\.[0-9]+"
59 | echo "${asset_version}" | grep -E "${assetVersion}" &> /dev/null
60 | if [[ $? -eq 0 ]]; then
61 | version="${versionsArray[$i]}"
62 | OPERATOR_DIGEST="${digestsArray[$i]}"
63 | echo "Version determined from control plane: $version"
64 | echo "OPERATOR_DIGEST: ${OPERATOR_DIGEST}"
65 | break;
66 | fi
67 | done
68 | else
69 | versionsArray=(${supportedVersions})
70 | digestsArray=(${imageDigests})
71 | for (( i=0; i<${#versionsArray[@]}; i++ ));
72 | do
73 | ventry=${versionsArray[$i]}
74 | if [ "$ventry" == "$version" ]; then
75 | OPERATOR_DIGEST="${digestsArray[$i]}"
76 | break;
77 | fi
78 | done
79 | fi
80 | }
81 |
82 | upgrade_pxruntimes() {
83 | # upgrade pxruntime instances to the same version
84 | instance_count=`oc -n $namespace get pxruntime 2> /dev/null | wc -l | tr -d ' '`
85 | if [ $instance_count -gt 0 ]; then
86 | echo "Updating PXRuntime instances in $namespace to version ${version}"
87 | oc -n ${namespace} get pxruntime 2> /dev/null | awk 'NR>1 { print $1 }' | xargs -I % oc -n ${namespace} patch pxruntime % --type=merge -p "{\"spec\":{\"version\": \"${version}\"}}"
88 | fi
89 | }
90 |
91 | create_pxruntime_crd() {
92 | cat < [--version ]"
428 | echo "--namespace: the management namespace to deploy the DataStage operator into"
429 | echo "--version: the version of the operator to deploy. The following versions are supported: ${supportedVersions}"
430 | echo ""
431 | exit 3
432 | }
433 |
434 | while [ $# -gt 0 ]
435 | do
436 | case $1 in
437 | --namespace|-n)
438 | shift
439 | namespace="${1}"
440 | ;;
441 | --digest)
442 | shift
443 | OPERATOR_DIGEST="${1}"
444 | ;;
445 | --version)
446 | shift
447 | version="${1}"
448 | skipVersionCheck="true"
449 | ;;
450 | *)
451 | echo "Unknown parameter '${1}'"
452 | handle_badusage
453 | ;;
454 | esac
455 | if [ $# -gt 0 ]
456 | then
457 | shift
458 | fi
459 | done
460 |
461 | if [[ -z $namespace ]]; then
462 | handle_badusage
463 | fi
464 |
465 | verify_args
466 | check_version
467 | create_pxruntime_crd
468 | create_service_account
469 | create_role
470 | create_role_binding
471 | create_operator_deployment
472 | create_cr_role
473 | create_cr_role_binging
474 | upgrade_pxruntimes
475 |
--------------------------------------------------------------------------------
/RemoteEngine/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/RemoteEngine/README.md:
--------------------------------------------------------------------------------
1 | # DataStage Remote Engine
2 |
3 | **Remote Engine** for DataStage on IBM Cloud allows you to set up an execution engine to run your workloads at a cloud location of your choice. Currently, this technology is in a closed Beta.
4 | 
5 | The setup can be done locally on Linux or Mac machines as a Docker instance, or as a service on a Kubernetes cluster. See the README.md in the respective folder for setup instructions.
6 |
--------------------------------------------------------------------------------
/RemoteEngine/docker/README.md:
--------------------------------------------------------------------------------
1 | # DataStage Remote Engine using Docker
2 |
3 | ## License
4 | ### Cloud
5 | [IBM DataStage as a Service Anywhere](https://www.ibm.com/support/customer/csol/terms/?ref=i126-9243-06-11-2023-zz-en)
6 | ### CP4D
7 | [IBM DataStage Enterprise and IBM DataStage Enterprise Plus](https://www14.software.ibm.com/cgi-bin/weblap/lap.pl?li_formnum=L-QFYS-RTJPJH)
8 |
9 | ## Limitations
10 | 1. The script might not work on Apple `M` processors as it runs x86 software via an emulation layer.
11 | 2. The script is not tested on Windows OS.
12 |
13 | ## Pre-Requisites
14 | 1. If you are specifically deploying a remote engine for IBM Cloud, you must have DataStage provisioned on IBM Cloud with an `Anywhere` plan. You can see different plans with details on https://cloud.ibm.com/catalog/services/datastage.
15 | 1. Software that must be installed on the system.
16 | 1. `docker` or `podman`
17 | 1. `jq`
18 | 1. `git` (optional)
19 | 1. You must have at least 50 GB of free space in `/var` in order to deploy the engine container. 200 GB of free space is recommended.
20 | 1. You must have at least 2 cores and 4 GB of memory; 4 cores and 8 GB of memory or more are recommended.
21 | 1. Recommended OS: Red Hat Enterprise Linux (RHEL 8.8, 8.10, 9.2 and 9.4), Ubuntu (20.04, 22.04, 24.04).
22 | 1. Ensure that the virtual machine allows outbound traffic to the following URLs:
23 | 1. icr.io
24 | 1. If you are specifically deploying a remote engine for IBM Cloud:
25 | 1. iam.cloud.ibm.com
26 | 1. dataplatform.cloud.ibm.com and api.dataplatform.cloud.ibm.com - if using the Dallas data center
27 | 1. eu-de.dataplatform.cloud.ibm.com and api.dataplatform.cloud.ibm.com - if using the Frankfurt data center
28 | 1. au-syd.dai.cloud.ibm.com and api.au-syd.dai.cloud.ibm.com - if using the Sydney data center
29 | 1. ca-tor.dai.cloud.ibm.com and api.ca-tor.dai.cloud.ibm.com - if using the Toronto data center
30 | 1. cloud-object-storage.appdomain.cloud (the URL could have a region prefix, e.g. .s3.cloud-object-storage.appdomain.cloud), so the recommendation is to allow `*.cloud-object-storage.appdomain.cloud` to accommodate such variations.
31 |
32 |
33 | ## Requirements
34 | 1. Clone this repo on the Remote Engine server: `git clone https://github.com/IBM/DataStage.git`.
35 | 1. If you already have this repo cloned, go to the root directory and run `git pull` to get the latest changes.
36 | 1. If you are specifically deploying a remote engine for IBM Cloud, specify your IBM Cloud API Key. The API key is required for registering the remote engine to your Cloud Pak for Data project on IBM Cloud. To generate a new API key (https://cloud.ibm.com/docs/account?topic=account-userapikey&interface=ui), open https://cloud.ibm.com in your browser.
37 | 1. Click Manage > Access (IAM). Then, on the left side menu, select "API Keys" to open the "API Keys" page (URL: https://cloud.ibm.com/iam/apikeys).
38 | 2. Ensure that My IBM Cloud API keys is selected in the View list.
39 | 3. Click Create an IBM Cloud API key, and then specify a name and description.
40 | * Note: Once you close this pop-up window, you will not be able to access the value of this API Key again.
41 | 1. Your Encryption Key and IV. These can be generated by executing the command
42 | ```bash
43 | openssl enc -aes-256-cbc -k secret -P -md sha1 -pbkdf2
44 | ```
45 | Sample output:
46 | ```
47 | $ openssl enc -aes-256-cbc -k secret -P -md sha1 -pbkdf2
48 | salt=5334474DF6ECB3CC
49 | key=2A928E95489FCC163D46872040B9B24DC44E28A734B7681C8A3F0168F23E2A13
50 | iv =45990395FEB2B39C34B51D998E0E2E1B
51 | ```
52 | From this output, the `key` and `iv` are used as the Encryption Key and initialization vector (IV) respectively.
53 | 1. Your Project ID(s). This is a comma-separated list of the IDs of the Cloud Pak for Data projects that you want to use with this Remote Engine instance, whether it is for IBM Cloud or CP4D. (If you are using DataStage in the Frankfurt data center, use https://eu-de.dataplatform.cloud.ibm.com. If you are using DataStage in the Sydney data center, use https://au-syd.dai.cloud.ibm.com. If you are using DataStage in the Toronto data center, use https://ca-tor.dai.cloud.ibm.com.) You can retrieve this value by opening the project that you want to use with this Remote Engine and selecting the Manage tab > General to view the Project ID.
54 | 1. If you are specifically deploying a remote engine for IBM Cloud, the IBM Cloud Container Registry API key. This API key will be used to download the images needed to run Remote Engine for IBM Cloud. Currently, clients need to request this API key via IBM Cloud Support once they have provisioned a DataStage-aaS plan: https://cloud.ibm.com/unifiedsupport.
55 | 1. If you are specifically deploying a remote engine for CP4D, the IBM Entitlement API key. This API key will be used to download the images needed to run Remote Engine for CP4D. Please follow https://www.ibm.com/docs/en/cloud-paks/cp-data/5.0.x?topic=information-obtaining-your-entitlement-api-key for instructions on how to obtain your IBM Entitlement API Key.
56 |
57 | ## Usage
58 |
59 | ### 1a. Start an engine on the Remote Engine server for IBM Cloud
60 | The `dsengine.sh` script can be invoked from the `docker` folder of this project. Note that the name in the below command can be changed from `my_remote_engine_01` to your preferred name.
61 | ```bash
62 | # create/start a local remote engine instance for IBM Cloud
63 | ./dsengine.sh start -n 'my_remote_engine_01' \
64 | -a "$IBMCLOUD_APIKEY" \
65 | -e "$ENCRYPTION_KEY" \
66 | -i "$ENCRYPTION_IV" \
67 | -p "$IBMCLOUD_CONTAINER_REGISTRY_APIKEY" \
68 | --project-id "$PROJECT_ID1,$PROJECT_ID2,$PROJECT_ID3,..."
69 |
70 | ```
71 | Once the script execution has completed, this engine needs to be selected in the project settings by going to the project, navigating to `Manage` > `DataStage` and selecting the appropriate engine under the `Settings` tab > `Remote` environments.
72 | ### 1b. Start an engine on the Remote Engine server for Cloud Pak for Data Instances
73 | The `dsengine.sh` script can be invoked from the `docker` folder of this project. Note that the name in the below command can be changed from `my_remote_engine_01` to your preferred name.
74 | ```bash
75 | # create/start a local remote engine instance for CP4D instance
76 | ./dsengine.sh start -n 'my_remote_engine_01' \
77 | -e "$ENCRYPTION_KEY" \
78 | -i "$ENCRYPTION_IV" \
79 | -p "$IBM_ENTITLED_REGISTRY_APIKEY" \
80 | --project-id "$PROJECT_ID1,$PROJECT_ID2,$PROJECT_ID3,..." \
81 | --home "cp4d" \
82 | --zen-url "CP4D_ZEN_URL" \
83 | --cp4d-user "CP4D_USERNAME" \
84 | --cp4d-apikey "CP4D_API_KEY"
85 |
86 | ```
87 | Once the script execution has completed, this engine needs to be selected in the project settings by going to the project, navigating to `Manage` > `DataStage` and selecting the appropriate engine under the `Settings` tab > `Remote` environments.
88 |
89 | #### Optional start flags
90 |
91 | While starting a remote engine, the following optional flags can be used in addition to the ones shown above. These can be seen via the help flag on the start subcommand: `./dsengine.sh start --help`.
92 |
93 | 1. `--memory <size>`: Sets the maximum amount of memory the engine can use. The value takes a positive integer, followed by a suffix of m/M, g/G, to indicate megabytes or gigabytes. Default is `4G`.
94 | 1. `--cpus <count>`: Sets the maximum amount of cpu resources the engine can use. The value takes a positive number. Default is `2` cores.
95 | 1. `--pids-limit <limit>`: Set the PID limit of the container (defaults to -1 for unlimited pids for the container)
96 | 1. `--volume-dir <directory>`: Sets the directory to be used as the volume directory for persistent storage. Default location is `/tmp/docker/volumes`. The volume directory will be updated with the following top level file structure:
97 |
98 | ```
99 | /scratch
100 | /ds-storage
101 | /_runtime
102 | /_runtime/px-storage
103 | ```
104 | Once the remote engine is up and running, additional files and folders will be created inside the above folders as needed by the engine.
105 | If you are planning to create multiple engines on the same machine, then they should use different volume directories.
106 | 1. `--home <environment>`: Sets the target IBM Cloud environment to either `ypprod` (Dallas data center - default), `frprod` (Frankfurt data center), `sydprod` (Sydney data center), or `torprod` (Toronto data center). The project associated with this engine instance must be in the same data center.
107 | 1. `--select-version`: Set to true if you want to choose a specific version of remote engine. By default, this flag is set to false and the latest version is used.
108 | 1. `--security-opt <value>`: Specify the security-opt to be used to run the container.
109 | 1. `--cap-drop <value>`: Specify the cap-drop to be used to run the container.
110 | 1. `--set-user <username>`: Specify the username to be used to run the container. If not set, the current user is used.
111 | 1. `--set-group <group>`: Specify the group to be used to run the container.
112 | 1. `--additional-users <ids>`: Comma-separated list of ids (IAM IDs for cloud, check https://cloud.ibm.com/docs/account?topic=account-identity-overview for details; uids/usernames for cp4d) that can also control the remote engine besides the owner.
113 | 1. `--mount-dir "<local dir>:<container dir>"`: Specify a folder you want to mount on the container. This flag can be specified multiple times.
114 | * Note: This flag can be used to mount a scratch directory using `--mount-dir "<local scratch dir>:/opt/ibm/PXService/Server/scratch"`. This will override the default scratch directory that is either created in `/tmp` or in the directory specified for `--volume-dir`.
115 | 1. `--relabel-selinux-mounts`: [true]. Appends the :z option to SELinux volume bind mounts.
116 | 1. `--host-network`: Set this to the port number the docker remote engine should use on the host VM to connect with the network. Enabling this will set the docker network to host.
117 | 1. `--add-host :`: Add a : entry to the /etc/hosts file of the container. This flag can be specified multiple times.
118 | 1. `--proxy http://:@:`: Specify a proxy URL. The username and password can be skipped based on how the proxy is configured.
119 | 1. `--proxy-cacert `: Specify the location of the custom CA store for the specified proxy - if it is using a self signed certificate.
120 | 1. `--krb5-conf `: Specify the location of the Kerberos config file if using Kerberos Authentication.
121 | 1. `--krb5-conf-dir `: Specify the directory of multiple Kerberos config files if using Kerberos Authentication. (Only supported with --krb5-conf, the krb5.conf file needs to include "includedir /etc/krb5-config-files/krb5-config-dir" line).
122 | 1. `--import-db2z-license `: Specify the location of the DB2Z license to import.
123 | 1. `--force-renew`: Set to true if you want to remove the existing remote engine container. By default, this flag is set to false and if a stopped existing container is found, it is restarted or if a running existing container is found, the script is aborted.
124 | 1. `--zen-url`: CP4D zen url of the cluster (required if --home is used with "cp4d").
125 | 1. `--cp4d-user`: CP4D username used to log into the cluster (required if --home is used with "cp4d").
126 | 1. `--cp4d-apikey`: CP4D apikey used to authenticate with the cluster. Go to "Profile and settings" when logged in to get your api key for the connection. (required if --home is used with "cp4d").
127 | 1. `--registry`: Custom container registry to pull images from. Must also set -u and -p options to login to the registry as well as either --digest or --image-tag for IBM Cloud.
128 | 1. `-u | --user`: User to login to a custom container registry (required if --registry is set).
129 | 1. `--digest`: Digest to pull the ds-px-runtime image from the registry (required if --registry is set and --image-tag is not set).
130 | 1. `--image-tag`: Image tag to pull the ds-px-runtime image from the registry (required if --registry is set and --digest is not set).
131 | 1. `--skip-docker-login`: [true | false]. Skips Docker login to container registry if that step is not needed.
132 | 1. `--env-vars`: Semicolon-separated list of key=value pairs of environment variables to set (e.g. key1=value1;key2=value2;key3=value3;...). Whitespace is ignored.
133 | * Remote Engine specific environment variables:
134 | * REMOTE_ENGINE_BATCH_SIZE - Set to an integer representing the maximum number of jobs that remote engine will pull at one time. Default value is 5.
135 | * APT_USE_REMOTE_APP - Set to "force" to make remote engine avoid forking section leader processes. Can avoid inheriting unwanted open resources from the conductor. Default is unset.
136 | * ENABLE_DS_METRICS - Set to "true" to have the remote engine send metrics to a configured DataStage metrics repository. See the [IBM Cloud](https://dataplatform.cloud.ibm.com/docs/content/dstage/dsnav/topics/ds_metrics.html?context=cpdaas&audience=wdp) or [Cloud Pak for Data](https://www.ibm.com/docs/en/software-hub/5.1.x?topic=administering-storing-persisting-metrics) documentation for more information.
137 |
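For example, a CP4D start command that combines several of these optional flags could look like the following sketch (the engine name, credential variables, and volume directory are illustrative placeholders; adjust them to your environment):

```bash
# sketch: start a CP4D remote engine with a larger resource allocation,
# a custom volume directory (illustrative path), and an engine-specific env var
./dsengine.sh start -n 'my_remote_engine_01' \
       -e "$ENCRYPTION_KEY" \
       -i "$ENCRYPTION_IV" \
       -p "$IBM_ENTITLED_REGISTRY_APIKEY" \
       --project-id "$PROJECT_ID1" \
       --home "cp4d" \
       --zen-url "CP4D_ZEN_URL" \
       --cp4d-user "CP4D_USERNAME" \
       --cp4d-apikey "CP4D_API_KEY" \
       --memory 8G \
       --cpus 4 \
       --volume-dir /data/remote-engine/volumes \
       --env-vars "REMOTE_ENGINE_BATCH_SIZE=10"
```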
138 |
139 | ### 2. Update an engine
140 |
141 | Update the remote engine instance to a new version. The update command gathers information from the existing docker container, then stops the existing container, removes it, and spins up a new container with the same name. Hence a container must be running prior to running the update command; if it is not, you can simply re-run the start command that you originally started the container with.
142 |
143 | ```bash
144 | # this will update the container with name 'my_remote_engine_01'
145 | ./dsengine.sh update -n "my_remote_engine_01" \
146 | -p "$IBMCLOUD_CONTAINER_REGISTRY_APIKEY"
147 | ```
148 |
149 | #### Optional update flags
150 |
151 | While updating a remote engine, the following optional flags can be used in addition to the ones shown above. They can also be listed via the help flag on the update subcommand: `./dsengine.sh update --help`. A combined example follows this list.
152 |
153 | 1. `--select-version`: Set to true if you want to choose a specific version of remote engine. By default, this flag is set to false and the latest version is used.
154 | 1. `--security-opt <option>`: Specify the security-opt to be used to run the container.
155 | 1. `--cap-drop <capability>`: Specify the cap-drop to be used to run the container.
156 | 1. `--additional-users <ids>`: Comma-separated list of IDs (IAM IDs for cloud, see https://cloud.ibm.com/docs/account?topic=account-identity-overview for details; uids/usernames for CP4D) that can also control the remote engine besides the owner.
157 | 1. `--proxy http://<username>:<password>@<proxy-host>:<proxy-port>`: Specify a proxy URL. The username and password can be skipped based on how the proxy is configured.
158 | 1. `--proxy-cacert <file>`: Specify the location of the custom CA store for the specified proxy, if it is using a self-signed certificate.
159 | 1. `--krb5-conf <file>`: Specify the location of the Kerberos config file if using Kerberos Authentication.
160 | 1. `--krb5-conf-dir <directory>`: Specify the directory of multiple Kerberos config files if using Kerberos Authentication. (Only supported together with --krb5-conf; the krb5.conf file needs to include the line "includedir /etc/krb5-config-files/krb5-config-dir".)
161 | 1. `--import-db2z-license <file>`: Specify the location of the DB2Z license to import.
162 | 1. `--registry`: Custom container registry to pull images from. Must also set -u and -p options to login to the registry as well as either --digest or --image-tag for IBM Cloud.
163 | 1. `-u | --user`: User to login to a custom container registry (required if --registry is set).
164 | 1. `--digest`: Digest to pull the ds-px-runtime image from the registry (required if --registry is set and --image-tag is not set).
165 | 1. `--image-tag`: Image tag to pull the ds-px-runtime image from the registry (required if --registry is set and --digest is not set).
166 | 1. `--skip-docker-login`: [true | false]. Skips Docker login to container registry if that step is not needed.
167 | 1. `--env-vars`: Semicolon-separated list of key=value pairs of environment variables to set (e.g. key1=value1;key2=value2;key3=value3;...). Whitespace is ignored.
168 | * Remote Engine specific environment variables:
169 | * REMOTE_ENGINE_BATCH_SIZE - Set to an integer representing the maximum number of jobs that remote engine will pull at one time. Default value is 5.
170 | * APT_USE_REMOTE_APP - Set to "force" to make remote engine avoid forking section leader processes. Can avoid inheriting unwanted open resources from the conductor. Default is unset.
171 | * ENABLE_DS_METRICS - Set to "true" to have the remote engine send metrics to a configured DataStage metrics repository. See the [IBM Cloud](https://dataplatform.cloud.ibm.com/docs/content/dstage/dsnav/topics/ds_metrics.html?context=cpdaas&audience=wdp) or [Cloud Pak for Data](https://www.ibm.com/docs/en/software-hub/5.1.x?topic=administering-storing-persisting-metrics) documentation for more information.
172 |
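For example, an update that pins a specific engine version and sets an engine environment variable could look like this sketch (the name, API key variable, and values are illustrative; `--select-version true` follows the flag description above):

```bash
# sketch: update the engine, choosing a specific version interactively
# and setting a Remote Engine environment variable
./dsengine.sh update -n "my_remote_engine_01" \
       -p "$IBMCLOUD_CONTAINER_REGISTRY_APIKEY" \
       --select-version true \
       --env-vars "REMOTE_ENGINE_BATCH_SIZE=10"
```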
173 |
174 | ### 3. Stop an engine
175 |
176 | Stop the remote engine container.
177 | ```bash
178 | # stop a local remote engine instance with name 'my_remote_engine_01'
179 | ./dsengine.sh stop -n 'my_remote_engine_01'
180 |
181 | ```
182 | Note that if `./dsengine.sh start` is used when a container is stopped, the script will simply start the stopped container.
183 |
184 | ### 4a. Cleanup/Uninstall a remote engine for IBM Cloud
185 | This is NOT needed if you want to update the engine. This is only needed if you want to completely remove the engine container, delete the volume directories, and deregister the remote engine from the associated project.
186 | ```bash
187 | # cleanup a remote engine instance with name 'my_remote_engine_01'
188 | ./dsengine.sh cleanup -n 'my_remote_engine_01' \
189 | -a "$IBMCLOUD_APIKEY" \
190 | --project-id "$PROJECT_ID"
191 | ```
192 | ### 4b. Cleanup/Uninstall a remote engine for Cloud Pak for Data Instances
193 | This is NOT needed if you want to update the engine. This is only needed if you want to completely remove the engine container, delete the volume directories, and deregister the remote engine from the associated project.
194 | ```bash
195 | # cleanup a remote engine instance with name 'my_remote_engine_01'
196 | ./dsengine.sh cleanup -n 'my_remote_engine_01' \
197 | --project-id "$PROJECT_ID" \
198 | --home "cp4d"
199 | ```
200 |
201 | ### Logs
202 | #### Container-level logs
203 | * Location: Stored under /var/lib/containers/storage/overlay/... on the host.
204 | * Purpose: Captures minimal container-level output (stdout/stderr); it can be accessed using `podman logs <container-id>` (this is often empty or non-critical).
205 | #### Primary Remote Engine logs
206 | * Initial Location: Typically written to /logs directory on the container and bind-mounted to /var/lib/containers/storage/overlay/... on the host (by default).
207 | * Archived Location: Older logs are rotated and archived as ZIP files under /ds-storage/service_log_archive in the container, which is bind-mounted to /ds-storage/service_log_archive on the host.
208 | * Purpose:
209 | * trace.log – Active detailed trace log of Remote Engine runtime (job interactions, service calls).
210 | * messages.log – Higher-level system logs (job polling activity, engine heartbeats, etc.).
211 | #### Workload Management (WLM) logs
212 | * Location: Stored in /px-storage/PXRuntime/WLM/logs/ inside the container, bind-mounted to /px-storage/PXRuntime/WLM/logs on the host.
213 | * Purpose: Captures CPU and memory usage metrics, job scheduling events, and system resource distribution among running DataStage pods.
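To watch the primary engine logs live while a job runs, you can tail them inside the container (a sketch, assuming the default /logs location described above; replace the container name or id with your own):

```bash
# follow the detailed trace log inside the running engine container
docker exec -it <container-name-or-id> tail -f /logs/trace.log
# OR
podman exec -it <container-name-or-id> tail -f /logs/trace.log
```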
214 | #### Extracting Service Logs as Archive to Local Machine
215 | ```bash
216 | docker cp {container id}:/logs - > remoteenginelogs.tar
217 | # OR
218 | podman cp {container id}:/logs - > remoteenginelogs.tar
219 | ```
220 |
221 | ### Troubleshooting
222 |
223 | 1. Flow execution fails with incorrect host or IP Address in the log
224 |
225 | If the script finishes successfully and you are able to see the engine in your project but the run still fails, check whether the container's hosts file contains the host IP address. If so, remove the host IP address mapping so that the container uses 127.0.0.1 to resolve the hostname. This issue has been seen primarily when using podman. You can edit this file using the steps below:
226 | 1. Find the running container name or id using
227 | ```bash
228 | docker ps
229 | # OR
230 | podman ps
231 | ```
232 | 1. Exec into the container:
233 | ```bash
234 | docker exec --user root -it <container-name-or-id> bash
235 | # OR
236 | podman exec --user root -it <container-name-or-id> bash
237 | ```
238 | 1. Edit the hosts file inside the container
239 | ```bash
240 | nano /etc/hosts
241 | ```
242 | 1. Remove the line that contains the IP address of the host and the host name
243 | 1. Press `ctrl + x` to exit nano and press `y` to confirm saving the changes.
244 | 1. Retry the flow.
245 |
246 | Note that this fix will need to be re-applied whenever the current container is removed, e.g. when updating to a new image.
247 |
248 | 1. Making sure the URLs are allowlisted
249 |
250 | For the URLs mentioned in the [pre-requisites](https://github.com/IBM/DataStage/blob/main/RemoteEngine/docker/README.md#pre-requisites) section above, you can use ping from the host VM to make sure the URLs are accessible. For example:
251 | ```
252 | $ ping api.dataplatform.cloud.ibm.com
253 | PING api.dataplatform.cloud.ibm.com (172.66.129.176) 56(84) bytes of data.
254 | 64 bytes from 172.66.129.176 (172.66.129.176): icmp_seq=1 ttl=53 time=6.66 ms
255 | 64 bytes from 172.66.129.176 (172.66.129.176): icmp_seq=2 ttl=53 time=6.90 ms
256 | 64 bytes from 172.66.129.176 (172.66.129.176): icmp_seq=3 ttl=53 time=6.82 ms
257 | 64 bytes from 172.66.129.176 (172.66.129.176): icmp_seq=4 ttl=53 time=6.79 ms
258 | 64 bytes from 172.66.129.176 (172.66.129.176): icmp_seq=5 ttl=53 time=6.87 ms
259 | ^C
260 | --- api.dataplatform.cloud.ibm.com ping statistics ---
261 | 5 packets transmitted, 5 received, 0% packet loss, time 4007ms
262 | rtt min/avg/max/mdev = 6.662/6.806/6.898/0.081 ms
263 | ```
264 |
265 | 1. If the API Key Changes in DataStage aaS with Anywhere:
266 |
267 | If your API Key changes in DataStage aaS with Anywhere, you have to recreate the remote engine with the new API Key. In order to keep your remote engine with the same engine ID:
268 | 1. Stop the remote engine container
269 | 1. Remove/delete the remote engine container
270 | 1. Run `dsengine.sh start` again with the new API key
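In practice, those steps could look like the following sketch (it assumes the container is named after the value passed to -n, which you should verify with `docker ps` or `podman ps`):

```bash
# stop and remove the existing engine container (name assumed to match the -n value)
./dsengine.sh stop -n 'my_remote_engine_01'
docker rm my_remote_engine_01     # or: podman rm my_remote_engine_01

# then re-run your original ./dsengine.sh start command with the new API key,
# reusing the same engine name (-n), encryption key (-e), and initialization vector (-i)
```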
271 |
272 | 1. If you are getting "password missing" errors when running flows after stopping, removing, then restarting remote engine containers:
273 |
274 | We strongly encourage you to keep the same remote engine and simply run `./dsengine.sh update` whenever you need to update it, or to run `./dsengine.sh cleanup` to completely remove the remote engine and then `./dsengine.sh start` with a new remote engine name (-n), a new encryption key (-e), and a new initialization vector (-i) to start a completely new remote engine instance. If you stop a remote engine, remove/delete it, and then run `./dsengine.sh start` again with the same remote engine name (-n) but a new encryption key (-e) and a new initialization vector (-i), you are essentially creating a new remote engine, which may cause unexpected behavior with old remote engine environments of the same name. If for some reason you absolutely need to stop, remove, and start a container with the same name, please reuse the same encryption key (-e) and initialization vector (-i) as the old remote engine to prevent "password missing" errors when running flows.
275 |
276 | 1. Insufficient storage for the Remote Engine container image
277 | ```bash
278 | Error: writing blob: adding layer with blob "sha256:a8698704d0df7dd5be511fe1d9ed25f4801aa235b313cce295605a068bbf26c0"/""/"sha256:ac8a65b88f9323576d8c7978f45a3f1ea070f0d09a6428447fc8fd87eb5af13e": unpacking failed (error: exit status 1; output: open /usr/share/zoneinfo/zone.tab: no space left on device)
279 | docker run return code: 125.
280 | ```
281 | Docker/Podman container storage layers, metadata, and volumes are stored in `/var` by default. It is recommended to have at least **50 GB** of free space in `/var` for installing the Remote Engine container image.
282 |
283 | Check the available storage:
284 | ```bash
285 | df -h /var
286 | ```
287 | Example output:
288 | ```bash
289 | Filesystem Size Used Avail Use% Mounted on
290 | /dev/xvda4 8.8G 42.4G 52.8G 81% /
291 | ```
292 | #### Option 1: Clean up container storage
293 | To help cleanup used disk space within `/var`, run:
294 | ```bash
295 | docker system prune
296 | # OR
297 | podman system prune
298 | ```
299 | > [!IMPORTANT] This removes all unused images, including manually pulled ones.
300 |
301 | > [!TIP] Make sure your client understands the purpose of this command before executing it.
302 | #### Option 2: Modify storage.conf
303 | `storage.conf` is the configuration file that controls how container runtimes like Podman handle storage, in particular how and where they store images, layers, containers, and temporary files.
304 | ###### Part 1 - Mounting additional volumes
305 | If the customer has an additional volume that has been mounted already, then skip these steps.
306 | 1. Check for the volume that needs to be mounted:
307 | ```bash
308 | lsblk
309 | ```
310 | Example output:
311 | ```
312 | NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
313 | xvda 202:0 0 10G 0 disk
314 | ├─xvda1 202:1 0 1M 0 part
315 | ├─xvda2 202:2 0 200M 0 part /boot/efi
316 | ├─xvda3 202:3 0 500M 0 part /boot
317 | └─xvda4 202:4 0 9.3G 0 part /
318 | xvdb 202:16 0 20G 0 disk
319 | ```
320 | In this example, xvdb has 20 GB and needs to be mounted.
321 | 1. Create an XFS file system to enable the mount:
322 | ```bash
323 | sudo mkfs.xfs /dev/xvdb
324 | ```
325 | 1. Make the new directory for container storage:
326 | ```bash
327 | sudo mkdir -p /mnt/data
328 | ```
329 | 1. Mount the volume to the newly created root directory:
330 | ```bash
331 | sudo mount /dev/xvdb /mnt/data
332 | ```
333 | 1. Verify the changes:
334 | ```bash
335 | df -h
336 | ```
337 | Expected Output:
338 | ```
339 | Filesystem Size Used Avail Use% Mounted on
340 | ...
341 | /dev/xvdb 20G 175M 20G 1% /mnt/data
342 | ```
343 | 1. Persist the mount settings after reboot by adding it to the `/etc/fstab` file:
344 | ```
345 | /dev/xvdb /mnt/data xfs defaults 0 0
346 | ```
347 | ###### Part 2 - Creating the new directories for container storage
348 | Create the directory which will be used for container storage:
349 |
350 | If running Podman as rootful:
351 | ```bash
352 | sudo mkdir -p /mnt/data/containers/storage_root
353 | ```
354 | If running Podman as rootless:
355 | ```bash
356 | sudo mkdir -p /mnt/data/containers/storage_rootless
357 | ```
358 | > [!NOTE] If you're unsure whether the client is running rootful or rootless Podman, you can create both directories, just in case.
359 |
360 | When changing the location for rootful user's container storage on an SELINUX-enabled system:
361 | 1. Verify if the customer has SELinux enabled:
362 | ```bash
363 | getenforce
364 | ```
365 | Output will be:
366 | - **Enforcing** → SELinux is active and enforcing policies
367 | - **Permissive** → SELinux logs but doesn't enforce
368 | - **Disabled** → SELinux is off
369 | 1. If Enforcing, run these commands to translate the SELinux policies:
370 | ```bash
371 | sudo semanage fcontext -a -e /var/lib/containers/storage /mnt/data/containers/storage_root
372 | sudo restorecon -R -v /mnt/data/containers/storage_root
373 | ```
374 | 1. For rootless containers, ensure ownership is granted to the rootless user:
375 | ```bash
376 | sudo chown -R amin:amin /mnt/data/containers/storage_rootless
377 | ```
378 | ###### Part 3 - Modifying storage.conf
379 | 1. Edit the storage.conf file:
380 | ```bash
381 | sudo nano /etc/containers/storage.conf
382 | ```
383 | 1. For rootful users, change the graphroot setting:
384 | ```
385 | graphroot = "/mnt/data/containers/storage_root"
386 | ```
387 | 1. For rootless users, uncomment and modify the rootless_storage_path:
388 |
389 | Before:
390 | ```
391 | #rootless_storage_path = "$HOME/.local/share/containers/storage"
392 | ```
393 | After:
394 | ```
395 | rootless_storage_path = "/mnt/data/containers/storage_rootless"
396 | ```
397 | 1. Verify the changes:
398 | ```bash
399 | podman info
400 | ```
401 | Expected output:
402 | ```
403 | store:
404 | ....
405 | graphRoot: /mnt/data/containers/storage_rootless
406 | ....
407 | runRoot: /run/user/1000/containers
408 | transientStore: false
409 | volumePath: /mnt/data/containers/storage_rootless/volumes
410 | ```
411 | #### Option 3: Symlink container storage to a larger drive
412 | 1. If necessary, add additional storage to the machine. Use the lsblk command to find the drive:
413 | ```bash
414 | lsblk
415 | ```
416 | 1. Create an XFS file system:
417 | ```bash
418 | sudo mkfs.xfs /dev/vdc
419 | ```
420 | 1. Make a directory for the symbolic link:
421 | ```bash
422 | sudo mkdir -p /mnt/data
423 | ```
424 | 1. Mount the drive:
425 | ```bash
426 | sudo mount /dev/vdc /mnt/data
427 | df -h
428 | ```
429 | 1. Change the owner of your new directory:
430 | ```bash
431 | sudo chown itzuser:itzuser /mnt/data
432 | ```
433 | 1. Add to fstab for persistence:
434 | ```bash
435 | echo "/dev/vdc /mnt/data xfs defaults 0 0" | sudo tee -a /etc/fstab
436 | ```
437 | 1. Make a directory that we will symlink to:
438 | ```bash
439 | mkdir -p /mnt/data/containers
440 | ```
441 | 1. Copy all data from existing containers directory:
442 | ```bash
443 | rsync -avP /var/lib/containers/ /mnt/data/containers/
444 | ```
445 | 1. Backup the original directory:
446 | ```bash
447 | mv /var/lib/containers /var/lib/containers.old
448 | ```
449 | 1. Create the symbolic link:
450 | ```bash
451 | sudo ln -s /mnt/data/containers /var/lib/containers
452 | ```
453 | 1. Verify that it worked:
454 | ```bash
455 | cd /var/lib
456 | ls -ltr
457 | ```
458 |
459 | 1. Insufficient subgids/subuids for user namespace
460 | ```bash
461 | Error: writing blob: adding layer with blob "sha256:a9089747d5ad599b773f9bfca2a647fe03b75db371c637b5b650c96283e9a36e": processing tar file(potentially insufficient UIDs or GIDs available in user namespace (requested 1000321001:1000321001 for /home/dsuser): Check /etc/subuid and /etc/subgid if configured locally and run "podman system migrate": lchown /home/dsuser: invalid argument): exit status 1
462 | ```
463 | Starting the Remote Engine with rootless Podman requires the user running it to have a subordinate UID/GID range that includes 1000321001 listed in the files `/etc/subuid` and `/etc/subgid`.
464 | 1. Install `shadow-utils`:
465 | ```bash
466 | sudo yum -y install shadow-utils
467 | ```
468 | 1. Edit `/etc/subuid`:
469 | ```bash
470 | sudo vi /etc/subuid
471 | ```
472 | Add:
473 | ```text
474 | <username>:100000:1001321001
475 | ```
476 | 1. Verify the change:
477 | ```bash
478 | cat /etc/subuid
479 | ```
480 | 1. Edit `/etc/subgid`:
481 | ```bash
482 | sudo vi /etc/subgid
483 | ```
484 | Add:
485 | ```text
486 | <username>:100000:1001321001
487 | ```
488 | 1. Verify:
489 | ```bash
490 | cat /etc/subgid
491 | ```
492 | 1. Apply changes:
493 | ```bash
494 | podman system migrate
495 | ```
496 | > [!TIP] Ensure the UID/GID ranges are properly configured and match across both files. Replace `<username>` with the name of the user.
497 |
498 | 1. Insufficient 'cpu' and 'cpuset' permissions
499 | ```bash
500 | Error: OCI runtime error: crun: the requested cgroup controller `cpu` is not available
501 | ```
502 | On some systemd-based systems, non-root users do not have resource limit delegation permissions. This causes setting resource limits in rootless Podman to fail (for example, configuring the Remote Engine to have 8 cores).
503 | 1. Log into the affected user.
504 | 1. Verify if resource limit delegation is enabled:
505 | ```bash
506 | cat "/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/cgroup.controllers"
507 | ```
508 | Example Output:
509 | ```
510 | memory pids
511 | ```
512 | 1. If cpu and cpuset are not listed, we need to add them to the .conf file:
513 | ```bash
514 | sudo mkdir -p /etc/systemd/system/user@.service.d/
515 | sudo nano /etc/systemd/system/user@.service.d/delegate.conf
516 | ```
517 | 1. Modify the file:
518 | ```ini
519 | [Service]
520 | Delegate=memory pids cpu cpuset
521 | ```
522 | 1. Persist the changes:
523 | ```bash
524 | sudo systemctl daemon-reexec
525 | sudo systemctl daemon-reload
526 | ```
527 | 1. Log out of the session and log back in to test the changes:
528 | ```bash
529 | cat "/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/cgroup.controllers"
530 | ```
531 | Expected Output:
532 | ```
533 | cpuset cpu memory pids
534 | ```
535 |
--------------------------------------------------------------------------------
/RemoteEngine/kubernetes/README.md:
--------------------------------------------------------------------------------
1 | # DataStage Remote Engine on Kubernetes
2 |
3 | ## License
4 | ### Cloud
5 | [IBM DataStage as a Service Anywhere](https://www.ibm.com/support/customer/csol/terms/?ref=i126-9243-06-11-2023-zz-en)
6 | ### CP4D
7 | [IBM DataStage Enterprise and IBM DataStage Enterprise Plus](https://www14.software.ibm.com/cgi-bin/weblap/lap.pl?li_formnum=L-QFYS-RTJPJH)
8 |
9 | ## Requirements
10 | DataStage Remote Engine supports deployment on the following platforms:
11 | * OpenShift 4.12 and above
12 | * Details on setting up an OpenShift cluster: https://docs.openshift.com/container-platform/4.12/welcome/index.html
13 | * IBM Cloud Kubernetes Service (IKS)
14 | * Details on setting up an IKS cluster: https://cloud.ibm.com/docs/containers?topic=containers-getting-started&interface=ui
15 | * Setting up file Storage: https://cloud.ibm.com/docs/containers?topic=containers-file_storage
16 | * Amazon Elastic Kubernetes Service (EKS)
17 | * Details on setting up an EKS cluster: https://docs.aws.amazon.com/eks/latest/userguide/create-cluster.html
18 | * Setting up an Elastic File System: https://www.ibm.com/docs/en/cloud-paks/cp-data/4.6.x?topic=storage-setting-up-amazon-elastic-file-system (see details below)
19 |
20 | A file storage class with support for read-write-many (RWX) is required.
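To confirm that a suitable RWX-capable file storage class exists, you can list the storage classes on the cluster (a quick check, assuming you already have cluster access):

```bash
# list available storage classes and their provisioners
kubectl get storageclass
# OR
oc get storageclass
```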
21 | ## Pre-Requisites
22 | The following software is required on the client from which you will be executing this script:
23 |
24 | 1. `kubectl` or `oc`
25 | 2. `jq`
26 |
27 | ## Sizing
28 | The remote engine supports three default sizes: small, medium, and large.
29 |
30 | ### Small
31 | - 2 compute pods: 3 vCPU and 12 GB RAM
32 | - 1 conductor pod: 1 vCPU and 4 GB RAM
33 |
34 | ### Medium
35 | - 2 compute pods: 6 vCPU and 24 GB RAM
36 | - 1 conductor pod: 2 vCPU and 4 GB RAM
37 |
38 | ### Large
39 | - 3 compute pods: 8 vCPU and 32 GB RAM
40 | - 1 conductor pod: 4 vCPU and 4 GB RAM
41 |
42 | ## Needed API keys
43 | 1. If you are specifically deploying a remote engine for IBM Cloud, an IBM Cloud API key is required for registering the remote engine to your Cloud Pak for Data project on IBM Cloud.
44 | 1. Click Manage > Access (IAM) > API keys to open the “API keys” page (URL: https://cloud.ibm.com/iam/apikeys).
45 | 2. Ensure that My IBM Cloud API keys is selected in the View list.
46 | 3. Click Create an IBM Cloud API key, and then specify a name and description
47 | 2. If you are specifically deploying a remote engine for IBM Cloud, the IBM Cloud Container Registry APIKey. This apikey will be used to download the images needed to run Remote Engine for IBM Cloud. Currently there is no way to generate this, so it needs to be requested via IBM Cloud Support: https://cloud.ibm.com/unifiedsupport
48 | 3. If you are specifically deploying a remote engine for CP4D, the IBM Entitlement APIKey. This apikey will be used to download the images needed to run Remote Engine for CP4D. Please follow https://www.ibm.com/docs/en/cloud-paks/cp-data/5.0.x?topic=information-obtaining-your-entitlement-api-key for instructions on how to obtain your IBM Entitlement API Key.
49 |
50 | ## Usage
51 | To deploy the DataStage operator on a cluster without a global pull secret configured for the container registry, the pull secret needs to be created. You need an active connection to the cluster with either the kubectl or oc CLI available.
52 |
53 | ```
54 | # create pull secret for container registry
55 | ./launch.sh create-pull-secret --namespace <namespace> --username <registry-username> --password ${api-key} [--registry <registry>] [--zen-url <zen-url> (if you are specifically deploying a remote engine for CP4D)]
56 |
57 | # create the proxy secrets if proxies are used
58 | # ./launch.sh create-proxy-secrets --namespace <namespace> --proxy <proxy-url> [--proxy-cacert <cacert-file>] [--zen-url <zen-url> (if you are specifically deploying a remote engine for CP4D)]
59 |
60 | # create the krb5 configmaps if Kerberos Authentication is used
61 | # ./launch.sh create-krb5-configmaps --namespace <namespace> --krb5-conf <krb5.conf> [--krb5-conf-dir <krb5-conf-dir>]
62 |
63 | # create secret to import DB2Z license
64 | # ./launch.sh create-db2z-license-secret --namespace <namespace> --import-db2z-license <license-file>
65 |
66 | # create the api-key for dev or prod environment
67 | ./launch.sh create-apikey-secret --namespace <namespace> --apikey ${api-key} [--serviceid ${service-id}] [--zen-url <zen-url> (if you are specifically deploying a remote engine for CP4D)]
68 |
69 | # deploy the operator
70 | ./launch.sh install --namespace <namespace> [--registry <registry>] [--operator-registry-suffix <suffix>] [--docker-registry-suffix <suffix>] [--digests <digest1>,<digest2>,<digest3>] [--zen-url <zen-url> (if you are specifically deploying a remote engine for CP4D)]
71 |
72 | # create the remote instance - add '--gateway api.dataplatform.cloud.ibm.com' if the instance needs to register with the prod env
73 |
74 | ./launch.sh create-instance --namespace <namespace> --name <name> --project-id <project-id> --storage-class <storage-class> [--storage-size <size-in-gb>] [--size small|medium|large] [--data-center dallas|frankfurt|sydney|toronto (if you are specifically deploying a remote engine for IBM Cloud)] [--additional-users <users>] [--registry <registry>] [--operator-registry-suffix <suffix>] [--docker-registry-suffix <suffix>] [--digests <digest1>,<digest2>,<digest3>] [--zen-url <zen-url> (if you are specifically deploying a remote engine for CP4D)] --license-accept true
75 | ```
76 | For documentation on how to create IBM Cloud API keys, see https://cloud.ibm.com/docs/account?topic=account-manapikey.
77 | To generate a CP4D API Key, go to "Profile and settings" when logged in to the CP4D Cluster to get your api key for the connection.
78 |
79 | ## Setting up Amazon Elastic File System (EKS only)
80 | Creating an EFS file system
81 | Follow the CP4D instructions for creating an EFS file system.
82 | https://www.ibm.com/docs/en/cloud-paks/cp-data/4.6.x?topic=storage-setting-up-amazon-elastic-file-system
83 |
84 | Before you can set up dynamic provisioning, you must obtain the DNS name or IP address of your Amazon Elastic File System:
85 |
86 | DNS name (recommended)
87 | You can obtain the DNS name from the AWS Console on the Amazon EFS > File systems. Select the file system that you want to use. The DNS name is in the General section.
88 | The DNS name has the following format: `<file-system-id>.efs.<region>.amazonaws.com`.
89 |
90 | IP address
91 | You can obtain the IP address from the AWS Console on the Amazon EFS > File systems. Select the file system that you want to use. The IP address is on the Network tab.
92 |
93 | ```
94 | # create the NFS provisioner with the EFS file system;
95 | ./launch.sh create-nfs-provisioner --namespace <namespace> --server <efs-dns-name-or-ip> [--path <path>] [--storage-class <storage-class>]
96 | ```
97 |
98 | ## Installing with an input file
99 | Instead of running the installation script multiple times, the entire installation can be done with an input file. When running with an input file, the installation will retrieve the latest images available from the container registry. To update to the latest version, simply rerun the installation with the same input file.
100 |
101 | sample input file:
102 | ```
103 | # indicate that you have accepted license for IBM DataStage as a Service Anywhere(https://www.ibm.com/support/customer/csol/terms/?ref=i126-9243-06-11-2023-zz-en)
104 | license_accept=true
105 |
106 | # If you are specifically deploying a remote engine for IBM Cloud, the data center where your DataStage is provisioned on IBM cloud (dallas, frankfurt, sydney, or toronto); the default is dallas.
107 | # data_center=dallas
108 |
109 | # the namespace to deploy the remote engine
110 | namespace=
111 |
112 | # If you are specifically deploying a remote engine for IBM Cloud, the username and api key for the IBM Cloud Container Registry.
113 | # If you are specifically deploying a remote engine for CP4D, the username and api key for the IBM Entitled Registry.
114 | username=
115 | password=
116 |
117 | # If you are deploying a remote engine for IBM Cloud, this value will be the IBM Cloud api key for the remote engine to use.
118 | # If you are deploying a remote engine for CP4D, this value will be the CP4D Cluster account login api key of the target cluster for the remote engine to use. Go to "Profile and settings" when logged in to get your api key for the connection.
119 | api_key=
120 |
121 | # If you are specifically deploying a remote engine for CP4D, the CP4D service id username of the target cluster for the remote engine to use with api key
122 | service_id=cpadmin
123 |
124 | # the comma separated list of project IDs that will be using this remote engine
125 | projectId=
126 |
127 | # the name of the remote engine (alphanumeric and without spaces)
128 | name=
129 |
130 | #the size of the pxruntime - small, medium, or large (default is small)
131 | size=small
132 |
133 | # the file storage class to use
134 | storage_class=
135 |
136 | # the storage size in gb
137 | storage_size=20
138 |
139 | # comma separated list of ids (IAM IDs for cloud, check https://cloud.ibm.com/docs/account?topic=account-identity-overview for details; uids/usernames for cp4d) that can also control remote engine besides the owner
140 | # additional_users=
141 |
142 | # If you are specifically deploying a remote engine for CP4D, the zen url of the target cluster to use for CP4D environment. Specifying this variable will automatically switch usage from IBM Cloud to CP4D.
143 | zen_url=
144 |
145 | # Specify the proxy url (eg. http://:@:).
146 | # proxy_url=
147 |
148 | # Specify the absolute location of the custom CA store for the specified proxy - if it is using a self signed certificate.
149 | # cacert_location=
150 |
151 | # Specify the location of the Kerberos config file if using Kerberos Authentication.
152 | # KRB5_CONF_FILE=
153 |
154 | # Specify the directory of multiple Kerberos config files if using Kerberos Authentication. (Only supported with --krb5-conf, the krb5.conf file needs to include 'includedir /etc/krb5-config-files/krb5-config-dir' line).
155 | # KRB5_CONF_DIR=
156 |
157 | # Specify the location of the DB2Z license to import
158 | # DB2Z_LICENSE=
159 |
160 | # Specify your custom container registry to pull images from if you are image mirroring using a private registry. If using this option, you must set USE_DIGESTS as well for IBM Cloud.
161 | # CUSTOM_DOCKER_REGISTRY=
162 |
163 | # Custom operator registry suffix to use for the remote engine to pull ds-operator images from if using a custom container registry. Defaults to 'cpopen'.
164 | # OPERATOR_REGISTRY_SUFFIX=
165 |
166 | # Custom docker registry suffix to use for the remote engine to pull ds-px-runtime and ds-px-compute images from if using a custom container registry. Defaults to 'cp/cpd'.
167 | # DOCKER_REGISTRY_SUFFIX=
168 |
169 | # Custom digests to use for the remote engine. This option must be set if using a custom registry for IBM Cloud.
170 | # USE_DIGESTS=,,
171 |
172 | # the DNS name or IP of the EFS file system; omit if not deploying on AWS's EKS
173 | # the provisioner will use the storage class name specified in storage_class
174 | # nfs_server=
175 |
176 | # the namespace to deploy the storage class provisioner; will deploy to the same
177 | # namespace as the remote engine if omitted
178 | # provisioner_namespace=
179 | ```
180 | This script, as configured above, will deploy a remote engine for a CP4D cluster. If you need to deploy a remote engine for IBM Cloud, uncomment the data_center variable, comment out the zen_url and service_id variables, and change the api_key, username, and password variables according to the commented instructions.
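For example, switching the same input file from CP4D to IBM Cloud amounts to toggling these lines (a sketch based on the comments in the sample above; the api_key, username, and password values must also be replaced with their IBM Cloud equivalents):
```
# IBM Cloud: uncomment the data center, comment out the CP4D-specific variables
data_center=dallas
# service_id=cpadmin
# zen_url=
```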
181 |
182 | Running the install script with the input file:
183 | ```
184 | ./launch.sh -f inputFile.txt
185 | ```
186 |
187 | ## Mounting Additional Persistence Volumes
188 | To mount additional storage volumes to the remote engine instance, edit the custom resource (CR) and add the additional PVCs under `additional_storage`
189 | 1. Edit the PXRemoteEngine CR via `oc` or `kubectl`
190 | ```
191 | oc edit pxre
192 | ```
193 | 2. For each PVC, add its name and mount path under the `additional_storage`.
194 | ```
195 | spec:
196 | additional_storage: # mount additional persistent volumes
197 | - mount_path: /data1 # the path to mount the persistent volume
198 | pvc_name: # the name of the associated persistent volume claim
199 | - mount_path: /data2
200 | pvc_name:
201 | ```
202 |
203 | ## Modifying ephemeral-storage limit:
204 |
205 | Note that if the limit is raised higher than the amount of ephemeral storage available on the worker nodes, a worker node may run out of storage and cause stability issues.
206 |
207 | To increase the ephemeral storage limit to the target size, e.g. 20 GB, use the following command.
208 |
209 | ```
210 | oc patch pxre --patch '{"spec":{"ephemeralStorageLimit": "20Gi"}}' --type=merge
211 | ```
212 |
213 | ## Troubleshooting
214 | 1. If the API Key Changes in DataStage aaS with Anywhere
215 | 1. Rerun the launch.sh script again with the updated input file with the new API Key
216 | 1. Make sure to restart the px-runtime pod to mount the updated apikey secret
217 |
218 | 1. If the px-runtime or px-compute pods are stuck waiting for certs
219 | ```
220 | $ kubectl -n amintest logs testamin01-ibm-datastage-px-compute-0
221 | rm: cannot remove '/opt/ibm/PXService/Server/DSEngine/isjdbc.config.biginsights': No such file or directory
222 | Use CPD_JAVA_CACERTS...
223 | Custom WDP certs folder found. Will import certs to jre /opt/java...
224 | Additional cert folder not found.
225 | Waiting for certs...
226 | Waiting for certs...
227 | Waiting for certs...
228 | Waiting for certs...
229 | ```
230 | You can work around the issue by doing the following:
231 | 1. Log into the shell for either px-runtime or px-compute
232 | 1. Run the following command:
233 | ```bash
234 | touch /opt/ibm/PXService/Server/PXEngine/etc/certs/pxesslcert.p12
235 | ```
236 |
--------------------------------------------------------------------------------
/Resources/datastage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/Resources/datastage.png
--------------------------------------------------------------------------------
/dsjob/Readme.md:
--------------------------------------------------------------------------------
1 |
2 | # DataStage command-line tools
3 |
4 |
5 |
6 | Last Updated: 2025-03-04
7 |
8 |
9 |
10 | CPDCTL and the `dsjob` tool are command-line interfaces (CLI) you can use to manage your DataStage® resources in IBM Cloud Pak for Data.
11 |
12 | ### Documentation:
13 | #### Latest : [DSJob Plugin 5.1.3](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.1.3.md)
14 | ### Binary
15 | #### [DSJob Plugin 5.1.3](https://github.com/IBM/cpdctl/releases/tag/v1.7.10)
16 |
17 | #### Other Releases
18 | [DSJob Plugin 5.1.3](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.1.3.md)
19 |
20 | [DSJob Plugin 5.1.2](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.1.2.md)
21 |
22 | [DSJob Plugin 5.1.1](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.1.1.md)
23 |
24 | [DSJob Plugin 5.1.0](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.1.0.md)
25 |
26 | [DSJob Plugin 5.0.3](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.0.3.md)
27 |
28 | [DSJob Plugin 5.0.2](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.0.2.md)
29 |
30 | [DSJob Plugin 5.0.0](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.0.0.md)
31 |
32 | [DSJob Plugin 4.8.5](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.5.md)
33 |
34 | [DSJob Plugin 4.8.4](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.4.md)
35 |
36 | [DSJob Plugin 4.8.3](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.3.md)
37 |
38 | [DSJob Plugin 4.8.2](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.2.md)
39 |
40 | [DSJob Plugin 4.8.1](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.1.md)
41 |
42 | [DSJob Plugin 4.8.0](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.0.md)
43 |
44 | [DSJob Plugin 4.7.4](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.4.md)
45 |
46 | [DSJob Plugin 4.7.3](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.3.md)
47 |
48 | [DSJob Plugin 4.7.2](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.2.md)
49 |
50 | [DSJob Plugin 4.7.1](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.1.md)
51 |
52 | [DSJob Plugin 4.7.0](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.0.md)
53 |
54 | [DSJob Plugin 4.6.6](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.6.6.md)
55 |
56 | [DSJob Plugin 4.6.4](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.6.4.md)
57 |
58 | [DSJob Plugin 4.6.2](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.6.2.md)
59 |
60 | [Change Log](https://github.com/IBM/DataStage/tree/main/dsjob/changelog.md)
61 |
62 |
63 | The following table illustrates CP4D release compatibility with the corresponding CPDCTL release.
64 | Each CPDCTL release build contains the dsjob plugin released for the listed CP4D release.
65 | | CPD release | CPDCTL Version |
66 | | -------------|---------------|
67 | |4.0.8 | 1.1.194 |
68 | |4.5.0 | 1.1.235 |
69 | |4.5.2 | 1.1.243 |
70 | |4.5.3 | 1.1.257 |
71 | |4.5.3 | 1.1.269 |
72 | |4.6.0 | 1.1.299 |
73 | |4.6.1 | 1.1.313 |
74 | |4.6.2 | v1.2.1 |
75 | |4.6.4 | v1.3.16 |
76 | |4.7.0 | v1.4.20 |
77 | |4.7.1 | v1.4.34 |
78 | |4.7.2 | v1.4.42 |
79 | |4.8.0 | v1.4.84 |
80 | |4.8.1 | v1.4.104 |
81 | |4.8.2 | v1.4.116 |
82 | |4.8.3 | v1.4.130 |
83 | |4.8.4 | v1.4.145 |
84 | |4.8.5 | v1.4.175 |
85 | |4.8.5.1 | v1.4.179 |
86 | |4.8.5.2 | v1.4.195 |
87 | |5.0.0 | v1.6.6 |
88 | |5.0.1 | v1.6.29 |
89 | |5.0.2 | v1.6.55 |
90 | |5.0.2.1 | v1.6.61 |
91 | |5.0.3 | v1.6.81 |
92 | |5.1.0 | v1.6.93 |
93 | |5.1.1 | v1.6.98 |
94 | |5.1.2 | v1.7.0 |
95 | |5.1.3 | v1.7.10 |
96 |
--------------------------------------------------------------------------------
/dsjob/blogs/GitIntegration.md:
--------------------------------------------------------------------------------
1 | # Git Integration
2 |
3 | Git Integration is a mechanism to write your project contents to a Git repository and maintain version control and integrate with your CI/CD pipelines.
4 |
5 | DataStage customers want to use Git as a means of version control for their project assets in Cloud Pak projects. It also helps them promote their development work to higher environments by integrating Git into their CI/CD process.
6 |
7 | DataStage implements Git Integration as part of the ds-migration service. Currently the ds-migration service hosts all export and import functionality for projects. New APIs have been added to the ds-migration service to allow users to commit and pull their work from Git using the existing export and import functionality.
8 |
9 | Before invoking the API, the project needs to be Git-enabled. This sets up the initial configuration for the project, which can be overridden by individual Git operations.
10 | This functionality allows you to sync incremental changes to the Git repo from the project where you collaborate and build functionality. You can then promote your tested projects to higher environments such as QA and eventually to production.
11 |
12 | Git Integration provides three commands:
13 | - `git-commit`: Allows CPD Project resources to be pushed and committed to the Git repository
14 | - `git-pull`: Allows project artifacts from the Git repository to be pulled and updated into CPD Projects
15 | - `git-status`: Provides status on resources to identify the differences between the CPD Project and the Git repository
16 |
17 | A bit of background...
18 | - Git Integration was also available before the 5.x releases, implemented as CLI-side functionality. It did not require users to configure their projects for Git Integration.
19 | - The CLI tooling has limitations and other scalability issues. Git Integration is now implemented in the DataStage Migration service, which requires additional configuration steps.
20 | - Git repositories used before the 5.x releases are still backward compatible.
21 |
22 | #### Setting up Git Integration for the project
23 | To make a project aware of Git Integration, we need to enable it to internally track changes to all the resources in the project. This requires users to explicitly configure the project for Git Integration using the following command.
24 | ```
25 | cpdctl dsjob git-configure {--project PROJECT | --project-id PROJID} [--git-enable] [--git-url URL] [--git-owner OWNER] [--git-token TOKEN] [--git-org ORG] [--git-email EMAIL] [--git-provider GITPROVIDER] [--git-branch GITBRANCH] [--git-folder GITFOLDER]
26 | ```
27 | - `project` is the name of the project.
28 | - `project-id` is the id of the project. One of `project` or `project-id` must be specified.
29 | - `git-enable` allows project to be git enabled and starts tracking resource changes
30 | - `git-url` git repo url. ex: `https://github.company.com/username/git-dsjob.git`
31 | - `git-owner` owner of the git repository ex: `username`. This field is optional.
32 | - `git-token` token used for authentication. This field is encrypted when stored.
33 | - `git-org` Git organization, this field is optional.
34 | - `git-provider` Integrate to specific provider, must be `git` or `gitlab` currently.
35 | - `git-branch` Default branch used for git operations
36 | - `git-folder` Folder to which this project will be committed or fetched from. This is optional
37 |
38 | Git URL is the destination URL of your organization's Git repository. Git owner is the user who commits to Git, and the Git token is that user's token used for authentication. The fields `git-owner` and `git-org` are deducible from the URL and may eventually be removed from the command.
39 | Currently Git is configurable with an auth token; SSL certificates will be supported in the future. Also, we only support HTTPS-enabled repositories in this release.
40 |
41 | Git can also be configured from UI
42 |
43 | 
44 |
45 | Once Git is configured, the Migration service starts collecting data on all assets that are added, modified, or deleted, so that these assets can be tallied to give users an accurate status of the CPD Project against the Git repo. This consumes some cluster resources and can be managed using the [tracking](https://github.com/IBM/DataStage/blob/main/dsjob/dsjob.5.1.0.md#managing-git-project-tracking-data) API to turn it on only when needed.
46 |
47 | Use the provided [git-operations](https://github.com/IBM/DataStage/blob/main/dsjob/dsjob.5.1.0.md#git-integration) to integrate your CPD project with Git.
48 |
49 | #### Git Commit
50 | This API allows users to commit their project as a whole or incrementally into Git. When a project is committed to Git, a specific structure of the project is maintained in the Git repository.
51 |
52 | Each action is a single signed commit.
53 | Commit takes assets from the project and writes them to the Git repo under a branch and a folder. Users must make sure that each project is maintained in a separate folder so that project data does not overlap.
54 |
55 | Commit can be invoked from UI as shown by pressing the Sync button.
56 |
57 | 
58 |
59 | Users will have a chance to select assets to commit, or can commit the entire project.
60 |
61 | 
62 |
63 | The user will be prompted to enter a commit message.
64 |
65 | 
66 |
67 | Once committed to Git, the repo structure will look like the example below. This repo `git-dsjob` has two projects under the branch `dsjob101`. Each project is committed to its own folder, `dsjob-test` and `dsjob101` respectively.
68 |
69 | 
70 |
71 | Assets of each type are assigned to their respective folders. The parameter sets shown below are stored as JSON files. Some assets are stored in binary form.
72 |
73 | 
74 |
75 | Context-based commits are available from the UI from the assets page or by right-clicking on an asset.
76 |
77 | 
78 |
79 | The commit is based on the structure of the DataStage export API. Each asset is stored in a folder designated by its type. For example, all flows are stored as JSON files under `data_intg_flow`. Files are named after asset names.
80 | Git commit also maintains and updates `DataStage-README.json` at the root of the folder with a manifest of all assets it operated on. Git commit also writes `DataStage-DirectoryAsset.json` into the repo as-is, to restore the folder structure when the project is imported back from the repository into a Cloud Pak project.
81 | Note that `project.json` is a special file that is added at the root of each folder and maintained by the Git API. This file helps determine project-level changes with respect to Git. This file, alongside the project tracking data in the Migration service, helps determine which assets have changed between the Project and Git.
82 |
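As an illustration, after a commit the project folder in the repository might be laid out roughly like this (a sketch; the folder and asset names are taken from the examples below and the conventions described above):

```
folder1122/
├── project.json
├── DataStage-README.json
├── DataStage-DirectoryAsset.json
├── data_intg_flow/
│   └── TestDataSetFileSet.json
└── parameter_set/
    └── pset22.json
```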
83 |
84 | #### Git Commit from CLI
85 | ```
86 | $ cpdctl dsjob git-commit -p dsjob-demo1122 --repo git-dsjob --branch demo1122 --in-folder folder1122 --commit-message "test commit" --wait 2000
87 | ...
88 | status in_progress
89 | 2025-02-19 11:28:29: Project export status: in_progress, total: 49, completed: 22, failed: 0.
90 | 2025-02-19 11:28:39: Project export status: in_progress, total: 49, completed: 49, failed: 0.
91 | 2025-02-19 11:28:49: Project export status: in_progress, total: 49, completed: 49, failed: 0.
92 | 2025-02-19 11:28:59: Project export status: completed, total: 49, completed: 49, failed: 0.
93 | {
94 | "metadata": {
95 | "url": "https://cpd-ds.apps.clusterid.com/data_intg/v3/migration/export_flows_status?project_id=7a1d2ad1-5a1c-4216-9ddc-5edcb22af077",
96 | "project_id": "7a1d2ad1-5a1c-4216-9ddc-5edcb22af077",
97 | "project_name": "dsjob-demo1122",
98 | "created_at": "2025-02-19T19:28:19.126Z",
99 | "created_by": "cpadmin"
100 | },
101 | "entity": {
102 | "status": "completed",
103 | "start_time": "2025-02-19T19:28:19.148Z",
104 | "end_time": "2025-02-19T19:28:53.065Z",
105 | "elapsed_time": 33,
106 | "failed_flows": [
107 | {
108 | "end_time": "2025-02-19T19:28:20.379Z",
109 | "errors": [
110 | {
111 | "description": "DSMIG0013W : Data connection `test_StorageVolume` is missing.",
112 | "name": "test_sequenceFlow_waitForFile_storagevolume_connection",
113 | "type": "flow_export_error"
114 | }
115 | ],
116 | "id": "1ef16a5e-de72-4b5a-ad20-40432c871e59",
117 | "name": "test_sequenceFlow_waitForFile_storagevolume_connection",
118 | "status": "completed_with_error",
119 | "type": "sequence_job"
120 | }
121 | ],
122 | "tally": {
123 | "total": 49,
124 | "completed": 49,
125 | "skipped": 0,
126 | "failed": 0,
127 | "sequence_jobs_total": 10,
128 | "parallel_jobs_total": 13,
129 | "sequence_job_export_completed": 10,
130 | "parallel_jobs_export_completed": 13,
131 | "sequence_job_export_failed": 0,
132 | "parallel_jobs_export_failed": 0,
133 | "connections_total": 3,
134 | "parameter_sets_total": 6,
135 | "table_definitions_total": 7,
136 | "subflows_total": 0,
137 | "routines_total": 0,
138 | "message_handlers_total": 7,
139 | "build_stages_total": 0,
140 | "custom_stages_total": 0,
141 | "wrapped_stages_total": 0,
142 | "xml_schema_libraries_total": 1,
143 | "function_libraries_total": 0,
144 | "java_libraries_total": 0,
145 | "odm_libraries_total": 0,
146 | "match_spec_total": 0,
147 | "rule_set_total": 0,
148 | "data_quality_spec_total": 0,
149 | "data_quality_rules_total": 0,
150 | "data_quality_definitions_total": 0,
151 | "data_assets_total": 0,
152 | "data_sets_total": 1,
153 | "file_sets_total": 1,
154 | "odbc_configuration_total": 0,
155 | "cff_schemas_total": 0,
156 | "test_cases_total": 0
157 | }
158 | },
159 | "gitcommit": {
160 | "fetch_status": "completed",
161 | "repo": "git-dsjob",
162 | "branch": "demo1122",
163 | "folder": "folder1122",
164 | "commitmessage": "test commit",
165 | "commit_sha": "d90fe805397026f57f7eb40df60df4d24ef64f4e"
166 | }
167 | }
168 |
169 | Status code = 0
170 | ```
171 | Incremental commits can be achieved using the git-commit command; it will commit the flows/pipelines with dependencies unless specified otherwise. The following example shows an incremental commit of a flow and a parameter set.
172 | ```
173 | $ cpdctl dsjob git-commit -p dsjob-demo1122 --repo git-dsjob --branch demo1122 --in-folder folder1122 --name data_intg_flow/TestDataSetFileSet --name parameter_set/pset22 --commit-message "test commit2" --wait 2000
174 | ```
175 |
176 | This results in a PR depending on how you configured your repository.
177 |
178 | 
179 |
180 | #### Git Pull
181 | This API allows users to pull their Git project as a whole, or incrementally for selected assets, from the Git repository into a Cloud Pak Project. Git pull is backed by the ds-migration import service. Apart from specifying the repo, branch, and folder, users can control which assets are replaced or skipped when promoting to a higher environment using the conflict-resolution, hard-replace, and skip-on-replace switches.
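For instance, a pull that overwrites conflicting assets might look like the following sketch (the exact switch names `--conflict-resolution` and `--hard-replace` are assumptions based on the switches named above; check `cpdctl dsjob git-pull --help` for the precise syntax):
```
$ cpdctl dsjob git-pull -p dsjob-pulltest --repo git-dsjob --branch demo1122 --in-folder folder1122 --conflict-resolution replace --hard-replace --wait 200
```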
182 |
183 | Pull can be invoked from UI as shown:
184 |
185 | 
186 |
187 | Users will have a chance to select assets to pull, or can pull the entire project from the repository. Note that during a pull the source of truth is the Git repository, so objects that differ in the project may be overwritten by the versions pulled from the repository.
188 |
189 | 
190 |
191 | Context-based pulls are available from the UI on selected assets.
192 |
193 | 
194 |
195 | #### Git Pull from CLI
196 | Create a project
197 | ```
198 | $ cpdctl dsjob create-project -n dsjob-pulltest
199 | 470b2e48-b94f-4643-a14d-86f1f064859b
200 | ```
201 | Configure the project
202 | ```
203 | $ cpdctl dsjob git-configure -p dsjob-pulltest --git-enable --git-url https://github.aaa.com/user/git-dsjob.git --git-provider git --git-token XXXXX
204 | ```
205 | Now pull the committed repository into the new project
206 | ```
207 | $ cpdctl dsjob git-pull -p dsjob-pulltest --repo git-dsjob --branch demo1122 --in-folder folder1122 --wait 200
208 | ...
209 | 2025-02-19 12:27:17: Waiting until import finishes, import id: 4f21c557-ddbc-40e9-bb2e-0b4235df1046
210 | 2025-02-19 12:27:27: Project import status: started, total: 51, completed: 4, failed: 0, skipped: 0.
211 | 2025-02-19 12:27:47: Project import status: started, total: 51, completed: 32, failed: 0, skipped: 0.
212 | 2025-02-19 12:28:08: Project import status: started, total: 51, completed: 44, failed: 0, skipped: 0.
213 | 2025-02-19 12:28:17: Project import status: started, total: 51, completed: 45, failed: 0, skipped: 0.
214 | 2025-02-19 12:28:38: Project import status: completed, total: 51, completed: 51, failed: 0, skipped: 0.
215 |
216 | Status code = 0
217 | ```
218 |
219 | #### Git Status
220 | Git Integration provides platform-specific computations to determine whether a resource in the Git repository is the same as the resource in the Cloud Pak Project.
221 |
222 | The status displayed is context-based. During a commit, the source of truth is the project: `created` refers to a resource that exists in the project but not in the Git repository, and vice versa. Objects that are modified in the project are shown as `updated`. During a pull, the source of truth comes from the Git repository: a `created` object is one that exists in the Git repository and is ready to be pulled and created in the project.
223 | 
224 |
225 | #### Git Status from CLI
226 | ```
227 | $ cpdctl dsjob git-status -p dsjob-pulltest --repo git-dsjob --branch demo1122 --in-folder folder1122
228 | ...
229 |
230 | Project Changes:
231 |
232 | data_intg_message_handler:
233 | TestMsgH1(48f69aa0-d54c-4bf3-8d75-c39f22633ecc)
234 | TestMsgH1_Import_1724177183930(e0f7d87d-738f-4ebb-a933-f891a4ac5603)
235 | TestMsgH2(25198c63-6adb-4328-bdf0-87c5fdc79a72)
236 | TestMsgH2_Import_1724177183928(3bd6ce40-524e-48bd-8799-d287c0be6f2d)
237 | TestMsgHandler(40f9cf92-659f-460f-8b69-1d591376b271)
238 | TestMsgHandler2(716bbb91-52ca-44ae-894a-3f9b192e8fe2)
239 | TestMsgHandler2_Import_1724177187002(c2c05a51-1741-44b7-a74d-f04357bea33b)
240 | TestMsgHandler_Import_1724177189960(1599abdd-369d-417d-b63f-a0c7709883c8)
241 |
242 | orchestration_flow:
243 | TestPipeline_Run_Twice(720b0364-f691-4257-b145-de9aecb819b7)
244 | SequenceFlow_userVariables_TimestampToDate_expression(7d0099ca-d19c-4488-95fa-3f52ebc53ba9)
245 | testParams(908cbfcc-d28e-4d59-ab8d-3ae064379aa4)
246 | ...
247 | ```
248 |
249 |
250 | Currently this functionality is available only on CPD platforms; we are working to bring it to SaaS soon.
251 |
--------------------------------------------------------------------------------
/dsjob/blogs/ParamSet.md:
--------------------------------------------------------------------------------
1 | # Working with Paramsets and Valuesets
2 |
3 | Let us start with a file that creates a parameter set with two value sets.
4 |
5 | ```
6 | {
7 | "parameter_set": {
8 | "description": "",
9 | "name": "parmSetTest",
10 | "parameters": [
11 | {
12 | "name": "parm1",
13 | "prompt": "parm1",
14 | "subtype": "",
15 | "type": "int64",
16 | "value": 11
17 | },
18 | {
19 | "name": "parm11",
20 | "prompt": "parm11",
21 | "subtype": "",
22 | "type": "string",
23 | "value": "seqFileN10"
24 | }
25 | ],
26 | "value_sets": [
27 | {
28 | "name": "vset2",
29 | "values": [
30 | {
31 | "name": "parm1",
32 | "value": 12
33 | },
34 | {
35 | "name": "parm11",
36 | "value": "test22"
37 | }
38 | ]
39 | },
40 | {
41 | "name": "ValSet1",
42 | "values": [
43 | {
44 | "name": "parm1",
45 | "value": 11
46 | },
47 | {
48 | "name": "parm11",
49 | "value": "seqFileN10"
50 | }
51 | ]
52 | }
53 | ]
54 | }
55 | }
56 | ```
57 |
58 | Let us create a parameter set using `dsjob` plugin.
59 |
60 | ```
61 | $ cpdctl dsjob create-paramset -p dsjob -n parmSetTest -f resources/paramset1.json
62 | ```
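
As a quick check, you can also list the parameter sets in the project. This is a minimal sketch that reuses the `list-paramsets` command shown later in this repository; the exact output columns may vary by release.

```
$ cpdctl dsjob list-paramsets -p dsjob --sort-by-time
```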
63 |
64 | Check the created parameter set
65 |
66 | ```
67 | $ cpdctl dsjob get-paramset -p dsjob -n parmSetTest --output json
68 | {
69 | "parameter_set": {
70 | "description": "",
71 | "name": "parmSetTest",
72 | "parameters": [
73 | {
74 | "name": "parm1",
75 | "prompt": "parm1",
76 | "subtype": "",
77 | "type": "int64",
78 | "value": 11
79 | },
80 | {
81 | "name": "parm11",
82 | "prompt": "parm11",
83 | "subtype": "",
84 | "type": "string",
85 | "value": "seqFileN10"
86 | }
87 | ],
88 | "value_sets": [
89 | ...
90 | ...
91 | ```
92 |
93 |
94 | ### Update the Param Set Default Values
95 | The current default values are `11` and `seqFileN10` for the two parameters parm1 and parm11 respectively.
96 | We can update them from the command line to the new values `22` and `seqFile22` respectively.
97 |
98 | ```
99 | $ cpdctl dsjob update-paramset -p dsjob -n parmSetTest --param int64:parm1:22 --param string:parm11:seqFile22
100 | ...
101 | ParameterSet updated for Paramset ID: f11c5c4f-f491-416e-aa88-15b793c8b403
102 |
103 | Status code = 0
104 | ```
105 |
106 | Now check the parameter set to see that the parameter values are indeed updated. The type must be given when updating a parameter in a parameter set. Valid types supported are `int64, sfloat, string, list, time, timestamp, date, path`.
107 |
108 | ```
109 | $ cpdctl dsjob get-paramset -p dsjob -n parmSetTest --output json
110 | {
111 | "parameter_set": {
112 | "description": "",
113 | "name": "parmSetTest",
114 | "parameters": [
115 | {
116 | "name": "parm1",
117 | "prompt": "parm1",
118 | "subtype": "",
119 | "type": "int64",
120 | "value": 22
121 | },
122 | {
123 | "name": "parm11",
124 | "prompt": "parm11",
125 | "subtype": "",
126 | "type": "string",
127 | "value": "seqFile22"
128 | }
129 | ],
130 | "value_sets": [
131 | ...
132 | ...
133 | ```
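
For reference, the same `type:name:value` syntax applies to the other supported types. The sketch below is purely illustrative: the parameter set name `myParamSet` and the parameters `ratio` and `startDate` are hypothetical, and the value formats (for example for `date`) should be checked against your environment.

```
$ cpdctl dsjob update-paramset -p dsjob -n myParamSet --param sfloat:ratio:0.75 --param date:startDate:2024-01-01
```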
134 |
135 | We can also update the parameter set default values from a file.
136 | The following json file is used to update the parameter set values:
137 |
138 | ```
139 | {
140 | "parameter_set": {
141 | "description": "New Description",
142 | "name": "parmSetTest",
143 | "parameters": [
144 | {
145 | "name": "parm1",
146 | "prompt": "new prompt parm1",
147 | "subtype": "",
148 | "type": "int64",
149 | "value": 333
150 | },
151 | {
152 | "name": "parm11",
153 | "prompt": "new prompt parm11",
154 | "subtype": "",
155 | "type": "string",
156 | "value": "seqFile3333"
157 | }
158 | ]
159 | }
160 | }
161 | ```
162 | Let us apply this file
163 |
164 | ```
165 | $ cpdctl dsjob update-paramset -p dsjob -n parmSetTest -f paramset1.json
166 | ParameterSet updated for Paramset ID: f11c5c4f-f491-416e-aa88-15b793c8b403
167 |
168 | Status code = 0
169 | ```
170 |
171 | Query the paramset to check the values
172 |
173 | ```
174 | $ cpdctl dsjob get-paramset -p dsjob -n parmSetTest --output json
175 | {
176 | "parameter_set": {
177 | "description": "",
178 | "name": "parmSetTest",
179 | "parameters": [
180 | {
181 | "name": "parm1",
182 | "prompt": "new prompt parm1",
183 | "subtype": "",
184 | "type": "int64",
185 | "value": 333
186 | },
187 | {
188 | "name": "parm11",
189 | "prompt": "new prompt parm11",
190 | "subtype": "",
191 | "type": "string",
192 | "value": "seqFile3333"
193 | }
194 | ],
195 |
196 | ...
197 | ...
198 | ```
199 |
200 | We can update just one parameter in a parameter set by providing a json file that contains only what we want to change. Let us look at such a json file: it updates only the parameter `parm11` with a new prompt and value, and also updates the description of the parameter set itself.
201 |
202 | ```
203 | {
204 | "parameter_set": {
205 | "description": "Another description"
206 | "name": "parmSetTest",
207 | "parameters": [
208 | {
209 | "name": "parm11",
210 | "prompt": "another prompt parm11",
211 | "value": "seqFile4444444"
212 | }
213 | ]
214 | }
215 | }
216 | ```
217 |
218 | Now apply this file and take a look at the parameter set
219 |
220 | ```
221 | $ cpdctl dsjob update-paramset -p dsjob -n parmSetTest -f paramset2.json
222 | ...
223 | ParameterSet updated for Paramset ID: f11c5c4f-f491-416e-aa88-15b793c8b403
224 |
225 | Status code = 0
226 |
227 | $ cpdctl dsjob get-paramset -p dsjob -n parmSetTest --output json
228 | {
229 | "parameter_set": {
230 | "description": "Another description",
231 | "name": "parmSetTest",
232 | "parameters": [
233 | {
234 | "name": "parm1",
235 | "prompt": "new prompt parm1",
236 | "subtype": "",
237 | "type": "int64",
238 | "value": 333
239 | },
240 | {
241 | "name": "parm11",
242 | "prompt": "another prompt parm11",
243 | "subtype": "",
244 | "type": "string",
245 | "value": "seqFile4444444"
246 | }
247 | ],
248 | ...
249 | ...
250 | ```
251 |
252 |
253 | ### Updating Value Set in a ParameterSet
254 |
255 | Let us now update the value set `vset2` with new values. To do this from a file, we create a file that represents the new value set `vset2`, as shown below:
256 | ```
257 | {
258 | "name": "vset2",
259 | "values": [
260 | {
261 | "name": "parm1",
262 | "value": "2222"
263 | },
264 | {
265 | "name": "parm11",
266 | "value": "test2222"
267 | }
268 | ]
269 | }
270 | ```
271 |
272 | Before we update this value set, let us get the current value set to check its values:
273 |
274 | ```
275 | $ cpdctl dsjob get-paramset-valueset -p dsjob --paramset parmSetTest -n vset2 --output json
276 |
277 | {
278 | "name": "vset2",
279 | "values": [
280 | {
281 | "name": "parm1",
282 | "value": 12
283 | },
284 | {
285 | "name": "parm11",
286 | "value": "test22"
287 | }
288 | ]
289 | }
290 |
291 | Status code = 0
292 |
293 | ```
294 |
295 | Now run the update command to change vset2 using the definition from the file above:
296 |
297 | ```
298 | $ cpdctl dsjob update-paramset-valueset -p dsjob --paramset parmSetTest -n vset2 -f resources/valueset2.json
299 | ...
300 | ValueSet Updated for Paramset ID: f11c5c4f-f491-416e-aa88-15b793c8b403
301 |
302 | Status code = 0
303 | ```
304 |
305 | The value set is now updated using the definition from the file above.
306 | Let us query the value set to confirm:
307 |
308 | ```
309 | $ cpdctl dsjob get-paramset-valueset -p dsjob --paramset parmSetTest -n vset2 --output json
310 | {
311 | "name": "vset2",
312 | "values": [
313 | {
314 | "name": "parm1",
315 | "value": "2222"
316 | },
317 | {
318 | "name": "parm11",
319 | "value": "test2222"
320 | }
321 | ]
322 | }
323 |
324 | Status code = 0
325 | ```
326 |
327 | Let us now update the second value set from the command line:
328 |
329 | ```
330 | $ cpdctl dsjob update-paramset-valueset -p dsjob --paramset parmSetTest -n ValSet1 --value parm1=888 --value parm11=seqFile888
331 | ...
332 | ValueSet Updated for Paramset ID: f11c5c4f-f491-416e-aa88-15b793c8b403
333 |
334 | Status code = 0
335 | ```
336 |
337 | Now check to see if the values are updated:
338 |
339 | ```
340 | $ cpdctl dsjob get-paramset-valueset -p dsjob --paramset parmSetTest -n ValSet1 --output json
341 | {
342 | "name": "ValSet1",
343 | "values": [
344 | {
345 | "name": "parm1",
346 | "value": 888
347 | },
348 | {
349 | "name": "parm11",
350 | "value": "seqFile888"
351 | }
352 | ]
353 | }
354 |
355 | Status code = 0
356 | ```
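
Because these are ordinary CLI commands, they are easy to script. The following is a minimal sketch, not part of the original walkthrough, that applies the same pair of values to both value sets in one pass using only the commands shown above.

```
# illustrative only: push the same values into both value sets
for vset in vset2 ValSet1; do
  cpdctl dsjob update-paramset-valueset -p dsjob --paramset parmSetTest -n "$vset" \
    --value parm1=11 --value parm11=seqFileN10
done
```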
357 |
358 |
359 |
--------------------------------------------------------------------------------
/dsjob/blogs/SkipOnReplace.md:
--------------------------------------------------------------------------------
1 | # Working with import-zip using option 'skip-on-replace'
2 |
3 | The 'skip-on-replace' option allows us to skip importing objects of the specified types depending on the state of the object. For example, if you are re-importing a zip file with this option, you can skip importing a paramset if it is going to be the same. The 'skip-on-replace' option works on different DataStage asset types such as parameter sets, connections, and flows. 'skip-on-replace' needs to be used along with 'conflict-resolution': if 'conflict-resolution' is not set to 'replace', 'skip-on-replace' is ignored.
4 |
5 | We will go over various use case scenarios here.
6 |
7 | ## 1. 'skip-on-replace' for parameter sets
8 | ### Use case scenario 1: no change on parameter set
9 | Let us consider that we have set up a flow and three parameter sets with the import-zip command, using the paramset.zip file.
10 | ```
11 | % cpdctl dsjob create-project -n DSJob_pset
12 | % cpdctl dsjob import-zip --project DSJob_pset --conflict-resolution replace --file-name paramset.zip --wait 60
13 | ```
14 | ```
15 | % cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time
16 | ...
17 | ParamSet Name |Updated At
18 | ------------- |----------
19 | peek_paramset |2023-10-06T21:39:52Z
20 | column_name_paramset|2023-10-06T21:39:51Z
21 | description_paramset|2023-10-06T21:39:50Z
22 |
23 | Total: 3 Parameter Sets
24 |
25 | Status code = 0
26 | ```
27 | We haven't modified the parameter sets. Let us import the same zip file with the 'skip-on-replace' option.
28 |
29 | ```
30 | % cpdctl dsjob import-zip --project DSJob_pset --conflict-resolution replace --skip-on-replace parameter_set --file-name paramset.zip --wait 60
31 | ...
32 | 2023-10-03 22:03:11: Waiting until import finishes, import id: b290f87c-c347-4f5f-a2c6-6240a170ecef
33 | 2023-10-03 22:03:33: Project import status: completed, total: 4, completed: 4, failed: 0, skipped: 3.
34 | Information:
35 | Parameter Set: column_name_paramset, New parameters are identical to those in the existing parameter set `column_name_paramset`, flow is updated to reference `column_name_paramset`.
36 |
37 | Parameter Set: description_paramset, New parameters are identical to those in the existing parameter set `description_paramset`, flow is updated to reference `description_paramset`.
38 |
39 | Parameter Set: peek_paramset, New parameters are identical to those in the existing parameter set `peek_paramset`, flow is updated to reference `peek_paramset`.
40 |
41 |
42 | Status code = 1
43 | ```
44 | Listing the paramsets shows that they were not imported again, as the update time didn't change.
45 | ```
46 | % cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time
47 | ...
48 | ParamSet Name |Updated At
49 | ------------- |----------
50 | peek_paramset |2023-10-06T21:39:52Z
51 | column_name_paramset|2023-10-06T21:39:51Z
52 | description_paramset|2023-10-06T21:39:50Z
53 |
54 | Total: 3 Parameter Sets
55 |
56 | Status code = 0
57 | ```
58 | ### Use case scenario 2: update parameter set
59 | Let us follow these steps for this scenario:
60 |
61 | **Step 1**. Before updating one parameter set 'peek_paramset', let us check the content of the parameter set and the time when it was created:
62 | ```
63 | % cpdctl dsjob get-paramset --project DSJob_pset --name peek_paramset
64 | ...
65 | ParamSet: peek_paramset(5b533421-2eae-448f-9a79-287bd47ad531)
66 | Name |Type |Default |Prompt
67 | ---- |---- |------- |------
68 | description|string|Test peek stage|
69 | row_count |int64 |10 |
70 |
71 | ValueSet: peek_valueset
72 | Name |Default
73 | ---- |-------
74 | row_count |50
75 | description|Test peek stage
76 |
77 | Status code = 0
78 | ```
79 | ```
80 | % cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time
81 | ...
82 | ParamSet Name |Updated At
83 | ------------- |----------
84 | peek_paramset |2023-10-06T21:39:52Z
85 | column_name_paramset|2023-10-06T21:39:51Z
86 | description_paramset|2023-10-06T21:39:50Z
87 |
88 | Total: 3 Parameter Sets
89 |
90 | Status code = 0
91 | ```
92 |
93 | **Step 2**. Update 'peek_paramset' by deleting its value set and its parameters:
94 | ```
95 | % cpdctl dsjob delete-paramset-valueset --project DSJob_pset --paramset peek_paramset --name peek_valueset
96 | ...
97 | ValueSet Deleted from Paramset ID: 5b533421-2eae-448f-9a79-287bd47ad531
98 |
99 | Status code = 0
100 | ```
101 | ```
102 | % cpdctl dsjob update-paramset --project DSJob_pset --name peek_paramset --delete-param description
103 | ...
104 | ParameterSet updated for Paramset ID: 5b533421-2eae-448f-9a79-287bd47ad531
105 |
106 | Status code = 0
107 | ```
108 | ```
109 | % cpdctl dsjob update-paramset --project DSJob_pset --name peek_paramset --delete-param row_count
110 | ...
111 | ParameterSet updated for Paramset ID: 5b533421-2eae-448f-9a79-287bd47ad531
112 |
113 | Status code = 0
114 | ```
115 |
116 | **Step 3**. After updating the parameter set 'peek_paramset', let us check the content of the parameter set and the update time:
117 | ```
118 | % cpdctl dsjob get-paramset --project DSJob_pset --name peek_paramset
119 | ...
120 | ParamSet: peek_paramset(5b533421-2eae-448f-9a79-287bd47ad531)
121 | Name|Type|Default|Prompt
122 | ----|----|-------|------
123 |
124 | Status code = 0
125 | ```
126 | ```
127 | % cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time
128 | ...
129 | ParamSet Name |Updated At
130 | ------------- |----------
131 | peek_paramset |2023-10-07T03:51:51Z
132 | column_name_paramset|2023-10-06T21:39:51Z
133 | description_paramset|2023-10-06T21:39:50Z
134 |
135 | Total: 3 Parameter Sets
136 |
137 | Status code = 0
138 | ```
139 |
140 | **Step 4**. After the update, let's import the same zip file with the 'skip-on-replace' option.
141 | ```
142 | % cpdctl dsjob import-zip --project DSJob_pset --conflict-resolution replace --skip-on-replace parameter_set --file-name paramset.zip --wait 60
143 | ...
144 | 2023-10-06 20:52:03: Waiting until import finishes, import id: 426bbcb9-dcf2-4050-9840-b14dacef8daa
145 | 2023-10-06 20:52:04: Project import status: started, total: 4, completed: 3, failed: 0, skipped: 3.
146 | 2023-10-06 20:52:25: Project import status: completed, total: 4, completed: 4, failed: 0, skipped: 3.
147 | Information:
148 | Parameter Set: column_name_paramset, New parameters are identical to those in the existing parameter set `column_name_paramset`, flow is updated to reference `column_name_paramset`.
149 |
150 | Parameter Set: description_paramset, New parameters are identical to those in the existing parameter set `description_paramset`, flow is updated to reference `description_paramset`.
151 |
152 | Parameter Set: peek_paramset, New parameters are identical to those in the existing parameter set `peek_paramset`, flow is updated to reference `peek_paramset`.
153 |
154 |
155 | Status code = 1
156 | ```
157 |
158 | **Step 5**. After importing with 'skip-on-replace', let us check the content of the parameter set `peek_paramset` and the update time:
159 | ```
160 | % cpdctl dsjob get-paramset --project DSJob_pset --name peek_paramset
161 | ...
162 | ParamSet: peek_paramset(5b533421-2eae-448f-9a79-287bd47ad531)
163 | Name|Type|Default|Prompt
164 | ----|----|-------|------
165 |
166 | Status code = 0
167 | ```
168 | ```
169 | % cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time
170 | ...
171 | ParamSet Name |Updated At
172 | ------------- |----------
173 | peek_paramset |2023-10-07T03:51:51Z
174 | column_name_paramset|2023-10-06T21:39:51Z
175 | description_paramset|2023-10-06T21:39:50Z
176 |
177 | Total: 3 Parameter Sets
178 |
179 | Status code = 0
180 | ```
181 | Comparing the update times in step 3 and step 5, it is clear that the import with 'skip-on-replace' skipped re-importing the parameter set.
182 |
183 | ### Use case scenario 3: rename a parameter set
184 | Let us follow these steps:
185 |
186 | **Step 1**: Rename the parameter set 'peek_paramset'
187 | ```
188 | % cpdctl dsjob update-paramset --project DSJob_pset --name peek_paramset --to-name peek_paramset_renamed
189 | ...
190 | ParameterSet updated for Paramset ID: 5b533421-2eae-448f-9a79-287bd47ad531
191 |
192 | Status code = 0
193 | ```
194 | **Step 2**: Check the update time
195 | ```
196 | % cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time
197 | ...
198 | ParamSet Name |Updated At
199 | ------------- |----------
200 | peek_paramset_renamed|2023-10-07T05:29:37Z
201 | column_name_paramset |2023-10-06T21:39:51Z
202 | description_paramset |2023-10-06T21:39:50Z
203 |
204 | Total: 3 Parameter Sets
205 |
206 | Status code = 0
207 | ```
208 | **Step 3**: Do import-zip with --skip-on-replace
209 | ```
210 | % cpdctl dsjob import-zip --project DSJob_pset --conflict-resolution replace --skip-on-replace parameter_set --file-name paramset.zip --wait 60
211 | ...
212 | 2023-10-06 22:29:45: Waiting until import finishes, import id: 14a4a8ab-aff2-408a-9a7c-f756ed5b3e97
213 | 2023-10-06 22:29:47: Project import status: started, total: 4, completed: 3, failed: 0, skipped: 2.
214 | 2023-10-06 22:30:07: Project import status: completed, total: 4, completed: 4, failed: 0, skipped: 2.
215 | Information:
216 | Parameter Set: column_name_paramset, New parameters are identical to those in the existing parameter set `column_name_paramset`, flow is updated to reference `column_name_paramset`.
217 |
218 | Parameter Set: description_paramset, New parameters are identical to those in the existing parameter set `description_paramset`, flow is updated to reference `description_paramset`.
219 |
220 |
221 | Status code = 1
222 | ```
223 | **Step 4**: Check the parameter set content and update time
224 | ```
225 | % cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time
226 | ...
227 | ParamSet Name |Updated At
228 | ------------- |----------
229 | peek_paramset |2023-10-07T05:29:46Z
230 | peek_paramset_renamed|2023-10-07T05:29:37Z
231 | column_name_paramset |2023-10-06T21:39:51Z
232 | description_paramset |2023-10-06T21:39:50Z
233 |
234 | Total: 4 Parameter Sets
235 |
236 | Status code = 0
237 | ```
238 | ```
239 | % cpdctl dsjob get-paramset --project DSJob_pset --name peek_paramset
240 | ...
241 | ParamSet: peek_paramset(b75df31b-8960-4d54-b8db-f1a414a9f5d2)
242 | Name |Type |Default |Prompt
243 | ---- |---- |------- |------
244 | description|string|Test peek stage|
245 | row_count |int64 |10 |
246 |
247 | ValueSet: peek_valueset
248 | Name |Default
249 | ---- |-------
250 | row_count |50
251 | description|Test peek stage
252 |
253 | Status code = 0
254 |
255 | ```
256 | This scenario shows that the import with 'skip-on-replace' did not skip the parameter set 'peek_paramset' because it had been renamed, so the import created the parameter set again.
257 |
258 | ### Use case scenario 4: delete a parameter set
259 | Let's follow these steps:
260 |
261 | **Step 1**: Delete a paramset
262 | ```
263 | % cpdctl dsjob delete-paramset --project DSJob_pset --name peek_paramset
264 | ...
265 | Deleted Paramset: peek_paramset
266 |
267 | Status code = 0
268 | ```
269 | **Step 2**: Check the parameter set update time
270 | ```
271 | % cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time
272 | ...
273 | ParamSet Name |Updated At
274 | ------------- |----------
275 | peek_paramset_renamed|2023-10-07T05:29:37Z
276 | column_name_paramset |2023-10-06T21:39:51Z
277 | description_paramset |2023-10-06T21:39:50Z
278 |
279 | Total: 3 Parameter Sets
280 |
281 | Status code = 0
282 | ```
283 | **Step 3**: Do import-zip with --skip-on-replace
284 | ```
285 | % cpdctl dsjob import-zip --project DSJob_pset --conflict-resolution replace --skip-on-replace parameter_set --file-name paramset.zip --wait 60
286 | ...
287 | 2023-10-06 22:52:55: Waiting until import finishes, import id: 73e4fb98-cb9b-4334-a2ff-838a1399cc51
288 | 2023-10-06 22:52:56: Project import status: started, total: 4, completed: 3, failed: 0, skipped: 2.
289 | 2023-10-06 22:53:17: Project import status: completed, total: 4, completed: 4, failed: 0, skipped: 2.
290 | Information:
291 | Parameter Set: column_name_paramset, New parameters are identical to those in the existing parameter set `column_name_paramset`, flow is updated to reference `column_name_paramset`.
292 |
293 | Parameter Set: description_paramset, New parameters are identical to those in the existing parameter set `description_paramset`, flow is updated to reference `description_paramset`.
294 |
295 |
296 | Status code = 1
297 | ```
298 | **Step 4**: Check the parameter set update time and its content
299 | ```
300 | % cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time
301 | ...
302 | ParamSet Name |Updated At
303 | ------------- |----------
304 | peek_paramset |2023-10-07T05:52:56Z
305 | peek_paramset_renamed|2023-10-07T05:29:37Z
306 | column_name_paramset |2023-10-06T21:39:51Z
307 | description_paramset |2023-10-06T21:39:50Z
308 |
309 | Total: 4 Parameter Sets
310 |
311 | Status code = 0
312 | ```
313 | ```
314 | % cpdctl dsjob get-paramset --project DSJob_pset --name peek_paramset
315 | ...
316 | ParamSet: peek_paramset(59467898-8ad4-4067-8954-2f3d657a4acd)
317 | Name |Type |Default |Prompt
318 | ---- |---- |------- |------
319 | description|string|Test peek stage|
320 | row_count |int64 |10 |
321 |
322 | ValueSet: peek_valueset
323 | Name |Default
324 | ---- |-------
325 | row_count |50
326 | description|Test peek stage
327 |
328 |
329 | Status code = 0
330 | ```
331 | In this scenario the import with 'skip-on-replace' did not skip the parameter set 'peek_paramset' because it had been deleted, so the import created the parameter set again.
332 |
333 | This indicates that 'skip-on-replace' only skips objects that already exist in the target environment.
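
To make this easy to verify in your own project, the following is a minimal sketch (assuming a Unix shell) that captures the `list-paramsets` output before and after a re-import and diffs the two; it only uses the commands already shown above.

```
# confirm that a re-import with --skip-on-replace changes nothing
cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time > before.txt
cpdctl dsjob import-zip --project DSJob_pset --conflict-resolution replace \
  --skip-on-replace parameter_set --file-name paramset.zip --wait 60
cpdctl dsjob list-paramsets --project DSJob_pset --sort-by-time > after.txt
diff before.txt after.txt   # empty output means no parameter set was re-imported
```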
334 |
335 | ## 2. 'skip-on-replace' for connection
336 | We will go over various use case scenarios here.
337 |
338 | ### Use case scenario 1: no update on connection
339 | Let us consider that we have set up a flow and a connection with the import-zip command, using the conn.zip file.
340 | ```
341 | % cpdctl dsjob create-project -n DSJob_conn
342 | % cpdctl dsjob import-zip --project DSJob_conn --conflict-resolution replace --file-name conn.zip --wait 60
343 | ```
344 |
345 | Follow these steps below to proceed:
346 |
347 | **Step 1**: Check the connection update time.
348 | ```
349 | % cpdctl dsjob list-connections --project DSJob_conn --sort-by-time
350 | ...
351 | Connection Name|Updated At
352 | ---------------|----------
353 | postgres_con |2023-10-07T06:48:21.967Z
354 |
355 | Total: 1 Connections
356 |
357 | Status code = 0
358 | ```
359 |
360 | **Step 2**: Do not update the connection. Run import-zip with 'skip-on-replace'.
361 | ```
362 | % cpdctl dsjob import-zip --project DSJob_conn --conflict-resolution replace --skip-on-replace connection --file-name conn.zip --wait 60
363 | ...
364 | 2023-10-06 23:55:28: Waiting until import finishes, import id: fe905630-f45e-4456-8a9e-a8390908313b
365 | 2023-10-06 23:55:30: Project import status: started, total: 2, completed: 1, failed: 0, skipped: 1.
366 | 2023-10-06 23:56:19: Project import status: completed, total: 2, completed: 2, failed: 0, skipped: 1.
367 | Information:
368 | Connection: postgres_con, New connection is exactly the same as an existing connection, resource is not updated.
369 |
370 |
371 | Status code = 1
372 | ```
373 |
374 | **Step 3**: Check the connection update time now.
375 | ```
376 | % cpdctl dsjob list-connections --project DSJob_conn --sort-by-time
377 | ...
378 |
379 | Connection Name|Updated At
380 | ---------------|----------
381 | postgres_con |2023-10-07T06:48:21.967Z
382 |
383 | Total: 1 Connections
384 |
385 | Status code = 0
386 | ```
387 | The update time for the connection did not change; the import skipped the connection.
388 |
389 | ### Use case scenario 2: rename a connection
390 | **Step 1**: Rename the connection
391 | ```
392 | % cpdctl dsjob update-connection --project DSJob_conn --name postgres_con --to-name postgres_con_renamed
393 | ...
394 | {
395 | "database": "conndb",
396 | "host": "dummy",
397 | "password": "dummy",
398 | "port": "19518",
399 | "query_timeout": "300",
400 | "ssl": "false",
401 | "username": "dummy"
402 | }
403 |
404 | Status code = 0
405 | ```
406 | **Step 2**: Check the connection update time
407 | ```
408 | % cpdctl dsjob list-connections --project DSJob_conn --sort-by-time
409 | ...
410 | Connection Name |Updated At
411 | --------------- |----------
412 | postgres_con_renamed|2023-10-07T07:18:07.500Z
413 |
414 | Total: 1 Connections
415 |
416 | Status code = 0
417 | ```
418 | **Step 3**: import-zip with --skip-on-replace
419 | ```
420 | % cpdctl dsjob import-zip --project DSJob_conn --conflict-resolution replace --skip-on-replace connection --file-name conn.zip --wait 60
421 | ...
422 | 2023-10-07 00:18:15: Waiting until import finishes, import id: 89836dba-b5f6-48de-812b-3b92dda6fa35
423 | 2023-10-07 00:18:17: Project import status: started, total: 2, completed: 1, failed: 0, skipped: 1.
424 | 2023-10-07 00:19:06: Project import status: completed, total: 2, completed: 2, failed: 0, skipped: 1.
425 | Information:
426 | Connection: postgres_con, New connection is exactly the same as an existing connection, resource is not updated.
427 |
428 |
429 | Status code = 1
430 | ```
431 | **Step 4**: Check the connection update time
432 | ```
433 | % cpdctl dsjob list-connections --project DSJob_conn --sort-by-time
434 | ...
435 | Connection Name |Updated At
436 | --------------- |----------
437 | postgres_con_renamed|2023-10-07T07:18:07.500Z
438 |
439 | Total: 1 Connections
440 |
441 |
442 | Status code = 0
443 | ```
444 | The update time for the connection did not change; the import skipped the connection.
445 |
446 | ### Use case scenario 3: delete a connection
447 | Let us follow these steps:
448 |
449 | **Step 1**: Delete a connection
450 | ```
451 | % cpdctl dsjob delete-connection --project DSJob_conn --name postgres_con_renamed
452 | ...
453 | Deleted Connection: postgres_con_renamed
454 |
455 | Status code = 0
456 | ```
457 | **Step 2**: Check delete time
458 | ```
459 | % cpdctl dsjob list-connections --project DSJob_conn --sort-by-time
460 | ...
461 |
462 | Total: 0 Connections
463 |
464 | Status code = 0
465 | ```
466 | **Step 3**: import-zip with --skip-on-replace
467 | ```
468 | % cpdctl dsjob import-zip --project DSJob_conn --conflict-resolution replace --skip-on-replace connection --file-name conn.zip --wait 60
469 | ...
470 | 2023-10-07 00:39:25: Waiting until import finishes, import id: 7a06c6c7-8913-4af0-8227-1e9ca1a9fbb2
471 | 2023-10-07 00:39:26: Project import status: started, total: 2, completed: 1, failed: 0, skipped: 0.
472 | 2023-10-07 00:40:16: Project import status: completed, total: 2, completed: 2, failed: 0, skipped: 0.
473 |
474 | Status code = 0
475 | ```
476 | **Step 4**: Check the connection update time
477 | ```
478 | % cpdctl dsjob list-connections --project DSJob_conn --sort-by-time
479 | ...
480 | Connection Name|Updated At
481 | ---------------|----------
482 | postgres_con |2023-10-07T07:39:25.952Z
483 |
484 | Total: 1 Connections
485 |
486 | Status code = 0
487 | ```
488 | The above output shows that import-zip with 'skip-on-replace' did not skip the connection and imported it again, because the connection had been deleted.
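
If you want to reproduce the connection scenarios end to end, the following is a rough sketch that strings together the commands used above; the project and file names match the examples in this post.

```
# illustrative end-to-end run of the delete-then-reimport scenario
cpdctl dsjob create-project -n DSJob_conn
cpdctl dsjob import-zip --project DSJob_conn --conflict-resolution replace --file-name conn.zip --wait 60
cpdctl dsjob delete-connection --project DSJob_conn --name postgres_con
cpdctl dsjob import-zip --project DSJob_conn --conflict-resolution replace \
  --skip-on-replace connection --file-name conn.zip --wait 60
cpdctl dsjob list-connections --project DSJob_conn --sort-by-time
```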
489 |
490 |
--------------------------------------------------------------------------------
/dsjob/blogs/StoragePath.md:
--------------------------------------------------------------------------------
1 | # Working with migrate command using 'storage-path' option
2 |
3 | We are using a directory under `/ds-storage` in the examples below.
4 |
5 | First you need to create a storage volume connection under the project:
6 |
7 | Create a new connection asset of type `storage volume`, adding the new volume as `ds::ds-storage` and selecting the check box `Use my platform login credentials`.
8 |
9 | We can pass the `storage-path` value to the command in two ways, as shown below.
10 |
11 | ## 1. Passing the hardcoded storage path
12 |
13 | Run the cpdctl dsjob command as follows:
14 | ```
15 | % cpdctl dsjob migrate --project indrani_storage_path --storage-path /mnts/ds-storage/mystorage --file-name routineTest1.isx
16 | ...
17 |
18 | ID: 3ed629ff-fc6f-4798-bd02-f597b2a6b457
19 | Created 2023-12-14T00:20:13.906Z
20 | Summary
21 | {
22 | "build_stages_total": 0,
23 | "connection_creation_failed": 0,
24 | "connections_total": 0,
25 | "custom_stages_total": 0,
26 | "data_quality_spec_total": 0,
27 | "deprecated": 0,
28 | "failed": 0,
29 | "flow_compilation_failed": 0,
30 | "flow_creation_failed": 0,
31 | "function_libraries_total": 0,
32 | "imported": 0,
33 | "java_libraries_total": 0,
34 | "job_creation_failed": 0,
35 | "message_handlers_total": 0,
36 | "parallel_jobs_total": 0,
37 | "parameter_sets_total": 0,
38 | "pending": 0,
39 | "renamed": 0,
40 | "replaced": 0,
41 | "routines_total": 0,
42 | "sequence_job_creation_failed": 0,
43 | "sequence_jobs_total": 0,
44 | "skipped": 0,
45 | "subflows_total": 0,
46 | "table_definitions_total": 0,
47 | "total": 0,
48 | "unsupported": 0,
49 | "wrapped_stages_total": 0,
50 | "xml_schema_libraries_total": 0
51 | }
52 |
53 | Status code = 0
54 | ```
55 |
56 | ## 2. Passing environment variable for the storage path
57 |
58 | 1. Run the cpdctl dsjob command as follows:
59 | ```
60 | % cpdctl dsjob migrate --project indrani_storage_path --storage-path "\$ROUTINE_DIR" --file-name routineTest1.isx
61 | ...
62 |
63 | ID: 24b67df8-f441-4564-8942-b2f9cb2d5d3c
64 | Created 2023-12-14T00:23:25.981Z
65 | Summary
66 | {
67 | "build_stages_total": 0,
68 | "connection_creation_failed": 0,
69 | "connections_total": 0,
70 | "custom_stages_total": 0,
71 | "data_quality_spec_total": 0,
72 | "deprecated": 0,
73 | "failed": 0,
74 | "flow_compilation_failed": 0,
75 | "flow_creation_failed": 0,
76 | "function_libraries_total": 0,
77 | "imported": 0,
78 | "java_libraries_total": 0,
79 | "job_creation_failed": 0,
80 | "message_handlers_total": 0,
81 | "parallel_jobs_total": 0,
82 | "parameter_sets_total": 0,
83 | "pending": 0,
84 | "renamed": 0,
85 | "replaced": 0,
86 | "routines_total": 0,
87 | "sequence_job_creation_failed": 0,
88 | "sequence_jobs_total": 0,
89 | "skipped": 0,
90 | "subflows_total": 0,
91 | "table_definitions_total": 0,
92 | "total": 0,
93 | "unsupported": 0,
94 | "wrapped_stages_total": 0,
95 | "xml_schema_libraries_total": 0
96 | }
97 |
98 | Status code = 0
99 | ```
100 |
101 | 2. The routine activity script will get created as below:
102 |
103 | ```
104 | # The original, untranslated routine source code (written in IBM InfoSphere DataStage BASIC language)
105 | # and migration service generated dummy script is saved to the following invoked script file.
106 |
107 | echo "CreateLockFile" > $command_name_PATH
108 |
109 | echo ${CreateLockFile_InputArg} > $InputArg_PATH
110 |
111 | sh $ROUTINE_DIR/projects/indrani_storage_path/scripts/DSU.CreateLockFile.sh
112 | ```
113 |
114 | 3. You need to create an environment variable for the routine activity:
115 | ```
116 | Environment variable: ROUTINE_DIR
117 | value : /mnts/ds-storage/mystorage
118 | ```
119 |
120 |
121 |
122 | Note: You can view the StoragePath.mov recording in this folder for a demonstration of these steps.
123 |
--------------------------------------------------------------------------------
/dsjob/blogs/StoragePath.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/StoragePath.mov
--------------------------------------------------------------------------------
/dsjob/blogs/conn.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/conn.zip
--------------------------------------------------------------------------------
/dsjob/blogs/export-import.md:
--------------------------------------------------------------------------------
1 | ## Project Level Export
2 |
3 | #### Export Project with all assets including DataStage
4 | ```
5 | cpdctl dsjob export --project dsjob --name test-export \
6 |     --export-file demo-project-export.zip --wait 200
7 | ```
8 |
9 | #### Find if export has completed
10 | ```
11 | $ cpdctl dsjob list-exports --project dsjob
12 | ...
13 | Name |Export ID |State |Created At |Updated At
14 | ---------- |---------- |-------- |----------- |-----------
15 | test-export|3cce517b-8073-437f-bef3-095c39cf3b80|completed|2023-04-18T05:57:21.752Z|2023-04-18T05:57:31.528Z
16 | test-export|2889506e-1c6f-4b76-9f5e-cd51fed51252|completed|2023-04-19T04:49:42.458Z|2023-04-19T04:49:55.568Z
17 | test-export|e6b386f7-0920-44f2-9288-7bececd61954|completed|2023-04-26T00:11:09.503Z|2023-04-26T00:11:25.154Z
18 | ```
19 |
20 | #### Save the export to a zip file
21 | ```
22 | $ cpdctl dsjob save-export --project dsjob --name test-export \
23 |     --export-file abc.zip
24 | ```
25 |
26 | #### Cleanup the export
27 | ```
28 | cpdctl dsjob delete-export --name test-export
29 | ```
30 |
31 | #### Import the project into a New Project
32 | ```
33 | cpdctl dsjob import --project DSJOB-PROJECT-EXPORT \
34 |     --import-file demo-project-export.zip --wait 200
35 | ```
36 |
37 |
38 | ---
39 | ---
40 | ## Export DataStage Assets
41 | There are three ways to export DataStage assets:
42 | - export-zip
43 | - export-project
44 | - export-datastage-assets
45 |
46 | #### export-zip : Export individual assets
47 | export-zip can be used to export individual flows or pipelines and also their dependencies by default.
48 | ```
49 | cpdctl dsjob export-zip --project dsjob --name Test-DataStage-Flow --file-name test-export-project.zip
50 | or
51 | cpdctl dsjob export-zip --project dsjob --pipeline=testloop2 --file-name test-export-project.zip
52 | ```
53 |
54 | You can export a flow or pipeline without its dependencies if you choose not to export the connections or parameter sets that the flow or pipeline depends on, by using the `--no-deps` option.
55 | It is also important to note that `--no-secrets` lets you skip exporting secrets such as passwords.
56 | ```
57 | cpdctl dsjob export-zip --project dsjob --name Test-DataStage-Flow --file-name test-export-project.zip --no-deps --no-secrets
58 | ```
59 |
60 | If you have developed multiple flows and pipelines and want to export them all into a zip file, use one of the following options to export the flows and pipelines with their dependencies:
61 |
62 | ```
63 | cpdctl dsjob export-zip --project dsjob --name={fsTarget,dsTarget} --pipeline={testloop2,testPipe} --file-name test-export-project.zip
64 | or
65 | cpdctl dsjob export-zip --project dsjob --name fsTarget --name dsTarget --pipeline testloop2 --pipeline testPipe --file-name test-export-project.zip
66 | ```
67 |
69 |
70 |
71 | #### export-project : Export the DataStage flows and pipelines in a project with dependencies
72 | ```
73 | cpdctl dsjob export-project --project DSJob_Test
74 | --file-name DSJob_Test-project.zip --wait 200
75 | ```
76 | If `--wait` is not used...
77 | get-export-project : Check the status of the export
78 | ```
79 | $ cpdctl dsjob get-export-project --project dsjob
80 | ```
81 | Once the export is completed...
82 | save-export-project: Save the exported project to local disk
83 | ```
84 | cpdctl dsjob save-export-project --project dsjob --file-name \
85 |     test-export-project.zip
86 | ```
87 | Stop the export if something is not right...
88 | ```
89 | cpdctl dsjob stop-export-project --project dsjob
90 | ```
91 |
92 | #### export-datastage-assets : Export all DataStage Assets
93 | Export every DataStage asset in the project.
94 | ```
95 | cpdctl dsjob export-datastage-assets --project DSJob_Test \
96 |     --file-name DSJob_Test.zip
97 | ```
98 |
99 | #### import-zip : Import a DataStage artifact file
100 | Control how you import
101 | ```
102 | cpdctl dsjob import-zip --project DSJob_Test \
103 |     --file-name test-dependencies2.zip \
104 |     --conflict-resolution replace \
105 |     --skip-on-replace connection --wait 200
106 | ```
107 | If `--wait` is not used...
108 | get-import-zip : Check the status of the import
109 | ```
110 | cpdctl dsjob get-import-zip --project DSJob_Test \
111 |     --import-id f95e4ba8-d64d-4c5c-aa14-b0a3671fccb9
112 | ```
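
Putting the pieces together, a typical promotion from one project to another can be scripted with the commands above. This is a hedged sketch only; `SourceProject` and `TargetProject` are placeholder names.

```
# export everything from the source project, then import into the target project
cpdctl dsjob export-datastage-assets --project SourceProject --file-name source-assets.zip
cpdctl dsjob import-zip --project TargetProject --file-name source-assets.zip \
    --conflict-resolution replace --skip-on-replace connection --wait 200
```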
113 |
114 |
--------------------------------------------------------------------------------
/dsjob/blogs/file1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/file1
--------------------------------------------------------------------------------
/dsjob/blogs/file2:
--------------------------------------------------------------------------------
1 | record
2 | ( Name: string[variable_max=10];
3 | Age: int32;
4 | DOB: date;
5 | )
--------------------------------------------------------------------------------
/dsjob/blogs/gitapi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitapi.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitbulkcommit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitbulkcommit.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitbulkcommit2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitbulkcommit2.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitbulkcommit3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitbulkcommit3.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitcommitpr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitcommitpr.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitcommitstatus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitcommitstatus.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitconfiguration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitconfiguration.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitcontextcommit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitcontextcommit.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitpull.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitpull.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitpull2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitpull2.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitpull3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitpull3.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitrepo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitrepo.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitrepo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitrepo2.png
--------------------------------------------------------------------------------
/dsjob/blogs/gitstatus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/gitstatus.png
--------------------------------------------------------------------------------
/dsjob/blogs/migrateConnection.md:
--------------------------------------------------------------------------------
1 | # Migrating workloads from legacy InfoSphere Information Server to DataStage Nextgen on CP4D
2 |
3 | When we migrate integration flows from legacy InfoSphere Information Server (IIS) to DataStage Nextgen on CP4D, data sources can be configured in many ways. It is possible to configure connection properties manually, using parameter sets, or using global environment variables. This blog demonstrates these three scenarios for migration into a CP4D environment, assuming that we already have the isx file generated from IIS.
4 |
5 | ## Scenario 1
6 |
7 | In this scenario we use an isx file built from legacy IIS that contains a manually configured connection; it does not contain any parameter set. We can migrate the isx file objects into CP4D using the `cpdctl dsjob` command, either with or without enabling local connections. Both cases are described below. All connection properties are migrated directly into the connection asset or the flow and there is no parameterization. This method is static in nature and works well if the number of connection assets is manageable.
8 |
9 | ### Case 1: Enabling local connection
10 |
11 | Example `cpdctl dsjob` command to migrate is shown below. `OracleToPeek.isx` is exported from IIS which does not contain any parameter sets:
12 | ```
13 | % cpdctl dsjob migrate --project migrate_conn_no_param --file-name OracleToPeek.isx --enable-local-connection
14 | ```
15 | In this case, only the flow gets created and no connection asset is created. In this example, the flow contains `Oracle(optimized)` as a source stage. The `Oracle(optimized)` stage properties get populated as:
16 |
17 | ```
18 | Connection -> View connection ->
19 | Connection details -> Hostname ->
20 | Connection details -> Port ->
21 | Connection details -> Servicename ->
22 | Connection details -> Username ->
23 | Connection details -> Password ->
24 | ```
25 |
26 | ### Case 2: Without enabling local connection
27 |
28 | Example `cpdctl dsjob` command to migrate is shown below, `OracleToPeek.isx` is exported from IIS which does not contain any parameter sets:
29 |
30 | ```
31 | % cpdctl dsjob migrate --project migrate_conn_no_param --file-name OracleToPeek.isx
32 | ```
33 | In this case, the flow and the connection asset get created. In this example, in the flow, the `Oracle(optimized)` stage properties get populated as:
34 |
35 | ```
36 | Connection -> View connection ->
37 | ```
38 |
39 | Connection asset has all the following migrated connection properties which get populated with the values:
40 |
41 | ```
42 | Name
43 | Hostname
44 | Port
45 | Servicename
46 | Username
47 | Password
48 | ```
49 |
50 | In both cases, nothing needs to be edited in the flow or in the connection asset. The flow can be run successfully without editing anything.
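
Optionally, you can confirm the difference between the two cases from the command line: in Case 2 a connection asset appears in the project, while in Case 1 it does not. The sketch below reuses the `list-connections` command shown elsewhere in this repository.

```
% cpdctl dsjob list-connections --project migrate_conn_no_param --sort-by-time
```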
51 |
52 |
53 | ## Scenario 2
54 |
55 | In this scenario a connection is parameterized such that all the connection properties are migrated into a parameter set and the parameter set is referenced by the connection itself. It provides the flexibility of changing the connections dynamically.
56 |
57 | Example `cpdctl dsjob` command to migrate is shown below; `OracleRoPeek_Param.isx` is exported from IIS and contains a parameterized connection:
58 |
59 | ```
60 | % cpdctl dsjob migrate --project migrate_conn_paramset --create-connection-paramsets --file-name OracleRoPeek_Param.isx
61 | ```
62 |
63 | In this scenario, the assets that get created are the flow, the parameter set, and the connection asset.
64 |
65 | In the generated flow, `Oracle(optimized)` stage properties get populated as:
66 |
67 | ```
68 | Connection -> View connection ->
69 | ```
70 |
71 | Connection asset has the following migrated connection properties populated with the values referencing the parameter values of the parameter set `ORC_Pset1` in this example:
72 |
73 | ```
74 | Hostname #ORC_Pset1.ORC_SVC_NAME#
75 | Port #ORC_Pset1.oracle_db_port#
76 | Servicename #ORC_Pset1.oracle_service_name#
77 | Username #ORC_Pset1.UID#
78 | ```
79 |
80 | The parameter set `ORC_Pset1` contains the following parameters, in this example:
81 |
82 | ```
83 | ORC_SVC_NAME conops-oracle21.fyre.ibm.com:1521/orclpdb
84 | UID tm_ds
85 | PWD ******
86 | oracle_service_name
87 | oracle_db_port
88 | ```
89 |
90 | You need to follow these manual steps to complete the migration process before you run the flow:
91 |
92 | ### Step 1. Edit the parameter set
93 |
94 | Edit the parameter set `ORC_Pset1` like below.
95 |
96 | Before edit, `Default value` appears as:
97 |
98 | ```
99 | ORC_SVC_NAME conops-oracle21.fyre.ibm.com:1521/orclpdb
100 | UID tm_ds
101 | PWD ******
102 | oracle_service_name
103 | oracle_db_port
104 | ```
105 |
106 | After you edit the parameter set, it should look as:
107 |
108 | ```
109 | ORC_SVC_NAME conops-oracle21.fyre.ibm.com
110 | UID tm_ds
111 | PWD ******
112 | oracle_service_name orclpdb
113 | oracle_db_port 1521
114 | ```
115 |
116 | Note the change in the values of `ORC_SVC_NAME`, `oracle_service_name` and `oracle_db_port` after edit.
117 |
118 | ### Step 2. Compile and run the flow
119 |
120 | Now the flow can be compiled and run.
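
For reference, compilation and the run can also be driven from the command line with the `cpdctl dsjob compile` and `run` commands. The sketch below is only an outline: the flow and job names are placeholders, and the exact flags should be checked against the dsjob documentation for your release.

```
# illustrative only: compile the migrated flow, then run its job
cpdctl dsjob compile --project migrate_conn_paramset --name <flow name>
cpdctl dsjob run --project migrate_conn_paramset --job <job name> --wait 200
```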
121 |
122 | ## Scenario 3
123 | In this scenario, the connection properties are migrated using global environment variables. The advantage of this method is that the parameters can be used by many data sources in the project. It becomes easy to manage all the flows by directly changing the connection in the PROJDEF, which reduces the administrative complexity when updating connections. This scenario requires migrating the isx file along with the DSParams file from the IIS environment into CP4D.
124 |
125 | Follow these steps to complete the migration:
126 |
127 | ### Step 1. Run `cpdctl dsjob migrate` command
128 |
129 | Example `cpdctl dsjob` command to migrate is shown below; `OracleToPeek_Projdef.isx` is exported from IIS, and its contents are described after the command:
130 |
131 | ```
132 | % cpdctl dsjob migrate --project migrate_conn_paramset --create-connection-paramsets --file-name OracleToPeek_Projdef.isx
133 | ```
134 |
135 | `OracleToPeek_Projdef.isx` contains the paramset `ORC_Pset`, `PROJDEF`, a connection asset, and a flow.
136 |
137 | In the generated flow, the `Oracle(optimized)` stage properties get populated as:
138 |
139 | ```
140 | Connection -> View connection ->
141 | ```
142 |
143 | The paramset `ORC_Pset` contains:
144 |
145 | ```
146 | $ORC_SVC_NAME PROJDEF
147 | $PWD ******
148 | $UID PROJDEF
149 | oracle_service_name
150 | oracle_db_port
151 | ```
152 |
153 | The `PROJDEF` contains:
154 | ```
155 | $UID UID
156 | $ORC_SVC_NAME ORC_SVC_NAME
157 | ```
158 |
159 | The generated connection asset contains:
160 |
161 | ```
162 | Hostname #ORC_Pset.$ORC_SVC_NAME#
163 | Port #ORC_Pset.oracle_db_port#
164 | Servicename #ORC_Pset.oracle_service_name#
165 | Username #ORC_Pset.$UID#
166 | ```
167 |
168 | ### Step 2. Run `cpdctl dsjob create-dsparams` command
169 |
170 | Create dsparams by running the following command:
171 |
172 | ```
173 | % cpdctl dsjob create-dsparams --project migrate_conn_projdef --file-name DSParams.txt
174 | ```
175 |
176 | Example DSParams.txt content here:
177 | ```
178 | [serveronly-functions]
179 | [parallelonly-beforeafter]
180 | [system-variables]
181 | [serveronly-system-variables]
182 | [parallelonly-system-variables]
183 | [EnvVarDefns]
184 | ORC_SVC_NAME\User Defined\-1\String\\0\Project\ORC_SVC_NAME\
185 | UID\User Defined\-1\String\\0\Project\UID\
186 | PWD\User Defined\-1\Encrypted\\0\Project\PWD\
187 | [PROJECT]
188 | [InternalSettings]
189 | [EnvVarValues]
190 | "ORC_SVC_NAME"\1\"conops-oracle21.fyre.ibm.com:1521/orclpdb"
191 | "UID"\1\"tm_ds"
192 | "PWD"\1\"{iisenc}6iJys4F7fdGGGYrOx6hehQ=="
193 |
194 | ```
195 |
196 | In this process, `PROJDEF` gets updated as:
197 | ```
198 | $UID tm_ds
199 | $ORC_SVC_NAME conops-oracle21.fyre.ibm.com:1521/orclpdb
200 | $PWD {dsnextenc}CkmnmfdOwoauTg2eHINZfw==
201 | ```
202 |
203 | ### Step 3. Edit `PROJDEF`
204 |
205 | Edit `PROJDEF` for $ORC_SVC_NAME.
206 |
207 | Before editing, `PROJDEF` content is:
208 | ```
209 | $UID tm_ds
210 | $ORC_SVC_NAME conops-oracle21.fyre.ibm.com:1521/orclpdb
211 | $PWD {dsnextenc}CkmnmfdOwoauTg2eHINZfw==
212 | ```
213 |
214 | After editing, `PROJDEF` content is:
215 | ```
216 | $UID tm_ds
217 | $ORC_SVC_NAME conops-oracle21.fyre.ibm.com
218 | $PWD {dsnextenc}CkmnmfdOwoauTg2eHINZfw==
219 | ```
220 |
221 | Note the value of `$ORC_SVC_NAME` after editing `PROJDEF`.
222 |
223 | ### Step 4. Edit paramset
224 | In this example, edit the param set `ORC_Pset`.
225 |
226 | Before editing the paramset `ORC_Pset`, the values of the parameters are:
227 | ```
228 | $ORC_SVC_NAME PROJDEF
229 | $PWD ******
230 | $UID PROJDEF
231 | oracle_service_name
232 | oracle_db_port
233 | ```
234 |
235 | After editing the paramset `ORC_Pset`, the values of the parameters are:
236 | ```
237 | $ORC_SVC_NAME PROJDEF
238 | $PWD ******
239 | $UID PROJDEF
240 | oracle_service_name orclpdb
241 | oracle_db_port 1521
242 | ```
243 |
244 | Note the value of `oracle_service_name` and `oracle_db_port` after editing `ORC_Pset`.
245 |
246 | ### Step 5. Compile and run the flow
247 |
248 | Now, the flow can be compiled and run.
249 |
250 |
--------------------------------------------------------------------------------
/dsjob/blogs/paramset.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/dsjob/blogs/paramset.zip
--------------------------------------------------------------------------------
/dsjob/blogs/sequencer.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Run pipelines using optimized runner.
4 |
5 | Running a previously designed Orchestration pipeline with the optimized runner involves two steps: `compile-pipeline` and `run-pipeline` with the `--optimize` option.
6 |
7 | #### Compile Pipeline as sequencer job.
8 |
9 | This step compiles an existing pipeline as a sequencer job. The command performs the following three operations:
10 | - compiles the existing pipeline into code that is persisted on a volume and ready for execution
11 | - creates a sequencer job with the name `.DataStage sequence job`, that can run this code
12 | - moves the job to the same folder the pipeline belongs to
13 |
14 | The following syntax compiles a single pipeline or all the pipelines in a project:
15 |
16 | ```
17 | cpdctl dsjob compile-pipeline {--project PROJECT | --project-id PROJID} {--name PIPELINE | --id PIPELINEID} [--code] [--threads ] [--enable-inline]
18 |
19 | ```
20 | - `project` is the name of the project that contains the pipeline.
21 | - `project-id` is the id of the project. One of `project` or `project-id` must be specified.
22 | - `name` is the name of the flow or a pipeline.
23 | - `id` is the id of the flow or a pipeline. One of `name` or `id` must be specified.
24 | - `code` outputs the details of the generated code.
25 | - `threads` specifies the number of parallel compilations to run. The value should be in the range 5-20; the default value is 5. This field is optional.
26 | - `enable-inline` when set to false, generates code to run each nested pipeline as an independent sequencer job. The default value is `true`.
27 |
28 | ex:
29 | // compile all pipelines in a project by concurrently compiling 10 at a time
30 | `cpdctl dsjob compile-pipeline -p dsjob-project --threads 10`
31 |
32 | // compile a single pipeline in the project
33 | `cpdctl dsjob compile-pipeline -p dsjob-project --name test-pipeline`
34 |
35 | #### Run Pipeline as sequencer job
36 |
37 | To run the pipeline as a sequencer job, you must use two new options when configuring and running the `run-pipeline` command. For the general syntax of the `run-pipeline` command, please refer to the [documentation](https://github.com/IBM/DataStage/blob/main/dsjob/dsjob.5.0.2.md#running-pipelines).
38 |
39 | The syntax for running the pipeline is shown below. Please note that the two new options `--optimize` and `--skip-compile` are available starting with cpdctl release [1.6.62](https://github.com/IBM/cpdctl/releases/tag/v1.6.62).
40 | To use the `enable-inline` option, please use cpdctl version [1.6.78](https://github.com/IBM/cpdctl/releases/tag/v1.6.78).
41 |
42 |
43 | - `optimize` when set, the pipeline is run as a sequencer job. If not specified, the pipeline is run as a normal pipeline execution.
44 | - `skip-compile` when true, the pipeline is not compiled during the run; if this flag is false, the pipeline is compiled as part of the run. This flag is only effective when the `optimize` flag is set for the run, i.e. in optimized runner mode.
45 | - `enable-inline` when set to false, allows each nested pipeline to run as an independent sequencer job. If `skip-compile` is set, this flag has no effect because the existing compiled code is run as is.
46 |
47 |
48 | The following are some examples:
49 |
50 | ##### Run pipeline as sequencer job
51 | ```
52 | cpdctl dsjob run-pipeline --project dsjob-test --name testbashds --optimize --wait 200
53 | or
54 | cpdctl dsjob run-pipeline --project dsjob-test --name testbashds --optimize=true --wait 200
55 | or
56 | cpdctl dsjob run-pipeline --project dsjob-test --name testbashds --optimize --skip-compile=true --wait 200
57 | ```
58 | ##### Run pipeline as sequencer job with independent nested sequencer job runs
59 | Run pipeline with enable-inline set to false.
60 | ```
61 | cpdctl dsjob run-pipeline --project dsjob-test --name testbashds --optimize --wait 200 --enable-inline=false
62 | ```
63 |
64 |
65 | ##### Run pipeline without compiling
66 | Note: If you run the pipeline without compilation, the run behavior will depend on the already compiled code, with or without the enable-inline option.
67 | ```
68 | cpdctl dsjob run-pipeline --project dsjob-test --name testbashds --optimize --skip-compile --wait 200
69 | or
70 | cpdctl dsjob run-pipeline --project dsjob-test --name testbashds --optimize=true --skip-compile=true --wait 200
71 | ```
72 |
73 |
74 | ##### Some internals
75 | When you run the pipeline using the above command, a job gets created with the name `.DataStage sequence job`.
76 |
77 | If you want to configure the job further, you can choose to use the cpdctl `update-job` or `schedule-job` commands; see [update-job](https://github.com/IBM/DataStage/blob/main/dsjob/dsjob.5.0.0.md#updating-jobs) and [schedule-job](https://github.com/IBM/DataStage/blob/main/dsjob/dsjob.5.0.2.md#scheduling-jobs).
78 |
79 | You can also use UI to update the job from Jobs Dashboard.
80 |
81 | Note: You cannot view the run through the Run Tracker when running the pipeline as a sequencer job
82 |
--------------------------------------------------------------------------------
/dsjob/changelog.md:
--------------------------------------------------------------------------------
1 |
2 | # DataStage command-line change log
3 |
4 | The following updates and changes apply to the `dsjob` command-line
5 | interface.
6 |
7 | [5.1.3](#513)
8 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.1.3.md)
9 |
10 | [5.1.2](#512)
11 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.1.2.md)
12 |
13 | [5.1.1](#511)
14 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.1.1.md)
15 |
16 | [5.1.0](#510)
17 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.1.0.md)
18 |
19 | [5.0.3](#503)
20 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.0.3.md)
21 |
22 | [5.0.2](#502)
23 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.0.2.md)
24 |
25 | [5.0.1](#501)
26 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.0.1.md)
27 |
28 | [5.0.0](#500)
29 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.5.0.0.md)
30 |
31 | [4.8.5](#485)
32 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.5.md)
33 |
34 | [4.8.4](#484)
35 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.4.md)
36 |
37 | [4.8.3](#483)
38 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.3.md)
39 |
40 | [4.8.2](#482)
41 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.2.md)
42 |
43 | [4.8.1](#481)
44 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.1.md)
45 |
46 | [4.8.0](#480)
47 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.8.0.md)
48 |
49 | [4.7.4](#474)
50 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.4.md)
51 |
52 | [4.7.3](#473)
53 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.3.md)
54 |
55 | [4.7.2](#472)
56 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.2.md)
57 |
58 | [4.7.1](#471)
59 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.1.md)
60 |
61 | [4.7.0](#470)
62 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.7.0.md)
63 |
64 | [4.6.6](#466)
65 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.6.6.md)
66 |
67 | [4.6.4](#464)
68 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.6.4.md)
69 |
70 | [4.6.2](#462)
71 | [Documentation](https://github.com/IBM/DataStage/tree/main/dsjob/dsjob.4.6.2.md)
72 |
73 | ## 5.1.3
74 |
75 | ### New commands
76 |
77 | The following commands are added:
78 |
79 | - `list-ds-runs` provides a report of DataStage job runs and pipeline runs.
80 | - `reset-pipeline-cache` allows you to reset the pipeline optimizer cache.
81 | - `describe-seq-file` describes the metadata information for a sequence file.
82 | - `view-data-file` allows you to browse data from a sequential file or a CFF file.
83 |
84 | ### Command changes
85 |
86 | The following commands have changed:
87 |
88 | - `import` command now takes a new flag `ok-to-replace` for the user to acknowledge that it is OK to replace assets that already exist.
89 | - `run` and `run-pipeline` (optimizer) commands changed to interpret a `warn-limit` value of 0 as no limit (see the sketch after this list).
90 | - `compile-pipeline` takes a new flag `enable-cache` to enable caching on optimizer pipelines.
91 | - `waitforjob` command has been enhanced to support pipelines.
92 |
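For example, a minimal sketch of the new `warn-limit` behavior (project and job names are placeholders):
```
# a warn-limit of 0 is now interpreted as "no warning limit" for the run
cpdctl dsjob run --project <PROJECT> --name <JOB> --warn-limit 0 --wait 200
```
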
93 | ### Fixes
94 |
95 | - `run-pipeline` command has been reworked for better performance for optimized runs.
96 |
97 | ## 5.1.2
98 |
99 | ### New commands
100 |
101 | The following commands are added:
102 |
103 | - `index-project` indexes all assets in a project to facilitate fast search.
104 | - `get-index-status` gets current status of indexed assets in a project.
105 | - `enable-project-indexing` enables a project for indexing.
106 | - `test-repository-connection` tests connection to the repository used by project indexing.
107 | - `copy-dataset` copies a dataset to a new name.
108 | - `copy-fileset` copies a fileset to a new name.
109 |
110 | ### Command changes
111 |
112 | The following commands have changed:
113 |
114 | - `list-jobs` adds a new flag `--last-run` to list the last run of each job; the field is left empty if a job has never run (see the sketch after this list).
115 | - `lognewest` uses the environment variable ENABLE_IIS_EXITCODE to print output accordingly.
116 | - `upload-data-asset` takes the arguments `--mime` and `--tag` to specify the asset type and tag the asset for searching.
117 | - `migrate` command adds a new option `migrate-bash-param` to migrate bash params.
118 |
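For example, a minimal sketch of the new `list-jobs` flag (the project name is a placeholder):
```
# include each job's last run in the listing; the column is empty for jobs that have never run
cpdctl dsjob list-jobs --project <PROJECT> --last-run
```
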
119 | ### Fixes
120 |
121 | - `upload-data-asset` and `download-data-asset` commands are fixed and will handle data assets as files.
122 | - `view-fileset` fixes an issue with fetching an empty fileset; the display is corrected.
123 | - `git-status` command output changed to address visibility in dark mode.
124 | - `update-paramset` and `update-paramset-valueset` now handle empty strings without setting them to null.
125 | - `run-pipeline` is fixed to use correct suffix from project settings.
126 |
127 | ## 5.1.1
128 |
129 | ### New commands
130 |
131 | The following commands are added:
132 |
133 | - `copy-dataset` adds the ability to copy a dataset to a different location as a new dataset asset.
134 | - `copy-fileset` adds the ability to copy a fileset to a different location as a new fileset asset.
135 | - `delete-data-asset` deletes an existing data asset from a project.
136 | - `download-data-asset` downloads a data asset to a local file.
137 | - `export-data-asset` exports a data asset to a zip file compatible with import-zip.
138 | - `list-data-assets` lists all data assets in a project.
139 | - `upload-data-asset` updates a data asset with local file content.
140 | - `list-datasources` lists all data sources available in a project.
141 |
142 | ### Command changes
143 |
144 | The following commands have changed:
145 |
146 | - `create-connection` and `update-connection` now allow the `--parameterize` flag to let users create or update the connection port as a parameterized value.
147 | - `update-ds-setting` now supports setting default suffixes through the flags `--datastage-job-suffix`, `--pipeline-job-suffix`, and `--pipeline-optimized-suffix` for DataStage jobs, Pipelines, and Optimized Runner jobs respectively (see the sketch after this list).
148 | - `update-ds-settings` adds a new flag `--enable-remote-engine` to allow only the remote engine option. This option is irreversible.
149 | - `migrate` adds a new flag `--migrate_jdbc_impala` to migrate as an Impala connection.
150 | - `compile-pipeline` takes a new flag `--job-suffix` to create jobs with a specific suffix.
151 | - `rename-dataset` adds a new flag `--deep` to rename a dataset and its underlying physical files.
152 | - `rename-fileset` adds a new flag `--deep` to rename a fileset and its underlying physical files.
153 |
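For example, a minimal sketch of setting the default suffixes (the suffix values are placeholders; only the flag names come from this change log):
```
# set default suffixes for DataStage jobs, pipeline jobs, and optimized runner jobs
cpdctl dsjob update-ds-setting --project <PROJECT> \
    --datastage-job-suffix "<suffix1>" \
    --pipeline-job-suffix "<suffix2>" \
    --pipeline-optimized-suffix "<suffix3>"
```
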
154 | ### Fixes
155 |
156 | - `jobrunclean` and `list-active-runs` commands use the new job run API and include some performance improvements.
157 | - `run-pipeline` fixes an issue with fetching the logURL for each stage that was caused by framework changes.
158 |
159 | ## 5.1.0
160 |
161 | ### New commands
162 |
163 | The following commands are added:
164 |
165 | - `update-project` can currently be used to enable folder support on a project.
166 | - `get-git-commit` provides git commit status while commit is in progress.
167 | - `get-git-pull` provides git pull status while pull is in progress.
168 | - `get-wlmconfig` provides Workload Management configuration set on a px instance.
169 | - `update-wlparams` updates WLM params for a WLM configuration on a px instance.
170 | - `update-wlresources` updates WLM resources for a WLM configuration on a px instance.
171 | - `update-wlqueues` updates WLM queues for a WLM configuration on a px instance.
172 | - `list-track-project` provides a list of all projects that are tracked for changes; this information is useful for git operations.
173 | - `update-build-stage` updates build stage with new definition.
174 | - `update-message-handler` updates message handler with new definitions.
175 |
176 | ### Command changes
177 |
178 | The following commands have changed:
179 |
180 | - `update-ds-setting`: New formats are now accepted for timestamp ("%mm-%dd-%yyyy %hh:%nn:%ss"), time ("%hh:%nn:%ss"), and date ("%mm-%dd-%yyyy").
181 | - `warn-limit` option is added to control, at the project level, the warning limit before a job is aborted.
182 | - `create-job` optionally takes the `optimize` parameter, which creates a DataStage sequence runner job. It now takes the `enable-inline` flag to run nested pipelines inline instead of as separate jobs.
183 | - `folders` option is added to the following commands to allow assets to be created in a folder:
184 | - `create-flow`, `create-cff-schema`, `create-connection`, `create-build-stage` , `create-custom-stage`, `create-wrapped-stage`, `create-java-library`, `create-message-handler`, `create-paramset`, `create-pipeline-job`, `create-rule`, `create-subflow`, `create-tabledef`, `create-function-lib` and `create-job`.
185 | - `git-status` command changed to control the display of identical, modified, and deleted objects using the flags `hide-identical`, `hide-modified`, and `hide-deleted` respectively.
186 | - `migrate` now has two new flags: `enable-autosave` to autosave migrated pipelines, and `enable-jinja` to allow Jinja templates in a bash script.
187 | - `run` commands now take a list of paramfiles to allow loading job parameters from multiple files.
188 | - `compile-pipeline` and `run-pipeline` add a new flag `enable-inline`; when set to false, nested pipelines run as independent sequence runner jobs.
189 |
190 | ### Fixes
191 |
192 | - `list-jobruns`, `list-job-status`, `list-active-runs` and `jobrunclean` fixed to address issues when switching to the new Jobs API.
193 | - `describe-dataset` and `describe-fileset` now display the same partition information when output is set to json format.
194 | - `jobinfo` command output changed to retrofit changes from the job API. Job runs are now missing some CAMS metadata.
195 |
196 | ## 5.0.3
197 |
198 | ### New commands
199 |
200 | The following commands are added:
201 |
202 | - `generate-project-report` Generates a project-level report on Flows and Pipelines and their components.
203 | - `update-flow` Update the flow with a new json definition.
204 | - `compile-pipeline` Compile a pipeline into an executable script to run it as a DataStage job.
205 |
206 | ### Command changes
207 |
208 | The following commands have changed:
209 |
210 | - `run-pipeline`: Updated to run the pipeline using the new options `optimize` and `skip-compile` to execute the pipeline as a DataStage sequence runner.
211 | - `create-job` optionally takes the `optimize` parameter, which creates a DataStage sequence runner job.
212 | - `list-folders`: now has an option `--output json` to output json results.
213 |
214 | ### Fixes
215 |
216 | - `get-paramset`, `delete-paramset` and `delete-paramset-valueset` return a proper exit code when an invalid name is specified.
217 | - `run-pipeline` run status message outputs the actual job runtime by default, unless the environment variable `DSJOB_SHOW_RUNTIMES` is set to 1, in which case the CLI execution time is shown.
218 |
219 | ## 5.0.2
220 |
221 | ### New commands
222 |
223 | The following commands are added:
224 |
225 | - `list-scheduled-jobs` List all scheduled jobs.
226 |
227 | ### Command changes
228 |
229 | The following commands have changed:
230 |
231 | `list-dependencies`:
232 | `list-usage`:
233 |
234 | - enhanced to support pipeline/orchestration flow dependencies
235 |
236 | `view-dataset`:
237 | `view-fileset`:
238 |
239 | - added the ability to generate an `orchadmins dump`-like output to a file using a record delimiter.
240 |
241 | `list-jobruns`:
242 |
243 | - if `--run-name` is given, it filters all job runs matching the run-name (see the sketch below)
244 |
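For example, a minimal sketch of the run-name filter (names are placeholders; the `--name` flag for the job is an assumption):
```
# list only the runs whose run-name matches
cpdctl dsjob list-jobruns --project <PROJECT> --name <JOB> --run-name <RUN-NAME>
```
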
245 | `get-job run`:
246 |
247 | - added the ability to fetch the last run of the job with a specific run-name
248 |
249 | `list-jobs`
250 |
251 | - now works at the cluster level, listing all jobs across projects in the cluster
252 |
253 | `create-message-handler`:
254 |
255 | - enhanced to support a json or text file with a list of rules that can be used to create the message handler.
256 |
257 | `migrate`:
258 | `import-zip`:
259 |
260 | - added a new option `--run-job-by-name` to migrate the job using its name instead of its id.
261 |
262 | `validate-job`:
263 |
264 | - enhanced to print flow runtime parameters and job parameters if they differ
265 | - allows updating the job parameters to match flow runtime parameters using the option `--remove-param-overrides`
266 |
267 | `send-mail`:
268 |
269 | - allows up to three attachments in each email.
270 |
271 | ### Fixes
272 |
273 | `list-jobruns`:
274 |
275 | - when a project name is specified, the output is compatible with the prior version
276 | - if the command is run on all projects in the cluster, the output columns will differ from the prior version
277 |
278 | `get-jobrun-report`:
279 |
280 | - now produces the correct start time for stage and link metrics.
281 |
282 | `run-pipeline`:
283 | `get-pipeline-log`
284 |
285 | - log fetching uses a new API to reduce data exchange and optimize the run
286 |
287 | ## 5.0.1
288 |
289 | ### New commands
290 |
291 | The following commands are added:
292 |
293 | - `reset-pipeline-cache` Reset a pipeline's cache.
294 | - `clear-vault-cache` Clear a vault cache.
295 |
296 | ### Command changes
297 |
298 | The following commands have changed:
299 |
300 | `schedule-job`:
301 |
302 | - you can now remove schedule information from the job.
303 |
304 | `migrate`
305 |
306 | - added two flags to allow annotation styling and math operation migrations.
307 |
308 | `update-metrics`
309 |
310 | - allows additional parameters to use certificates for authentication.
311 |
312 | ### Fixes
313 |
314 | `view-dataset`:
315 | `view-fileset`:
316 |
317 | - fixed issue with truncating the first two rows.
318 |
319 | ## 5.0.0
320 |
321 | ### New commands
322 |
323 | The following commands are added:
324 |
325 | - `list-params` List flow/pipeline parameters.
326 | - `export-folder` Export a folder.
327 |
328 | ### Command changes
329 |
330 | The following commands have changed:
331 |
332 | `jobrunstat`:
333 |
334 | - now reports start and end timestamps for the job runs.
335 |
336 | `run` and `pipeline-run`:
337 |
338 | - are enhanced to produce IIS exit codes for run status using environment variable `ENABLE_IIS_EXITCODE=1`.
339 |
340 | `list-dependencies` and `list-usage`:
341 |
342 | - are enhanced to produce json output using `--output json` option.
343 |
344 | `git-pull`:
345 |
346 | - supports `--skip-on-replace` and `--hard-replace` to selectively skip certain assets such as connections and parameter sets (see the sketch below).
347 |
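For example, a minimal sketch of a selective pull; the asset-type value mirrors the `--skip-on-replace connection` usage shown for `import-zip` elsewhere in this repository, and any repository or branch flags are omitted:
```
# pull from git but skip replacing existing connections
cpdctl dsjob git-pull --project <PROJECT> --skip-on-replace connection
```
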
348 | `list-connections`:
349 |
350 | - updated to reflect changes in the backend API to list all connections properly.
351 |
352 | `list-jobruns`:
353 |
354 | - now takes `--start` and `--end` times and `--state` to filter job runs.
355 |
356 | `schedule-job`:
357 |
358 | - takes a new option `--remove-schedule` to turn off scheduling on a job.
359 |
360 | `import-zip`,
361 | `export-zip`,
362 | `export-project`,
363 | `export-datastage-assets`:
364 |
365 | - are modified to add a new option `--import-binaries` to include compiled binaries of assets such as build stages.
366 |
367 | ### Fixes
368 |
369 | `list-usage`:
370 |
371 | - fixed to show all dependent assets correctly. Before this fix, the command would not identify all dependent assets and printed a partial list.
372 |
373 | `compile`:
374 |
375 | - properly handles gateway timeouts and 500 errors without crashing.
376 |
377 | ## 4.8.5
378 |
379 | ### New commands
380 |
381 | The following commands are added:
382 |
383 | - `list-params` List flow/pipeline parameters.
384 | - `export-folder` Export a folder.
385 |
386 | ### Command changes
387 |
388 | The following commands have changed:
389 |
390 | `git-commit`,
391 | `git-pull`,
392 | `git-status`
393 |
394 | - these commands now support GitLab
395 |
396 | ## 4.8.4
397 |
398 | ### Command changes
399 |
400 | The following commands have changed:
401 |
402 | `update-ds-settings`:
403 |
404 | - added a validation check to the default message handler.
405 | - added `--env` and `--env-id` flags to set the default environment.
406 | - the command now only allows you to set a valid default environment.
407 | - the list of environments can no longer be changed directly by the user. Instead, the list is populated from the available environments in the project.
408 |
409 | `delete-build-stage`,
410 | `delete-connection`,
411 | `delete-custom-stage`,
412 | `delete-dataset`,
413 | `delete-fileset`,
414 | `delete-flow`,
415 | `delete-function-lib`
416 | `delete-java-library`,
417 | `delete-job`,
418 | `delete-match-spec`,
419 | `delete-paramset`,
420 | `delete-pipeline`,
421 | `delete-subflow`,
422 | `delete-tabledef`,
423 | `delete-wrapped-stage`,
424 | `export-datastage-assets`,
425 | `export-project`,
426 | `export-zip`,
427 | `export`,
428 | `git-commit`,
429 | `git-pull`,
430 | `import-zip`,
431 | `import`,
432 | `logdetail`,
433 | `run`,
434 | `waitforjob`
435 |
436 | - these commands now return a proper error code to the shell. Previously, all failures set the shell exit code to 1; now the value is set to the status code. Refer to the documentation for all return code types.
437 |
438 | - `jobrunclean` added retry logic after a resource is deleted. It checks whether the resource still exists and performs the delete again as a workaround when the framework fails to delete the resource and returns no error message.
439 |
440 | ### Fixes
441 |
442 | `list-job-status`:
443 |
444 | - fixed an issue that forced the user to enter a job name or job id, which is optional.
445 | - fixed generating an empty report when there are no job runs to display.
446 |
447 | `create-message-handler`:
448 |
449 | - fixed an issue with the `--default` flag; it now correctly sets the message handler in the project settings as the default handler.
450 | - fixed runtime errors with message handlers created through dsjob that were caused by missing type information.
451 |
452 | `export-datastage-assets`:
453 |
454 | - exporting an empty project will now display an error properly.
455 |
456 | `run-pipeline`
457 |
458 | - fixed to properly process `--wait -1` and wait indefinitely for job run completion.
459 |
460 | ## 4.8.3
461 |
462 | ### New commands
463 |
464 | The following commands are added to enhance job run analysis.
465 |
466 | - `rename-dataset` Rename a Data Set.
467 | - `rename-fileset` Rename a File Set.
468 |
469 | ### Command changes
470 |
471 | The following commands have changed:
472 |
473 | `compile`:
474 |
475 | - added `--skip` option to skip compilation of flows that are not changed and do not need to be compiled again.
476 |
477 | `list-flows`:
478 |
479 | - added a new option `--with-compiled` to display whether the flow is compiled, the last compile time for the compiled flow, and whether the flow needs compilation.
480 | - added a new option `--sort-by-compiled` to sort the flows by last compile time.
481 |
482 | ### Fixes
483 |
484 | `logsum`:
485 |
486 | - lines with multiline strings were previously truncated; with the fix, the entire string is displayed.
487 |
488 | `list-jobruns`:
489 |
490 | - fixed command `list-jobruns` to properly handle incorrect job names or ids and incorrect job run names or ids.
491 |
492 | ## 4.8.2
493 |
494 | ### Command changes
495 |
496 | The following commands have changed:
497 |
498 | `run`:
499 |
500 | - added the option `no-logs` to not print logs when waiting for the run to finish.
501 | - run command now prints the time taken to complete the job run from the time the job is successfully submitted. This excludes the time taken to prepare the job run.
502 |
503 | Note: When using the command line to specify a parameter that starts with `$`, be aware that the shell will try to evaluate it as a shell variable. To avoid this, parameters starting with `$` should be escaped using single quotes or a backslash, for example `--param '$param=value'` or `--param \$param=value` (see the sketch below).
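
For example, a minimal sketch of both escaping styles (project, job, and parameter names are placeholders):
```
# single quotes keep the shell from expanding $myparam
cpdctl dsjob run --project <PROJECT> --name <JOB> --param '$myparam=value'

# alternatively, escape the dollar sign with a backslash
cpdctl dsjob run --project <PROJECT> --name <JOB> --param \$myparam=value
```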
504 |
505 | `run-pipeline`:
506 |
507 | - added the option `no-logs` to not print logs when waiting for the run to finish.
508 | - run command now prints the time taken to complete the job run from the time the job is successfully submitted. This excludes the time taken to prepare the job run.
509 | - allows pipeline params of different type definitions; previously only strings were supported.
510 |
511 | Note: When using the command line to specify a parameter that starts with `$`, be aware that the shell will try to evaluate it as a shell variable. To avoid this, parameters starting with `$` should be escaped using single quotes or a backslash, for example `--param '$param=value'` or `--param \$param=value`.
512 |
513 | `list-jobruns`:
514 |
515 | - added option `sort-by-duration` to sort job runs by duration.
516 | - added column to display job run duration.
517 |
518 | `create-dsparams`:
519 |
520 | - when a field in the DSParams file is encrypted, it is migrated as an encrypted-type field into PROJDEF; previously it was migrated as a string field.
521 |
522 | `git-pull`:
523 |
524 | - supports `branch` option to pull code from a branch.
525 | - added `name` to specify a list of assets to pull partial code from the git repo.
526 |
527 | `migrate`:
528 |
529 | - added support for `output json` to output the response from migrate status.
530 |
531 | `list-projects`:
532 |
533 | - added support for `output json` to output the project definition response in json.
534 |
535 | `list-spaces`:
536 |
537 | - added support for `output json` to output the space definition response in json.
538 |
539 | `create-paramset` and `update-paramset`:
540 |
541 | - support the multilinestring type for parameter set fields.
542 |
543 | `export-paramset`:
544 |
545 | - added support to export multiple parameter sets into a zip file.
546 |
547 | ### Fixes
548 |
549 | `run-pipeline`:
550 |
551 | - fixed the parameter parsing logic to allow pipeline parameters of types other than string.
552 |
553 | ## 4.8.1
554 |
555 | ### New commands
556 |
557 | The following commands are added to enable job run analysis.
558 |
559 | - `list-links` List all links to or from a stage in a DataStage job.
560 | - `list-stages` List all stages in a DataStage job.
561 | - `get-stage-link` Display information for a link to or from a stage in a DataStage job.
562 | - `get-jobrun-report` Display job run report for a DataStage job.
563 |
564 | The following commands are added to allow fileset cleanup.
565 |
566 | - `truncate-fileset` Truncate File Set data from a given project by name.
567 |
568 | ### Command changes
569 |
570 | The following commands have changed:
571 |
572 | `run-pipeline`:
573 |
574 | - enhanced to take `run-name`.
575 | - added ability to process PROJDEF parameter references.
576 | - flag short-name for `reset-cache` changed from `-R` to `-C`
577 |
578 | `list-jobruns`: fixed incorrect flag name from `sort-by-runid` to `sort-by-runname`.
579 |
580 | `list-paramsets` supports json formatted output.
581 |
582 | ## 4.8.0
583 |
584 | ### New commands
585 |
586 | The following commands are added to manage assets in folders.
587 |
588 | - `list-folders` List all folders.
589 | - `list-folder` List folder contents.
590 | - `create-folder` Create a folder.
591 | - `delete-folder` Delete a folder.
592 | - `update-folder` Update folder name.
593 | - `move-folder` Move a folder.
594 | - `move-asset` Move asset to a folder.
595 |
596 | The following commands are added to work with Cobol File Formats.
597 |
598 | - `list-cff-schemas` List CFF Schemas.
599 | - `get-cff-schema` Get CFF Schema.
600 | - `create-cff-schema` Create CFF Schema.
601 | - `delete-cff-schema` Delete CFF Schema.
602 | - `export-cff-schema` Export CFF Schema(s).
603 |
604 | The following commands are added to synchronize projects artifacts with git.
605 |
606 | - `git-commit` Git commit a Project.
607 | - `git-pull` Git pull a Project.
608 | - `git-status` Get git status of a Project.
609 |
610 | Other:
611 |
612 | - `dataset-truncate` allows truncation of data in a DataSet.
613 | - `encrypt` allows cluster-specific encryption of text.
614 |
615 | ### Command changes
616 |
617 | The following commands have changed:
618 |
619 | `list-active-runs`: enhanced to display run-name and duration. Users can sort on these fields.
620 |
621 | `list-usage`:
622 |
623 | - asset name can be prefixed with type to avoid conflicts.
624 | - supports data rules and data definitions.
625 |
626 | `list-dependencies` supports data rules and data definitions.
627 |
628 | `migrate` command added new options:
629 |
630 | - `enable-notifications` Enable Notifications for Migration.
631 | - `storage-path` Folder path of the storage volume for routine scripts and other data assets.
632 | - `migrate-to-send-email` Migrates all notification activity stages in a sequence job to send email task nodes.
633 | - `migrate_hive_impala` Enable hive impala for migration.
634 |
635 | `import-zip`:
636 |
637 | - now imports flows with or without compilation.
638 | - imports only the listed DataStage components.
639 | - takes a key to decrypt sensitive data; this must match the key used during export.
640 |
641 | `export-datastage-assets`: can now exclude dataset and fileset data files from the export.
642 |
643 | `export-zip`, `export-project` will allow 'testcase' DataStage components to be specified for export.
644 |
645 | `export-zip`, `export-project` and `export-datastage-assets` commands take an encryption key to encrypt any sensitive data before writing to the zip output file.
646 |
647 | `list-job-status`:
648 |
649 | - enhanced to show all job runs, optionally within a specified time window.
650 | - sort by duration is added.
651 |
652 | `migrate`: enhanced to enable notifications, migrate database connections using dsn type, enable hive impala migration, and migrate notification activities as send email nodes.
653 |
654 |
655 | `run-pipeline` now allows the reset-cache option to clear the cache before the run.
656 |
657 | `compile` enhanced to support pushdown materialization policy.
658 |
659 | `run` can now use runtime environment and runtime language settings for each job run.
660 |
661 | `update-ds-settings` is enhanced to set collate option on the project.
662 |
663 | `describe-dataset` enhanced to write to a file with additional information on dataset size, location and last access time.
664 | `describe-fileset` enhanced to write to a file with additional information on fileset size, location and last access time.
665 |
666 | All export- commands now use unix globbing patterns with wildcards instead of regular expressions.
667 |
668 | `create-dsparams` changed to export user environment definitions from the legacy DSParams file into the PROJDEF parameter file. It no longer uses the runtime environment to export and store DSParams.
669 |
670 | `jobrunclean` enhanced to accept timestamp to clean up jobs that started before a specific time.
671 |
672 | `create-paramset`, `update-paramset` now validate references to PROJDEF in parameter definitions.
673 |
674 | `create-paramset`, `update-paramset` now support `encrypted` data type.
675 |
676 | `upload-volume-files` enhanced to accept a destination file different from the source file.
677 |
678 | ### Fixes
679 |
680 | `export-project`, `export-datastage-assets` commands adjusted internally to accommodate rate-limiting issues when exporting large projects on ibm-cloud.
681 |
682 | `run` command now implements multiple retry loops to accommodate intermittent errors and ensure completion of job runs when there is a high number of concurrent runs.
683 |
684 | ## 4.7.4
685 |
686 | ### Command changes
687 |
688 | All `export-` commands will now use a globbing pattern to export multiple files. This includes: `export-quality-definition`,`export-quality-rule`,`export-asset`,`export-build-stage`,`export-connection`,`export-cff-schema`,`export-custom-stage`,`export-dataset`,`export-fileset`,`export-function-lib`,`export-java-library`,`export-library`,`export-match-spec`,`export-paramset`,`export-rule`,`export-subflow`,`export-tabledef`,`export-wrapped-stage`.
689 |
690 | ex: `cpdctl dsjob export-subflow --project <PROJECTNAME> --name ab*` will export all subflows whose names start with `ab`.
691 |
692 | `upload-volume-files`: Enhanced to allow the user to specify a destination file name.
693 |
694 | `create-paramset` and `update-paramset`: added logic to verify that Parameter Set fields referencing PROJDEF are valid, that is, that PROJDEF exists and the reference exists; if not, a warning message is displayed.
695 |
696 | ### Fixes
697 |
698 | `update-env-vars`: fixed an issue to avoid overwriting the existing environment variables.
699 |
700 | `download-volume-files`: fixed an issue to report a proper error when it fails to write the downloaded file to local disk.
701 |
702 | ## 4.7.3
703 |
704 | ### Command changes
705 |
706 | `migrate` now takes a new flag to migrate optimized connectors: `use-dsn-name`.
707 |
708 | `compile` now takes a new flag `materialization-policy` when ELT compile is enabled with the flag `--enable-elt-mode`. This flag determines the generated output and takes the following values: OUTPUT_ONLY, TEMP_TABLES, TEMP_VIEWS, CARDINARLITY_CHANGER_TABLES. The output of the command now displays total time in the summary line.
709 |
710 | `delete-dataset` and `delete-fileset` now have an option to delete multiple objects. A `--dry-run` option is now available to show the details of the objects that would be deleted.
711 |
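For example, a minimal sketch of a dry run over a globbing pattern (the project name and pattern are placeholders, and the `--name` flag is an assumption):
```
# show which datasets matching the pattern would be deleted, without deleting them
cpdctl dsjob delete-dataset --project <PROJECT> --name "temp_*" --dry-run
```
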
712 | ### Fixes
713 |
714 | `list-jobruns` exits gracefully when the incorrect job run id is specified.
715 |
716 | `validate-flow` no longer crashes when a single flow name needs validating; the crash was caused by incorrect initialization of cache entries.
717 |
718 | ## 4.7.2
719 |
720 | ### Command changes
721 |
722 | `delete-dataset` and `delete-fileset` can now take unix-like globbing pattern to delete multiple datasets.
723 | `delete-dataset` and `delete-fileset` can now take the `--dry-run` option to run the command without deletions.
724 |
725 | ## 4.7.1
726 |
727 | ### New commands
728 |
729 | The following export commands are added to allow export of individual assets into a .zip file. The output .zip file is compatible with migration using the `import-zip` command.
730 |
731 | - `export-build-stage` Export Build Stage(s).
732 | - `export-connection` Export Connection(s).
733 | - `export-custom-stage` Export Custom Stage(s).
734 | - `export-dataset` Export Data Set(s).
735 | - `export-fileset` Export File Set(s).
736 | - `export-java-library` Export Java Library(s).
737 | - `export-library` Export XML Library(s).
738 | - `export-message-handler` Export Message Handler(s).
739 | - `export-operational-dm` Export Operational Decision Manager(s).
740 | - `export-subflow` Export Subflow(s).
741 | - `export-tabledef` Export Table Definition(s).
742 | - `export-wrapped-stage` Export Wrapped Stage(s).
743 | - `export-quality-definition` Export Data Quality Definition.
744 | - `export-quality-rule` Export Data Quality Rule.
745 | - `download-dataset` Download a DataSet asset with dataset backend files.
746 | - `download-fileset` Download a FileSet asset with fileset backend files.
747 | - `upload-dataset` Upload a DataSet asset with dataset backend files.
748 | - `upload-fileset` Upload a FileSet asset with fileset backend files.
749 |
750 | ### Command changes
751 |
752 | The following commands have had semantic changes to make them compatible with other export commands. They now produce a .zip file that is compatible with the `import-zip` command.
753 |
754 | `export-match-spec`: Export Match Specification(s).
755 |
756 | `export-rule`: Export Standardization Rule by name.
757 |
758 | `update-job` now takes `--paramset` to update parameter set definitions for the job.
759 |
760 | `list-env-vars` takes `--sort` to sort the list alphabetically.
761 |
762 | `migrate` takes additional flag `--enable-platform-connection` to migrate optimized connectors.
763 |
764 | `run` has new default value for `--run-name`: "Job Run" instead of "job_run". Changed for UI compatibility.
765 |
766 | ### Fixes
767 |
768 | DSJob plugin commands are now organized alphabetically for easier browsing.
769 |
770 | `list-dependencies` and `list-usage` are enhanced to show relationships to DataStage components such as Data sets, File sets, Operational Decision Managers, Schema libraries, Standardization rules and Match specifications.
771 |
772 | `run` and `run-pipeline` are enhanced to handle parameter sets natively in the job definitions. Also, these commands now have retry logic to wait and run jobs again if any temporary issues occur with job service. The retry waits up to 10 minutes if a job run fails and attempts to run the job periodically until it succeeds.
773 |
774 | `migrate` command ignores `--create-connection-paramsets` when `--enable-local-connection` is set.
775 |
776 | `get-paramset` prints out a detailed parameter set definition by default using table format.
777 |
778 | ## 4.7.0
779 |
780 | ### New commands
781 |
782 | - `list-message-handlers` List all Message Handlers.
783 | - `get-message-handler` Get a Message Handler.
784 | - `create-message-handler` Create a Message Handler.
785 | - `delete-message-handler` Delete a Message Handler.
786 | - `list-job-status` List Jobs with their run status.
787 | - `export-paramset` Export Parameter Set(s).
788 | - `list-usage` List dependencies between DataStage components.
789 | - `update-function-lib` Update User Defined Function.
790 | - `export-function-lib` Export User Defined Function(s).
791 |
792 | ### Command changes
793 |
794 | The following function library commands are renamed, adding `-lib`.
795 |
796 | `list-function-libs` List function libraries.
797 |
798 | `get-function-lib` Get function libraries.
799 |
800 | `create-function-lib` Create function libraries.
801 |
802 | `delete-function-lib` Delete function libraries.
803 |
804 | `migrate` and `import-zip` commands now take a new parameter `hard-replace`. This allows for reconciliation when importing parameter sets that conflict with existing parameter sets.
805 |
806 | `list-job-runs` now can sort using `sort-by-runid` to sort job runs using their `run-name` or `invocation-id`.
807 |
808 | `list-jobs` modified to allow users to `sort-by-time` to list jobs chronologically based on the job update timestamp. Also, `list-jobs` takes a new flag `sched-info` to print schedule information if the job has scheduled runs.
809 |
810 | ### Fixes
811 |
812 | All the list commands that have `sort-by-time` option will now use `updatedAt` timestamp of the object to sort the list.
813 |
814 | The following commands will now produce a nonzero exit code upon failure, which can be checked in the parent shell using `$?`:
815 | `compile`, `run`, `logdetail`, `import`, `export`, `import-zip`, `export-project` and all delete commands.
816 |
817 | Fixed `create-function-lib` command to take additional arguments to configure return types and aliases.
818 |
819 | Changed output format to table format for `jobrunstat` and `list-jobruns`.
820 |
821 | ## 4.6.6
822 |
823 | ### New commands
824 |
825 | - `validate-connection` Validate connections.
826 | - `validate-flow` Validate flow references.
827 | - `validate-subflow` Validate subflow references.
828 | - `validate-job` Validate job references.
829 | - `validate-pipeline` Validate pipelines.
830 | - `waitforjob` Wait for Job.
831 | - `list-dependencies` List dependencies between DataStage components.
832 | - `create-dsparams` Create DSParams as environment variables.
833 |
834 | ### Command changes
835 |
836 | `update-connection` allows user to rename a connection using the new flag `to-name`.
837 |
838 | ### Fixes
839 |
840 | Fixed `logdetail`, `logsum` and `lognewest` to parse raw logs when generating output.
841 |
842 | Allow the `export` command to export the specified assets; the fix correctly validates the export types specified at the command line against the asset names.
843 |
844 | ## 4.6.4
845 |
846 | ### New commands
847 |
848 | - `create-pipeline-job` with `schedule` options is added to support scheduled runs for pipelines.
849 | - `validate-pipeline` is added to validate flows or pipelines referenced in a pipeline.
850 |
851 | ### Command changes
852 |
853 | `migrate` adds the `--create-connection-paramsets` option to create parameter sets for missing properties in connections.
854 |
855 | `migrate` when used with `--wait` option prints consolidated Errors, Warnings and Unsupported sections that are alphabetically sorted.
856 |
857 | `jobrunclean` command now cleans up job runs from a specific job or across projects or spaces. It also takes `--before TIMESTAMP` to cleanup based on timestamp.
858 |
859 | DataSet operations now take advantage of the new asset API. Currently, the following operations are supported, and their command options have changed to make use of the new API.
860 |
861 | ```
862 | list-datasets List DataSets.
863 | get-dataset Get DataSet data.
864 | delete-dataset Delete a Dataset.
865 | describe-dataset Describe the DataSet.
866 | view-dataset View DataSet.
867 | list-filesets List FileSets.
868 | get-fileset Get FileSet data.
869 | delete-fileset Delete a FileSet.
870 | describe-fileset Describe the FileSet
871 | view-fileset View FileSet.
872 | ```
873 |
874 | `dsjob version`
875 | This command now prints component versions in alphabetical order. Added canvas and pipeline to the list.
876 |
877 | `compile` command now allows the user to specify a regular expression to compile multiple flows.
878 | `compile` flag `--enable-elt-mode` is used to compile a dbt model to perform ELT operations.
879 |
880 | Retry logic is tuned to handle http errors and exit properly if the backend cannot progress. The affected commands are `migrate`, `export-project`, and `import-zip` with wait options, and also `export-datastage-assets`.
881 |
882 | ### Fixes
883 |
884 | Fixed `update-paramset` to keep the prompt field unaffected by the update operation.
885 |
886 | Fixed an issue with `import-quality-rule` to take the definition's id and optionally its name, so that the rule is added to the target project appropriately.
887 |
888 | Fixed `run-pipeline` to accept parameter sets; previously `--paramset` was ignored when no job parameters were specified using `--param`.
889 |
890 | ## 4.6.2
891 |
892 | ### New job commands
893 |
894 | - `cleanup-jobs`
895 |
896 | ### New Java Library commands
897 |
898 | - `list-java-libraries`
899 | - `get-java-library`
900 | - `create-java-library`
901 | - `delete-java-library`
902 |
903 | ### New User-defined function commands
904 |
905 | - `list-functions`
906 | - `get-function`
907 | - `create-function`
908 | - `delete-function`
909 |
910 | ### New Data Quality Rule and Data Quality Definition commands
911 |
912 | - `list-quality-rules`
913 | - `get-quality-rule`
914 | - `import-quality-rule`
915 | - `export-quality-rule`
916 | - `delete-quality-rule`
917 | - `list-quality-definitions`
918 | - `get-quality-definition`
919 | - `import-quality-definition`
920 | - `export-quality-definition`
921 | - `delete-quality-definition`
922 |
923 | ### Fixed issues
924 |
925 | Fixed issue with pagination while fetching job runs. The following commands are affected.
926 |
927 | - `list-jobruns`
928 | - `jobrunstat`
929 | - `jobrunclean`
930 | - `prune`
931 |
932 | ### Command changes
933 |
934 | - `run` command takes a new flag `--warn-limit` to specify the number of warnings allowed in a run.
935 | - `import-zip` now prints a sorted list of objects that failed to import; an errors section was also added to print errors for each failed object import.
936 | - `export-zip` takes an additional flag `--include-data-assets` to include data assets in the export.
937 | - `export-project` takes an additional flag `--include-data-assets` to include data assets in the export.
938 | - Added an `export-datastage-assets` command that uses `export-project` and then adds missing DataStage components into the export. This
939 | command also supports the flag `--include-data-assets` to export data assets.
940 |
941 | ### Shorthand flag changes
942 |
943 | The shorthand flags for string `name` and `ID` of a parameter set
944 | have changed from `-s` to `-t` for `name` and
945 | `-S` to `-T` for `ID`. This affects the following commands:
946 | `list-paramset-valuesets,get-paramset-valueset,create-paramset-valueset,delete-paramset-valueset,update-paramset-valueset`.
947 |
948 | The shorthand flags for stringArray `name` and `ID` of a pipeline
949 | have changed from `-s` to `-l` for `name` and
950 | `-S` to `-L` for `ID`. This affects the command
951 | `export-zip`.
952 |
953 | The `migrate` command no longer supports the short flag `-s` as a
954 | replacement for `--stop`.
955 |
956 | The `compile` command now takes multiple flows/flow-ids to compile, listed as
957 | `--name Flow1 --name Flow2...`.
958 |
--------------------------------------------------------------------------------
/dsjob/export-import.md:
--------------------------------------------------------------------------------
1 | ## Project Level Export
2 |
3 | #### Export Project with all assets including DataStage
4 | ```
5 | cpdctl dsjob export --project dsjob --name test-export
6 | --export-file demo-project-export.zip --wait 200
7 | ```
8 |
9 | #### Find if export has completed
10 | ```
11 | $ cpdctl dsjob list-exports --project dsjob
12 | ...
13 | Name |Export ID |State |Created At |Updated At
14 | ---------- |---------- |-------- |----------- |-----------
15 | test-export|3cce517b-8073-437f-bef3-095c39cf3b80|completed|2023-04-18T05:57:21.752Z|2023-04-18T05:57:31.528Z
16 | test-export|2889506e-1c6f-4b76-9f5e-cd51fed51252|completed|2023-04-19T04:49:42.458Z|2023-04-19T04:49:55.568Z
17 | test-export|e6b386f7-0920-44f2-9288-7bececd61954|completed|2023-04-26T00:11:09.503Z|2023-04-26T00:11:25.154Z
18 | ```
19 |
20 | #### Save the export to a zip file
21 | ```
22 | $ cpdctl dsjob save-export --project dsjob --name test-export
23 | --export-file abc.zip
24 | ```
25 |
26 | #### Cleanup the export
27 | ```
28 | cpdctl dsjob delete-export --name test-export
29 | ```
30 |
31 | #### Import the project into a New Project
32 | ```
33 | cpdctl dsjob import --project DSJOB-PROJECT-EXPORT
34 | --import-file demo-project-export.zip --wait 200
35 | ```
36 |
37 |
38 | ---
39 | ---
40 | ## Export DataStage Assets
41 | There are three ways to export DataStage assets:
42 | - export-zip
43 | - export-project
44 | - export-datastage-assets
45 |
46 | #### export-zip : Export individual assets
47 | export-zip can be used to export individual flows or pipelines; by default it also exports their dependencies.
48 | ```
49 | cpdctl dsjob export-zip --project dsjob --name Test-DataStage-Flow --file-name test-export-project.zip
50 | or
51 | cpdctl dsjob export-zip --project dsjob --pipeline=testloop2 --file-name test-export-project.zip
52 | ```
53 |
54 | You can export a flow or pipeline without its dependencies, using the `--no-deps` option, if you choose not to export the connections or parameter sets that the flow or pipeline depends on.
55 | It is also important to note that `--no-secrets` lets you skip exporting secrets such as passwords.
56 | ```
57 | cpdctl dsjob export-zip --project dsjob --name Test-DataStage-Flow --file-name test-export-project.zip --no-deps --no-secrets
58 | ```
59 |
60 | If you have developed multiple flows and pipelines and want to export them all into a zip file, use the following options to export the flows and pipelines with their dependencies.
61 |
62 | ```
63 | cpdctl dsjob export-zip --project dsjob --name={fsTarget,dsTarget} --pipeline={testloop2,testPipe} --file-name test-export-project.zip
64 | or
65 | cpdctl dsjob export-zip --project dsjob --name fsTarget --name dsTarget --pipeline testloop2 --pipeline testPipe --file-name test-export-project.zip
66 | ```
67 |
69 |
70 |
71 | #### export-project : Export the DataStage flows and pipelines in a project with dependencies
72 | ```
73 | cpdctl dsjob export-project --project DSJob_Test
74 | --file-name DSJob_Test-project.zip --wait 200
75 | ```
76 | If `--wait` is not used...
77 | get-export-project : Check the status of the export
78 | ```
79 | $ cpdctl dsjob get-export-project --project dsjob
80 | ```
81 | Once the export is completed...
82 | save-export-project: Save the exported project to local disk
83 | ```
84 | cpdctl dsjob save-export-project --project dsjob --file-name
85 | test-export-project.zip
86 | ```
87 | Stop the export if something is not right...
88 | ```
89 | cpdctl dsjob stop-export-project --project dsjob
90 | ```
91 |
92 | #### export-datastage-assets : Export all DataStage Assets
93 | Export every DataStage asset in the project.
94 | ```
95 | cpdctl dsjob export-datastage-assets --project DSJob_Test
96 | --file-name DSJob_Test.zip
97 | ```
98 |
99 | #### import-zip : Import a DataStage artifact file
100 | Control how you import
101 | ```
102 | cpdctl dsjob import-zip --project DSJob_Test
103 | --file-name test-dependencies2.zip
104 | --conflict-resolution replace
105 | --skip-on-replace connection --wait 200
106 | ```
107 | If `--wait` is not used...
108 | get-import-zip : Check the status of the import
109 | ```
110 | cpdctl dsjob get-import-zip --project DSJob_Test
111 | --import-id f95e4ba8-d64d-4c5c-aa14-b0a3671fccb9
112 | ```
113 |
114 |
--------------------------------------------------------------------------------
/dsjob/incoming-changes.md:
--------------------------------------------------------------------------------
1 | ### New commands
2 | `rename-dataset`
3 | `rename-fileset`
4 |
5 |
6 | ### Command changes
7 | `compile` will allow compiling flows that are stale using the `skip` option
8 | `list-compile` shows flow compilation information
9 |
10 | ### Fixed
11 |
12 |
--------------------------------------------------------------------------------
/utils/getstacks/README.md:
--------------------------------------------------------------------------------
1 | # DataStage Gather Stacks utility
2 |
3 | This compressed file contains gdb and a set of scripts that can automatically gather stack traces from running DataStage jobs in CP4D or on DataStage Anywhere containers.
4 |
5 | Download and decompress the file to a local temporary directory on a machine that has access to px-runtime pods or containers of the px-runtime instance.
6 | Example:
7 | /tmp/test> tar xvfz getstacks.tar.gz
8 | gdb-8.2.1-x86_64.tar.gz
9 | getstacksre.sh
10 | getstackscpd.sh
11 |
12 | For CP4D, run the getstackscpd.sh script. The script takes one optional argument, which is the px-runtime instance name. If not given, ds-px-default is used.
13 | Examples:
14 | ./getstackscpd.sh
15 | ./getstackscpd.sh inst1-small
16 |
17 | For DataStage Anywhere, run the getstacksre.sh script. The script takes one optional argument, which is the container command. If not given, docker is used.
18 | Examples:
19 | ./getstacksre.sh
20 | ./getstacksre.sh podman
21 |
--------------------------------------------------------------------------------
/utils/getstacks/getstacks.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/DataStage/4cc73e840d0c9c6573efdfdb51d3d92fd470a28a/utils/getstacks/getstacks.tar.gz
--------------------------------------------------------------------------------