├── .gitignore
├── LICENSE
├── NOTES.md
├── README.md
└── bin
├── code-de-query-download.sh
├── code-de-transfer.sh
├── dataHubTransfer.sh
└── includes
├── error-handler.sh
├── log-handler.sh
└── singleton.sh
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 |
3 | code-de-tools
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/NOTES.md:
--------------------------------------------------------------------------------
1 | # Notes on CODE-DE search and download interfaces
2 |
3 | The following automation interfaces are described next:
4 | * Product searches
5 | * Download products
6 | * Download a whole directory
7 | * Incremental download
8 |
9 | ## Product searches
10 |
11 | The simplest way of finding data products in CODE-DE is using OpenSearch queries.
12 | The following example demonstrates an OpenSearch URL with a time specification of one day and an
13 | area of interest (AOI) over Germany, OpenSearch URL example:
14 |
15 | https://catalog.code-de.org/opensearch/request/?httpAccept=application/atom%2Bxml&parentIdentifier=EOP:CODE-DE:S2_MSI_L1C&startDate=2017-01-04T00:00:00.000Z&endDate=2017-01-04T23:59:59.999Z&bbox=5.9,47.2,15.2,55
16 |
17 | Paging can be achieved by adding the parameters ```&startPage=1``` or ```&startRecord=1``` to the URL
18 | and you can specify the page size with ```&maximumRecords=100``` (:warning: the default is 50 and the maximum is 500).
19 | The full OpenSearch description document with the search templates and parameters can be retrieved with
20 | the URL
21 |
22 | https://catalog.code-de.org/opensearch/description.xml?parentIdentifier=EOP:CODE-DE:S2_MSI_L1C
23 |
24 | for example to locate the ```&cloudCover=[0,20]``` parameter.
25 |
26 | :warning: Note: the CODE-DE OpenSearch Service does not require authentication.
27 |
28 | :bulb: Note: you can extract a prepared OpenSearch query from the CODE-DE Catalog Client by setting-up
29 | the desired filter parameters and taking the executed query from the Browser debug window (visible after
30 | pressing the F12 key; in the network tab, right-click on the opensearch URL, then copy the address)
31 |
32 |
33 | ## Download products
34 |
35 | The most effective way of downloading CODE-DE data products is using the HTTP Download Service.
36 | The download URLs can be extracted from the above OpenSearch query result. A utility script is included in this
37 | [package](https://github.com/dlr-eoc/code-de-tools/blob/master/bin/code-de-query-download.sh).
38 | The following bash script snippet demonstrates the process:
39 | ```
40 | #!/usr/bin/bash
41 | baseUrl=https://catalog.code-de.org/opensearch/request/?httpAccept=application/atom%2Bxml
42 | parentIdentifier=EOP:CODE-DE:S2_MSI_L1C
43 | startDate=2017-01-04T00:00:00.000Z
44 | endDate=2017-01-04T23:59:59.999
45 | AOI=5.9,47.2,15.2,55
46 | batchSize=100
47 | downloadParallel=4
48 |
49 | # execute query and extract the download URL list
50 |
51 | urls=$(curl "${baseUrl}&parentIdentifier=${parentIdentifier}&startDate=${startDate}&endDate=${endDate}&bbox=${AOI}&maximumRecords=${batchSize}" | xmllint --xpath '//*[local-name()="link" and @title="Download"]/@href' - |sed -e 's/ *href="//g' | tr '"' '\n' )
52 |
53 | # download them all to the local directory
54 | echo $urls | xargs -n1 -P4 curl -O
55 | ```
56 |
57 | :bulb: the above command can be assembled to run as a bash one-liner.
58 |
59 | :warning: Note: the current CODE-DE Download Service does not use authentication. In the near future,
60 | the curl download will need the access account information passed with the ```-u :``` parameter.
61 |
62 |
63 | ## Download a whole directory
64 |
65 | Another example to download a whole directory from the download server:
66 |
67 | _Download directories_
68 | ```
69 | wget -O- -nv https://code-de.org/Sentinel2/2016/06/14 2> /dev/null | grep 'a href=".*.zip' | cut -d'"' -f2 | head -10 | xargs -n1 -P10 -I{} wget http://code-de.org/download/{}
70 | ```
71 | This command can be enhanced to filter for specific Sentinel-2 tiles, based on the new compact file naming convention,
72 | inserting another ```| grep _T32UPU_``` filter (example tile over Munich).
73 |
74 |
75 | ## Incremental Downloads
76 |
77 | The script [dataHubTransfer.sh](https://github.com/dlr-eoc/code-de-tools/blob/master/bin/dataHubTransfer.sh)
78 | provides the means to incrementally download new products from a DHuS (ESA Data Hub Software). CODE-DE operates a DHuS
79 | mirror providing access to Sentinel products. The script is intended to be run in a cron job.
80 | Instructions are included in the README.md and in the script header itself.
81 |
82 | :warning: Note: To use this interface you need to separately sign-up for an account: https://code-de.org/dhus/#/self-registration
83 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # code-de-tools
2 |
3 | Copernicus Data-access and Exploitation platform for Germany (CODE-DE) - user tools
4 |
5 | ## Description
6 |
7 | This tools package publishes several scripts, examples and utilities to automate queries and data retrieval from the CODE-DE offerings.
8 |
9 | The CODE-DE Platform provides standardized interfaces for dataset discovery, Earth-Observation product filtered searches and download.
10 |
11 | ## Contents
12 |
13 | The scripts are located in the `bin/` subdirectory. The script header contains instructions on how to use. For convenience the usage help is listed below.
14 |
15 | ### code-de-query-download.sh
16 |
17 | Performs an OpenSearch query and downloads the found products.
18 | ```
19 | USAGE:
20 |
21 | ./code-de-query-download.sh -c|--condition=... [-b|--baseurl=https://catalog.code-de.org] [-o|--curlOpts=curl-options] [-l|--limit=50] [-p|--parallel=1] [-n|--noTransfer]
22 | --condition is the full OpenSearch query, for example:
23 | -c 'parentIdentifier=EOP:CODE-DE:S2_MSI_L1C&startDate=2018-06-04T00:00:00.000Z&endDate=2018-06-04T23:59:59.999&bbox=5.9,47.2,15.2,55'
24 | --user in the form username:password (alternatively use --curlOpts='--netrc-file...myNetRc...file')
25 | --baseurl of the CODE-DE services (default is https://catalog.code-de.org)
26 | --curlOpts allows specifying special curl options like -o='--progress-bar --netrc-file=...myNetRc...file'
27 | --limit the amount of products to be retrieved (default=50, max=500)
28 | --parallel count of retrievals, WARNING: do not overload your system and network (the server might limit you to 2 or 4 parallel downloads)
29 | --noTransfer to test the query
30 | ```
31 | Output products are placed in the current directory.
32 |
33 | #### Example
34 | ```
35 | code-de-query-download.sh -c 'parentIdentifier=EOP:CODE-DE:S2_MSI_L1C&startDate=2018-06-04T00:00:00.000Z&endDate=2018-06-04T23:59:59.999&bbox=5.9,47.2,15.2,55' -o='--insecure --netrc-file /home/user/.netrc_code-de --location --cookie-jar /tmp/t' -l=2
36 | ```
37 | Note: when using a __.netrc__ file with ```--curlOpts```, make sure you include ```--cookie-jar``` and ```--location``` options. The .netrc file must contain the single-sign-on server, with your login and password of your CODE-DE account:
38 | ```
39 | machine sso.eoc.dlr.de
40 | login xxxxxxx
41 | password yyyyyyy
42 | ```
43 |
44 | #### Change History
45 | 2018-06-15 Enhanced with options --user, --curlOptions and --noTransfer
46 |
47 | ### dataHubTransfer.sh
48 |
49 | This script will search and incrementally download new products from a DHuS.
50 |
51 | The script is intended to be run in a cron job, e.g.:
52 | ```
53 | 10 * * * * /path-to-cronjob/dataHubTransfer.sh /path/to/workdir &>> /path/to/workdir/log/dataHubTransfer_$(date +\%Y\%m\%d).log
54 | ```
55 |
56 | The path to a writable working directory must contain a file:
57 | ```
58 | dataHubTransfer.properties
59 | ```
60 |
61 | containing the properties:
62 | ```
63 | dhusUrl="https://code-de.org/dhus"
64 | WGETRC=/path/to/.wgetrc (file with user=xxx and password=yyy)
65 | basefilter="platformname:Sentinel-2 AND footprint:\"Intersects(POLYGON((5.9 47.2,15.2 47.2,15.2 55.1,5.9 55.1,5.9 47.2)))\""
66 | outputPath=/tmp
67 | lastIngestionDate=NOW-1DAY
68 | batchSize=100
69 | #MAXCACHESIZE=$((9 * 1000000)) ## in kbytes
70 | #transferAction=/path/to/some/command/to/run/after/file/transfer
71 | ```
72 | Note: properties above prefixed with `#` are optional.
73 |
74 |
75 | ### code-de-transfer.sh
76 | Similar to ```dataHubTransfer.sh```, but uses the CODE-DE native OpenSearch interface (better performance and the whole catalog of ONLINE and OFFLINE products). See the script header for usage instructions.
77 |
78 |
79 | ## Installation
80 |
81 | Place the script package `bin/` contents somewhere on your PATH. The scripts require bash,
82 | wget, curl and a few common shell utilities.
83 |
84 |
85 | ## License
86 |
87 | See the LICENSE file.
88 |
--------------------------------------------------------------------------------
/bin/code-de-query-download.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # File: code-de-query-download.sh
3 | #
4 | # Description:
5 | # Performs an OpenSearch query and downloads the found products
6 | #
7 | # Note:
8 | # The use of the CODE-DE tools, online services and data is subject to the CODE-DE Terms & Conditions
9 | # https://code-de.org/en/terms/CODE-DE_Terms.pdf
10 | # Currently CODE-DE does not use authentication/authorization, so a login is not required.
11 | #
12 | function usage {
13 | echo "USAGE:"
14 | echo "$0 -c|--condition=... [-b|--baseurl=https://catalog.code-de.org] [-o|--curlOpts=curl-options] [-l|--limit=50] [-p|--parallel=1] [-n|--noTransfer]"
15 | echo " --condition is the full OpenSearch query, for example:"
16 | echo " -c 'parentIdentifier=EOP:CODE-DE:S2_MSI_L1C&startDate=2018-06-04T00:00:00.000Z&endDate=2018-06-04T23:59:59.999&bbox=5.9,47.2,15.2,55'"
17 | echo " --user in the form username:password (alternatively use --curlOpts='--netrc-file...myNetRc...file')"
18 | echo " --baseurl of the CODE-DE services (default is https://catalog.code-de.org)"
19 | echo " --curlOpts allows specifying special curl options like -o='--progress-bar --netrc-file=...myNetRc...file' --cookie-jar /tmp/t --location-trusted"
20 | echo " --limit the amount of products to be retrieved (default=50, max=500)"
21 | echo " --parallel count of retrievals, WARNING: do not overload your system and network (the server might limit you to 2 or 4 parallel downloads)"
22 | echo " --noTransfer to test the query"
23 | echo ""
24 | echo "A .netrc file must contain the single-sign-on machine sso.eoc.dlr.de and the login and password for your CODE-DE account."
25 | echo ""
26 | echo "Output products are placed in the current directory."
27 | echo ""
28 | exit 1;
29 | }
30 |
31 | # defaults for the command line options
32 | user=''
33 | baseUrl=https://catalog.code-de.org
34 | curlOpts=''
35 | batchSize=50
36 | parallel=1
37 | noExec=''
38 |
39 | while [ "$#" -gt 0 ]; do
40 | case "$1" in
41 | -c|--condition) condition="$2"; shift 2;;
42 | -u|--user) user="--cookie-jar /tmp/$(basename $0)_$$ --location-trusted --user $2"; shift 2;;
43 | -b|--baseurl) baseUrl="$2"; shift 2;;
44 | -o|--curlOpts) curlOpts="$2"; shift 2;;
45 | -l|--limit) batchSize="$2"; shift 2;;
46 | -p|--parallel) parallel="$2"; shift 2;;
47 | -n|--noTransfer) noExec="echo"; shift 1;;
48 |
49 | -c=*|--condition=*) condition="${1#*=}"; shift 1;;
50 | -u=*|--user=*) user="--cookie-jar /tmp/$(basename $0)_$$ --location-trusted --user ${1#*=}"; shift 1;;
51 | -b=*|--baseurl=*) baseUrl="${1#*=}"; shift 1;;
52 | -o=*|--curlOpts=*) curlOpts="${1#*=}"; shift 1;;
53 | -l=*|--limit=*) batchSize="${1#*=}"; shift 1;;
54 | -p=*|--parallel=*) parallel="${1#*=}"; shift 1;;
55 |
56 | *) echo "ERROR: unknown option '$1'"; usage; exit;;
57 | esac
58 | done
59 |
60 | if [ "$condition" == "" ]; then
61 | echo "ERROR: no condition defined!"
62 | echo ""
63 | usage
64 | exit 1
65 | fi
66 | echo "Running query with $condition"
67 |
68 | # expand the base URL
69 | searchUrl=$baseUrl/opensearch/request/?httpAccept=application/atom%2Bxml
70 |
71 | # execute query and extract the download URL list
72 | urls=$(curl -s $curlOpts "${searchUrl}&${condition}&maximumRecords=${batchSize}" | xmllint --xpath '//*[local-name()="link" and @title="Download"]/@href' - |sed -e 's/ *href="//g' | tr '"' '\n' )
73 |
74 | count=$(echo $urls | wc -w | tr -d ' ')
75 | if [ $count = 0 ]; then
76 | echo "No products found."
77 | exit
78 | else
79 | echo "Found $count products, downloading..."
80 | fi
81 |
82 | # download them all to the local directory
83 | echo $urls | xargs -n1 -P${parallel} $noExec curl $user $curlOpts -O
84 |
--------------------------------------------------------------------------------
/bin/code-de-transfer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Filename: code-de-transfer.sh
3 | #
4 | # Description:
5 | # This script will search and incrementally download new products from CODE-DE.
6 | #
7 | # The script is intended to be run in a cron job, e.g.:
8 | # 10 * * * * /path-to-cronjob/code-de-transfer.sh /path/to/workdir &>> /path/to/workdir/log/code-de-transfer_$(date +\%Y\%m\%d).log
9 | #
10 | # Parameters:
11 | # Path to a writable working directory
12 | # that has a file:
13 | # code-de-transfer.properties
14 | # containing the properties:
15 | # WGETRC=/path/to/.wgetrc (file with user=xxx and password=yyy of the CODE-DE download account)
16 | # basefilter="parentIdentifier=EOP:CODE-DE:S2_MSI_L1C&geometry=POLYGON((5.9 47.2,15.2 47.2,15.2 55.1,5.9 55.1,5.9 47.2))"
17 | # lastIngestionDate=2018-01-01T00:00:00.000
18 | # outputPath=/tmp
19 | # batchSize=100
20 | # #queryUrl="https://catalog.code-de.org/opensearch/request"
21 | # #MAXCACHESIZE=$((9 * 1000000)) ## in kbytes
22 | # #transferAction=/path/to/some/command/to/run/after/file/transfer
23 | #
24 | # Depends:
25 | # includes/singleton.sh
26 | # includes/error-handler.sh, includes/log-handler.sh
27 | SCRIPT_DIR=$(dirname $0)
28 | . $SCRIPT_DIR/includes/log-handler.sh
29 |
30 | # defaults (queryUrl may be overridden by the properties file loaded below)
31 | queryUrl='https://catalog.code-de.org/opensearch/request'
32 |
33 | # check working directory and load properties
34 | WD=${1-}
35 | if [ "$WD" == "" ]; then
36 | logerr "no working directory specified"
37 | exit 1
38 | elif [ ! -d $WD ]; then
39 | logerr "no working directory $WD"
40 | exit 1
41 | elif [ ! -r $WD/code-de-transfer.properties ]; then
42 | logerr "missing $WD/code-de-transfer.properties"
43 | exit 1
44 | fi
45 | log "Using working directory $WD"
46 |
47 | # load the properties
48 | . $WD/code-de-transfer.properties
49 |
50 | # singleton pattern (singleton.sh presumably prevents concurrent runs of this script -- see include)
51 | . $SCRIPT_DIR/includes/singleton.sh
52 |
53 | # error handler: print location of last error and process it further
54 | . $SCRIPT_DIR/includes/error-handler.sh
55 |
56 | # ------------------------------------------------------------------
57 | # check storage space: stop when the output cache exceeds MAXCACHESIZE (in kbytes)
58 | if [ "$MAXCACHESIZE" != "" ]; then
59 | cachesize=$(du -sk $outputPath |cut -f1)
60 | if (( $cachesize > $MAXCACHESIZE )); then
61 | log WARNING "cache full ($cachesize > max $MAXCACHESIZE kbyte), processing stopped"
62 | exit
63 | fi
64 | fi
65 |
66 | # keeps the latest ingestionDate of previous retrieval
67 | lastDateHolder=$WD/lastFileDate
68 | function keepLastFileDate {
69 | # remember date of this file for next query
70 | dateISO=$1
71 | dateNum=$(echo $dateISO | tr -d -- '-: .TZ' |cut -c1-12)  # reduce ISO date to YYYYMMDDhhmm for 'touch -t'
72 | echo -n $dateISO > $lastDateHolder
73 | touch -t $dateNum $lastDateHolder
74 | }
75 |
76 | # ------------------------------------------------------------------
77 | log "starting with $queryUrl"
78 |
79 | # ------------------------------------------------------------------
80 | # retransmit handling: products listed here were already transferred successfully
81 | TRANSFERHISTORY=$WD/transferHistory.txt
82 |
83 | # ------------------------------------------------------------------
84 | # error handling: defective downloads are recorded here (retried until seen more than twice)
85 | DEFECTSHISTORY=$WD/defectsHistory.txt
86 | function logDefect
87 | {
88 | logerr "$2"
89 | echo "$(date '+%Y-%m-%dT%H:%M:%S') ingestion failed $1" >> $DEFECTSHISTORY  # fixed format: %h/%m/%s printed month-name/month/epoch-seconds instead of time-of-day
90 | }
91 |
92 | # ------------------------------------------------------------------
93 | # prepare query filter
94 | if [ -r $lastDateHolder ]; then
95 | log "Using $lastDateHolder"
96 | lastIngestionDate="$(cat $lastDateHolder)"
97 | fi
98 | condition="$basefilter&creationDate=[$lastIngestionDate"  # open-ended interval '[date' = everything created since lastIngestionDate
99 |
100 | log "Searching for new files with $condition"
101 |
102 | # ------------------------------------------------------------------
103 | # query for new data
104 | export WGETRC
105 | response=$(/usr/bin/wget --auth-no-challenge --no-check-certificate -q -O - "$queryUrl?httpAccept=application/sru%2Bxml&recordSchema=om&startRecord=1&maximumRecords=$batchSize&$condition&sortBy=creationDate&sortDir=ASC" 2>&1 | cat)
106 | if [ "$?" -ne 0 ] || [ "${response:0:1}" != "<" ] ; then
107 | logerr "query failed: $response"
108 | exit 1
109 | fi
110 | # the following xmllint/tr/egrep/sed/cut/paste pipeline ensures proper parsing and order of attributes in output
111 | files=$(echo $response \
112 | | xmllint --xpath "//*[local-name()='timePosition' or local-name()='ProductInformation' or local-name()='Size' or local-name()='identifier']" - \
113 | | tr '<' '\n' | egrep -v '^/' | egrep 'timePosition|href|size|identifier' | sed -e 's/uom=".*"//' | tr '=' '>' | tr -d '"' | cut -d'>' -f2 | paste -d';' - - - -
114 | )
115 |
116 | count=$(echo $files | wc -w | tr -d ' ')
117 | if [ $count == 0 ]; then
118 | log "Found nothing."
119 | exit 0;
120 | fi
121 | log "ingesting next $count products..."
122 |
123 | # ------------------------------------------------------------------
124 | # process the products found; each $f is 'ingestionDate;downloadUrl;size;identifier' (built by the paste above)
125 | index=0
126 | for f in ${files[@]}
127 | do
128 | # next index
129 | index=$((index+1))
130 |
131 | ingestionDate=$(echo $f | cut -d';' -f1)
132 | downloadUrl=$(echo $f | cut -d';' -f2)
133 | size=$(echo $f | cut -d';' -f3)
134 | id=$(echo $f | cut -d';' -f4)
135 | id=${id##*:}
136 | file="$outputPath/${id##*/}.SAFE.zip"
137 |
138 | # check if already retrieved (a file with the expected size already exists locally)
139 | if [[ -r "$file" && ( $size == $(stat -L --format='%s' "${file}") ) ]]; then
140 | log "[$index/$count] Skipping $f"
141 | keepLastFileDate $ingestionDate
142 | continue
143 | elif [ -r $TRANSFERHISTORY ] && [ $(grep -c $id $TRANSFERHISTORY) -gt 0 ]; then
144 | log "[$index/$count] already transferred $id"
145 | keepLastFileDate $ingestionDate
146 | continue
147 | elif [ -r $DEFECTSHISTORY ] && [ $(grep -c $id $DEFECTSHISTORY) -gt 2 ]; then
148 | log WARNING "[$index/$count] Skipping previously defect $f"
149 | keepLastFileDate $ingestionDate
150 | continue
151 | else
152 | # retrieve file
153 | log "[$index/$count] Reading $downloadUrl $ingestionDate $size"
154 | wget -q --auth-no-challenge --no-check-certificate -O "${file}_tmp" "$downloadUrl"
155 | fi
156 |
157 | # check size
158 | if [[ $size != $(stat -L --format='%s' "${file}_tmp") ]]; then
159 | logDefect "$file" "size mismatch $file $size <> $(stat -L --format='%s' '${file}_tmp')"
160 | exit
161 | fi
162 |
163 | # check ZIP integrity; '| cat' masks unzip's status (checked via PIPESTATUS below), presumably to bypass the error handler -- confirm
164 | unzip -tqq "${file}_tmp" > /dev/null 2>&1 | cat
165 | if [ "${PIPESTATUS[0]}" -ne 0 ]; then
166 | logDefect "$file" "transfered file contains errors, will retry in next round"
167 | exit
168 | else
169 | log "[$index/$count] Transferred $file $size bytes"
170 | fi
171 |
172 | # rename validated ZIP file
173 | mv "${file}_tmp" "${file}"
174 |
175 | # remember date of this file for next query
176 | keepLastFileDate $ingestionDate
177 |
178 | echo "$id" >> $TRANSFERHISTORY
179 |
180 | # --------------------------------------------------------------
181 | # execute transfer actions
182 | if [ "$transferAction" != "" ] && [ -x $transferAction ]; then
183 | $transferAction "$file"
184 | fi
185 |
186 | done
187 |
188 | log "Done."
--------------------------------------------------------------------------------
/bin/dataHubTransfer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Filename: dataHubTransfer.sh
3 | #
4 | # Description:
5 | # This script will search and incrementally download new products from a DHuS.
6 | #
7 | # The script is intended to be run in a cron job, e.g.:
8 | # 10 * * * * /path-to-cronjob/dataHubTransfer.sh /path/to/workdir &>> /path/to/workdir/log/dataHubTransfer_$(date +\%Y\%m\%d).log
9 | #
10 | # Parameters:
11 | # Path to a writable working directory
12 | # that has a file:
13 | # dataHubTransfer.properties
14 | # containing the properties:
15 | # dhusUrl="https://code-de.org/dhus"
16 | # WGETRC=/path/to/.wgetrc (file with user=xxx and password=yyy of the account at the DHuS service)
17 | # basefilter="platformname:Sentinel-2 AND footprint:\"Intersects(POLYGON((5.9 47.2,15.2 47.2,15.2 55.1,5.9 55.1,5.9 47.2)))\""
18 | # outputPath=/tmp
19 | # lastIngestionDate=NOW-1DAY
20 | # batchSize=100
21 | # #MAXCACHESIZE=$((9 * 1000000)) ## in kbytes
22 | # #transferAction=/path/to/some/command/to/run/after/file/transfer
23 | #
24 | # Depends:
25 | # includes/singleton.sh
26 | # includes/error-handler.sh
SCRIPT_DIR=$(dirname "$0")
. "$SCRIPT_DIR/includes/log-handler.sh"

# The working directory is the single, mandatory command line argument;
# all paths are quoted so directories with spaces do not break the script.
WD=${1-}
if [ "$WD" == "" ]; then
  logerr "no working directory specified"
  exit 1
elif [ ! -d "$WD" ]; then
  logerr "no working directory $WD"
  exit 1
elif [ ! -r "$WD/dataHubTransfer.properties" ]; then
  logerr "missing $WD/dataHubTransfer.properties"
  exit 1
fi
log "Using working directory $WD"

# load the properties (dhusUrl, WGETRC, basefilter, outputPath, ...)
. "$WD/dataHubTransfer.properties"

# singleton pattern
. "$SCRIPT_DIR/includes/singleton.sh"

# error handler: print location of last error and process it further
. "$SCRIPT_DIR/includes/error-handler.sh"
51 |
# ------------------------------------------------------------------
# check storage space: stop (successfully, exit 0) when the local cache
# exceeds the configured maximum; MAXCACHESIZE is optional, in kbytes
if [ "$MAXCACHESIZE" != "" ]; then
  cachesize=$(du -sk "$outputPath" | cut -f1)
  if (( cachesize > MAXCACHESIZE )); then
    log WARNING "cache full ($cachesize > max $MAXCACHESIZE kbyte), processing stopped"
    exit
  fi
fi
61 |
# keeps the latest ingestionDate of previous retrieval
lastDateHolder=$WD/lastFileDate

# keepLastFileDate DATE_ISO
#   Persist DATE_ISO (e.g. 2018-01-02T03:04:05.678Z) into $lastDateHolder
#   so the next run can resume its query from there, and set the file's
#   mtime to that date for easy inspection with ls.
function keepLastFileDate {
  local dateISO dateNum
  dateISO=$1
  # reduce the ISO date to touch's YYYYMMDDhhmm format
  dateNum=$(echo "$dateISO" | tr -d -- '-: .TZ' | cut -c1-12)
  printf '%s' "$dateISO" > "$lastDateHolder"
  touch -t "$dateNum" "$lastDateHolder"
}
71 |
# ------------------------------------------------------------------
log "starting with $dhusUrl"

# ------------------------------------------------------------------
# retransmit handling: products already fetched are recorded here
TRANSFERHISTORY=$WD/transferHistory.txt

# ------------------------------------------------------------------
# error handling
DEFECTSHISTORY=$WD/defectsHistory.txt

# logDefect FILE MESSAGE
#   Log MESSAGE via logerr and record a timestamped failure entry for
#   FILE in $DEFECTSHISTORY (after 3 entries the product is skipped).
function logDefect
{
  logerr "$2"
  # NOTE: was '+%Y-%m-%dT%h:%m:%s' -- %h (abbreviated month name),
  # %m (month) and %s (epoch seconds) produced a garbage timestamp;
  # %H:%M:%S matches the format used in log-handler.sh
  echo "$(date '+%Y-%m-%dT%H:%M:%S') ingestion failed $1" >> "$DEFECTSHISTORY"
}
87 |
# ------------------------------------------------------------------
# prepare query filter: resume from the ingestion date persisted by
# keepLastFileDate on the previous run, otherwise use the
# lastIngestionDate configured in the properties file
if [ -r "$lastDateHolder" ]; then
  log "Using $lastDateHolder"
  lastIngestionDate="$(cat "$lastDateHolder")"
fi
# use now with a 30-second offset to ensure DHuS internal DB is up-to-date
condition="$basefilter AND ingestionDate:[$lastIngestionDate TO NOW-30SECONDS]"

log "Searching for new files with $condition"
98 |
# ------------------------------------------------------------------
# query for new data
export WGETRC
# Capture wget's own exit status without tripping set -e.  The former
# "| cat" pipeline made the subsequent $? check always see cat's status
# (0), so query failures were only ever caught by the leading-'<' sniff.
rc=0
response=$(/usr/bin/wget --auth-no-challenge --no-check-certificate -q -O - "$dhusUrl/search?q=$condition&rows=$batchSize&orderby=ingestiondate asc" 2>&1) || rc=$?
if [ "$rc" -ne 0 ] || [ "${response:0:1}" != "<" ] ; then
  logerr "query failed: $response"
  exit 1
fi
# the following xmllint, sed and awk combination ensures proper parsing and order of attributes in output
# (unquoted echo deliberately flattens the XML response onto one line first)
files=$(echo $response \
  | xmllint --xpath "//*[local-name()='entry']/*[local-name()='title' or @name='uuid' or @name='ingestiondate' or @name='size']" - \
  | sed -e 's^[a-zA-Z:]*>^\n^g' -e 's/<[a-zA-Z]*://g' -e 's/>/ /g' \
  | awk 'function printentry() {print uuid";"name";"date";"size} /^title/ && (uuid!="") {printentry(); uuid=""} /^title/ {name=$2} /uuid/ {uuid=$3} /ingestiondate/ {date=$3} /size/ {size=$3$4} END {printentry()}'
)
113 |
# count the products returned (one whitespace-separated word per product)
count=$(echo $files | wc -w | tr -d ' ')
if [ "$count" -eq 0 ]; then
  log "Found nothing."
  exit 0
fi
log "ingesting next $count products..."
120 |
# ------------------------------------------------------------------
# process the products found
index=0
# intentionally unquoted: word splitting yields one entry per product
for f in ${files[@]}
do
  # next index
  index=$((index+1))

  # Each entry is "uuid;name;date;size" as printed by printentry in the
  # awk script above.
  # NOTE(review): the previous field numbers (-f5, -f1, -f2, -f3-4) did
  # not match that order and left $uuid empty, breaking the OData URL.
  uuid=$(echo "$f" | cut -d';' -f1)
  safe=$(echo "$f" | cut -d';' -f2)
  ingestionDate=$(echo "$f" | cut -d';' -f3)
  size=$(echo "$f" | cut -d';' -f4)
  file="$outputPath/${safe}.SAFE.zip"

  # check if already retrieved
  # NOTE(review): $size is DHuS's human-readable size (e.g. "8.17GB")
  # while stat prints bytes, so this comparison presumably never matches;
  # the transfer-history check below is what prevents re-downloads -- confirm.
  if [[ -r "$file" && ( $size == $(stat -L --format='%s' "${file}") ) ]]; then
    log "[$index/$count] Skipping $f"
    keepLastFileDate "$ingestionDate"
    continue
  elif [ -r "$TRANSFERHISTORY" ] && [ "$(grep -c -- "$safe" "$TRANSFERHISTORY")" -gt 0 ]; then
    log "[$index/$count] already transferred $safe"
    keepLastFileDate "$ingestionDate"
    continue
  elif [ -r "$DEFECTSHISTORY" ] && [ "$(grep -c -- "$safe" "$DEFECTSHISTORY")" -gt 2 ]; then
    # more than two failed attempts recorded: give up on this product
    log WARNING "[$index/$count] Skipping previously defect $f"
    keepLastFileDate "$ingestionDate"
    continue
  else
    # retrieve the file via the OData $value endpoint
    log "[$index/$count] Reading $uuid $safe $ingestionDate $size"
    wget -q --auth-no-challenge --no-check-certificate -O "${file}_tmp" "$dhusUrl/odata/v1/Products('${uuid}')/\$value"
  fi

  # check ZIP integrity; the trailing "cat" keeps a failing unzip from
  # triggering the ERR trap / set -e, PIPESTATUS[0] still has its status
  unzip -tqq "${file}_tmp" > /dev/null 2>&1 | cat
  if [ "${PIPESTATUS[0]}" -ne 0 ]; then
    logDefect "$file" "transfered file contains errors, will retry in next round"
    exit
  else
    log "[$index/$count] Transferred $file $size bytes"
  fi

  # rename validated ZIP file
  mv "${file}_tmp" "${file}"

  # remember date of this file for next query
  keepLastFileDate "$ingestionDate"

  echo "$file" >> "$TRANSFERHISTORY"

  # --------------------------------------------------------------
  # execute transfer actions
  # ($transferAction stays unquoted so it may carry embedded arguments)
  if [ "$transferAction" != "" ] && [ -x "$transferAction" ]; then
    $transferAction "$file"
  fi

done

log "Done."
180 |
181 |
--------------------------------------------------------------------------------
/bin/includes/error-handler.sh:
--------------------------------------------------------------------------------
# error handler: print location of last error and process it further
function error_handler() {
  local LASTLINE="$1" # argument 1: last line of error occurrence
  local LASTERR="$2"  # argument 2: error code of last command
  # diagnostics belong on stderr, consistent with logerr in log-handler.sh
  echo "$(date +%Y-%m-%dT%H:%M:%SZ) ERROR in ${0} (line ${LASTLINE} exit status: ${LASTERR})" >&2
  exit "$LASTERR"
}
# abort and log errors
set -e
trap 'error_handler ${LINENO} $?' ERR
11 |
--------------------------------------------------------------------------------
/bin/includes/log-handler.sh:
--------------------------------------------------------------------------------
# log handler functions

# log [LEVEL] MESSAGE
#   Print a timestamped log line tagged with the calling script's
#   basename.  With one argument the level defaults to INFO; with two
#   the first argument is the level.
function log() {
  local stamp level
  stamp=$(date +%Y-%m-%dT%H:%M:%SZ)
  if [[ $# -ne 2 ]]; then
    level=INFO
  else
    level=$1
  fi
  echo "$stamp $level ${BASH_SOURCE[1]##*/} ${2:-$1}"
}
5 |
# logerr MESSAGE...
#   Print a timestamped ERROR line, tagged with the calling script's
#   basename, to stderr.
function logerr() {
  local prefix
  prefix="$(date +%Y-%m-%dT%H:%M:%SZ) ERROR ${BASH_SOURCE[1]##*/}"
  printf '%s %s\n' "$prefix" "$*" >&2
}
9 |
--------------------------------------------------------------------------------
/bin/includes/singleton.sh:
--------------------------------------------------------------------------------
# bash include file implementing singleton pattern (script runs only once in parallel)
# Requires: $WD (writable working directory) set by the including script.
PIDFILE=$WD/pid
LOCK=$WD/lock

# check for existing lock (directory); mkdir is atomic, so only one
# process can create it successfully
mkdir "${LOCK}" >> /dev/null 2>&1
if [ $? != 0 ]; then
  # lock already present: is the recorded PID a live instance of this script?
  # (quoting the grep substitution avoids "unary operator expected" when
  # the pidfile is missing or the /proc entry is gone)
  pid=$(cat "$PIDFILE" 2>/dev/null)
  scriptname="${0##*/}"
  if [ -r "/proc/$pid" ] && [ "$(grep -c "$scriptname" "/proc/$pid/cmdline" 2>/dev/null)" == 1 ]; then
    echo -e "$(date +%Y-%m-%dT%H:%M:%SZ) WARNING: an instance of \"$scriptname\" is running with PID=$pid (if it isn't running: delete the lockdir ${LOCK})"
    exit
  fi
  # otherwise the lock is stale: fall through and take it over
fi

echo $$ > "$PIDFILE"

# ensure lock is removed when exiting
trap 'rm -fr "${LOCK}" "${PIDFILE}"' EXIT
20 |
--------------------------------------------------------------------------------