├── .coveragerc
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── docs
├── arcgis-metadata.dtd
├── fgdc-std.dtd
└── iso-19139-std.dtd
├── gis_metadata
├── __init__.py
├── arcgis_metadata_parser.py
├── exceptions.py
├── fgdc_metadata_parser.py
├── iso_metadata_parser.py
├── metadata_parser.py
├── tests
│ ├── __init__.py
│ ├── data
│ │ ├── arcgis_metadata.xml
│ │ ├── fgdc_metadata.xml
│ │ ├── iso_citation_href.xml
│ │ ├── iso_citation_linkage.xml
│ │ ├── iso_metadata.xml
│ │ └── utility_metadata.xml
│ └── tests.py
└── utils.py
├── poetry.lock
├── pyproject.toml
└── setup.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source = gis_metadata
3 | data_file = .coverage
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .cache/
3 | .coverage
4 | __pycache__/
5 | build/
6 | dist/
7 | gis-metadata-parser.egg-info/
8 | gis_metadata_parser.egg-info/
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.9"
4 | - "3.8"
5 | - "3.7"
6 | - "3.6"
7 |
8 | install:
9 | - "pip install mock"
10 | - "pip install parserutils"
11 | - "pip install frozendict"
12 | - "pip install coveralls"
13 |
14 | script:
15 | coverage run --omit=gis_metadata/tests/*.py --source=gis_metadata -m unittest gis_metadata.tests.tests
16 |
17 | after_success:
18 | coveralls
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016, Conservation Biology Institute
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | * Neither the name of gis-metadata-parser nor the names of its
15 | contributors may be used to endorse or promote products derived from
16 | this software without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # gis-metadata-parser
2 |
3 | XML parsers for GIS metadata that are designed to read in, validate, update and output a core set of properties that have been mapped between the most common standards, currently:
4 |
5 | * FGDC
6 | * ISO-19139 (and ISO-19115)
7 | * ArcGIS (tested with ArcGIS format 1.0).
8 |
 9 | This library is compatible with Python versions 3.6 through 3.9.
10 |
11 | [](https://app.travis-ci.com/github/consbio/gis-metadata-parser)
12 | [](https://coveralls.io/github/consbio/gis-metadata-parser?branch=main)
13 |
14 | ## Installation
15 | Install with `pip install gis-metadata-parser`.
16 |
17 | ## Usage
18 |
19 | Parsers can be instantiated from files, XML strings or URLs. They can be converted from one standard to another as well.
20 | ```python
21 | from gis_metadata.arcgis_metadata_parser import ArcGISParser
22 | from gis_metadata.fgdc_metadata_parser import FgdcParser
23 | from gis_metadata.iso_metadata_parser import IsoParser
24 | from gis_metadata.metadata_parser import get_metadata_parser
25 |
26 | # From file objects
27 | with open(r'/path/to/metadata.xml') as metadata:
28 | fgdc_from_file = FgdcParser(metadata)
29 |
30 | with open(r'/path/to/metadata.xml') as metadata:
31 | iso_from_file = IsoParser(metadata)
32 |
33 | # Detect standard based on root element, metadata
34 | fgdc_from_string = get_metadata_parser(
 35 |     """
 36 |     <metadata>
 37 |         <idinfo>
 38 |         </idinfo>
 39 |     </metadata>
 40 |     """
42 | )
43 |
44 | # Detect ArcGIS standard based on root element and its nodes
45 | iso_from_string = get_metadata_parser(
 46 |     """
 47 |     <metadata>
 48 |         <dataIdInfo/>
 49 |         <distInfo/>
 50 |         <Esri/>
 51 |     </metadata>
 52 |     """
54 | )
55 |
56 | # Detect ISO standard based on root element, MD_Metadata or MI_Metadata
57 | iso_from_string = get_metadata_parser(
 58 |     """
 59 |     <MD_Metadata>
 60 |         <identificationInfo/>
 61 |     </MD_Metadata>
 62 |     """
65 | )
66 |
67 | # Convert from one standard to another
68 | fgdc_converted = iso_from_file.convert_to(FgdcParser)
69 | iso_converted = fgdc_from_file.convert_to(IsoParser)
70 | arcgis_converted = iso_converted.convert_to(ArcGISParser)
71 |
72 | # Output supported properties as key value pairs (dict)
73 | fgdc_key_vals = fgdc_from_file.convert_to(dict)
74 | iso_key_vals = iso_from_file.convert_to(dict)
75 | ```
76 |
77 | Finally, the properties of the parser can be updated, validated, applied and output:
78 | ```python
79 | with open(r'/path/to/metadata.xml') as metadata:
80 | fgdc_from_file = FgdcParser(metadata)
81 |
82 | # Example simple properties
83 | fgdc_from_file.title
84 | fgdc_from_file.abstract
85 | fgdc_from_file.place_keywords
86 | fgdc_from_file.thematic_keywords
87 |
88 | # :see: gis_metadata.utils.SUPPORTED_PROPS for list of all supported properties
89 |
90 | # Complex properties
91 | fgdc_from_file.attributes
92 | fgdc_from_file.bounding_box
93 | fgdc_from_file.contacts
94 | fgdc_from_file.dates
95 | fgdc_from_file.digital_forms
96 | fgdc_from_file.larger_works
97 | fgdc_from_file.process_steps
98 | fgdc_from_file.raster_info
99 |
100 | # :see: gis_metadata.utils.COMPLEX_DEFINITIONS for structure of all complex properties
101 |
102 | # Update properties
103 | fgdc_from_file.title = 'New Title'
 104 | fgdc_from_file.dates = {'type': 'single', 'values': '1/1/2016'}
105 |
106 | # Apply updates
107 | fgdc_from_file.validate() # Ensure updated properties are valid
108 | fgdc_from_file.serialize() # Output updated XML as a string
109 | fgdc_from_file.write() # Output updated XML to existing file
110 | fgdc_from_file.write(out_file_or_path='/path/to/updated.xml') # Output updated XML to new file
111 | ```
112 |
113 | ## Extending and Customizing
114 |
115 | ### Tips
116 |
117 | There are a few unwritten (until now) rules about the way the metadata parsers are wired to work:
118 |
119 | 1. Properties are generally defined by XPATH in each `parser._data_map`
120 | 2. Simple parser properties accept only values of `string` and `list`'s of `string`'s
121 | 3. XPATH's configured in the data map support references to element attributes: `'path/to/element/@attr'`
122 | 4. Complex parser properties are defined by custom parser/updater functions instead of by XPATH
123 | 5. Complex parser properties accept values of type `dict` containing simple properties, or a list of said `dict`'s
124 | 6. XPATH keys in the data map with leading underscores are parsed, but not validated or written out
125 | 7. XPATH keys in the data map that "shadow" other properties but with a leading underscore serve as secondary values
126 | 8. Secondary values are used in the absence of a primary value if primary location (element or attribute) is missing
127 | 9. Additional underscores indicate further locations to check for missing values, i.e. `title`, `_title`, `__title`
128 |
129 | Some examples of existing secondary properties are as follows:
130 | ```python
131 | # In the ArcGIS parser for distribution contact phone:
132 |
133 | ARCGIS_TAG_FORMATS = frozendict({
134 | ...
135 | 'dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/cntPhone/voiceNum',
136 | '_dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/voiceNum', # If not in cntPhone
137 | ...
138 | })
139 |
140 | # In the FGDC parser for sub-properties in the contacts definition:
141 |
142 | FGDC_DEFINITIONS = dict({k: dict(v) for k, v in iteritems(COMPLEX_DEFINITIONS)})
143 | FGDC_DEFINITIONS[CONTACTS].update({
144 | '_name': '{_name}',
145 | '_organization': '{_organization}'
146 | })
147 | ...
148 | class FgdcParser(MetadataParser):
149 | ...
150 | def _init_data_map(self):
151 | ...
152 | ct_format = FGDC_TAG_FORMATS[CONTACTS]
153 | fgdc_data_structures[CONTACTS] = format_xpaths(
154 | ...
155 | name=ct_format.format(ct_path='cntperp/cntper'),
156 | _name=ct_format.format(ct_path='cntorgp/cntper'), # If not in cntperp
157 | organization=ct_format.format(ct_path='cntperp/cntorg'),
158 | _organization=ct_format.format(ct_path='cntorgp/cntorg'), # If not in cntperp
159 | )
160 |
161 | # Also see the ISO parser for secondary and tertiary sub-properties in the attributes definition:
162 |
163 | ISO_DEFINITIONS = dict({k: dict(v) for k, v in iteritems(COMPLEX_DEFINITIONS)})
164 | ISO_DEFINITIONS[ATTRIBUTES].update({
165 | '_definition_source': '{_definition_src}',
166 | '__definition_source': '{__definition_src}',
167 | '___definition_source': '{___definition_src}'
168 | })
169 | ```
170 |
171 |
172 | ### Examples
173 |
174 | Any of the supported parsers can be extended to include more of a standard's supported data. In this example we'll add two new properties to the `IsoParser`:
175 |
176 | * `metadata_language`: a simple string field describing the language of the metadata file itself (not the dataset)
177 | * `metadata_contacts`: a complex structure with contact info leveraging and enhancing the existing contact structure
178 |
179 | This example will cover:
180 |
181 | 1. Adding a new simple property
182 | 2. Configuring a secondary location for a property value
183 | 3. Referencing an element attribute in an XPATH
184 | 4. Adding a new complex property
185 | 5. Customizing the complex property to include a new sub-property
186 |
187 | Also, this example is specifically covered by unit tests.
188 |
189 | ```python
190 | from gis_metadata.iso_metadata_parser import IsoParser
191 | from gis_metadata.utils import COMPLEX_DEFINITIONS, CONTACTS, format_xpaths, ParserProperty
192 |
193 |
194 | class CustomIsoParser(IsoParser):
195 |
196 | def _init_data_map(self):
197 | super(CustomIsoParser, self)._init_data_map()
198 |
199 | # 1. Basic property: text or list (with secondary location referencing `codeListValue` attribute)
200 |
201 | lang_prop = 'metadata_language'
202 | self._data_map[lang_prop] = 'language/CharacterString' # Parse from here if present
203 | self._data_map['_' + lang_prop] = 'language/LanguageCode/@codeListValue' # Otherwise, try from here
204 |
205 | # 2. Complex structure (reuse of contacts structure plus phone)
206 |
207 | # 2.1 Define some basic variables
208 | ct_prop = 'metadata_contacts'
209 | ct_xpath = 'contact/CI_ResponsibleParty/{ct_path}'
 210 | ct_definition = dict(COMPLEX_DEFINITIONS[CONTACTS])  # copy before mutating a shared definition
 211 | ct_definition['phone'] = '{phone}'
 212 |
 213 | # 2.2 Reuse CONTACT structure to specify locations per prop (adapted from parent to add `phone`)
 214 | self._data_structures[ct_prop] = format_xpaths(
 215 | ct_definition,
216 | name=ct_xpath.format(ct_path='individualName/CharacterString'),
217 | organization=ct_xpath.format(ct_path='organisationName/CharacterString'),
218 | position=ct_xpath.format(ct_path='positionName/CharacterString'),
219 | phone=ct_xpath.format(
220 | ct_path='contactInfo/CI_Contact/phone/CI_Telephone/voice/CharacterString'
221 | ),
222 | email=ct_xpath.format(
223 | ct_path='contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString'
224 | )
225 | )
226 |
227 | # 2.3 Set the contact root to insert new elements at "contact" level given the defined path:
228 | # 'contact/CI_ResponsibleParty/...'
229 | # By default we would get multiple "CI_ResponsibleParty" elements under a single "contact"
230 | # This way we get multiple "contact" elements, each with its own single "CI_ResponsibleParty"
231 | self._data_map['_{prop}_root'.format(prop=ct_prop)] = 'contact'
232 |
233 | # 2.4 Leverage the default methods for parsing complex properties (or write your own parser/updater)
234 | self._data_map[ct_prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)
235 |
236 | # 3. And finally, let the parent validation logic know about the two new custom properties
237 |
238 | self._metadata_props.add(lang_prop)
239 | self._metadata_props.add(ct_prop)
240 |
241 |
242 | with open(r'/path/to/metadata.xml') as metadata:
243 | iso_from_file = CustomIsoParser(metadata)
244 |
245 | iso_from_file.metadata_language
246 | iso_from_file.metadata_contacts
247 | ```
248 |
--------------------------------------------------------------------------------
/docs/fgdc-std.dtd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
--------------------------------------------------------------------------------
/gis_metadata/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/consbio/gis-metadata-parser/a3cac07857bc425185d43ec819aece1a9533ec8c/gis_metadata/__init__.py
--------------------------------------------------------------------------------
/gis_metadata/arcgis_metadata_parser.py:
--------------------------------------------------------------------------------
1 | """ A module to contain utility ArcGIS metadata parsing helpers """
2 |
3 | from frozendict import frozendict
4 | from parserutils.collections import flatten_items, reduce_value, wrap_value
5 | from parserutils.elements import get_elements, get_element_name, get_element_attributes
6 | from parserutils.elements import clear_element, element_to_dict, insert_element, remove_element, remove_empty_element
7 |
8 | from gis_metadata.exceptions import InvalidContent
9 | from gis_metadata.metadata_parser import MetadataParser
10 | from gis_metadata.utils import DATE_TYPE, DATE_TYPE_SINGLE, DATE_TYPE_MULTIPLE
11 | from gis_metadata.utils import DATE_TYPE_RANGE, DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END
12 | from gis_metadata.utils import ATTRIBUTES
13 | from gis_metadata.utils import BOUNDING_BOX
14 | from gis_metadata.utils import CONTACTS
15 | from gis_metadata.utils import DATES
16 | from gis_metadata.utils import DIGITAL_FORMS
17 | from gis_metadata.utils import KEYWORDS_PLACE, KEYWORDS_STRATUM, KEYWORDS_TEMPORAL, KEYWORDS_THEME
18 | from gis_metadata.utils import LARGER_WORKS
19 | from gis_metadata.utils import PROCESS_STEPS
20 | from gis_metadata.utils import RASTER_DIMS, RASTER_INFO
21 | from gis_metadata.utils import COMPLEX_DEFINITIONS, ParserProperty
22 | from gis_metadata.utils import format_xpaths, get_default_for_complex, get_default_for_complex_sub
23 | from gis_metadata.utils import parse_complex_list, parse_property, update_complex_list, update_property
24 |
25 |
26 | ARCGIS_ROOTS = ('metadata', 'Metadata')
27 | ARCGIS_NODES = ('dataIdInfo', 'distInfo', 'dqInfo', 'Esri')
28 |
29 | ARCGIS_TAG_FORMATS = frozendict({
30 | '_attribute_accuracy_root': 'dqInfo/report',
31 | '_attributes_root': 'eainfo/detailed/attr',
32 | '_bounding_box_root': 'dataIdInfo/dataExt/geoEle',
33 | '_contacts_root': 'dataIdInfo/idPoC',
34 | '_dataset_completeness_root': 'dqInfo/report',
35 | '_dates_root': 'dataIdInfo/dataExt/tempEle',
36 | '_digital_forms_root': 'distInfo/distFormat',
37 | '_dist_liability_root': 'dataIdInfo/resConst',
38 | '_transfer_options_root': 'distInfo/distTranOps/onLineSrc',
39 | '_larger_works_root': 'dataIdInfo/aggrInfo/aggrDSName',
40 | '_process_steps_root': 'dqInfo/dataLineage/prcStep',
41 | '_raster_info_root': 'spatRepInfo/GridSpatRep/axisDimension',
42 | '_use_constraints_root': 'dataIdInfo/resConst',
43 |
44 | '_srinfo_grid_rep': 'spatRepInfo/GridSpatRep',
45 |
46 | 'title': 'dataIdInfo/idCitation/resTitle',
47 | 'abstract': 'dataIdInfo/idAbs',
48 | 'purpose': 'dataIdInfo/idPurp',
49 | 'supplementary_info': 'dataIdInfo/suppInfo',
50 | 'online_linkages': 'dataIdInfo/idCitation/citRespParty/rpCntInfo/cntOnlineRes/linkage',
51 | '_online_linkages': 'dataIdInfo/idCitation/citOnlineRes/linkage', # If not in citRespParty
52 | 'originators': 'dataIdInfo/idCitation/citRespParty/rpOrgName',
53 | 'publish_date': 'dataIdInfo/idCitation/date/pubDate',
54 | 'data_credits': 'dataIdInfo/idCredit',
55 | CONTACTS: 'dataIdInfo/idPoC/{ct_path}',
56 | 'dist_contact_org': 'distInfo/distributor/distorCont/rpOrgName',
57 | 'dist_contact_person': 'distInfo/distributor/distorCont/rpIndName',
58 | 'dist_address_type': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/@addressType',
59 | 'dist_address': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/delPoint',
60 | 'dist_city': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/city',
61 | 'dist_state': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/adminArea',
62 | 'dist_postal': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/postCode',
63 | 'dist_country': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/country',
64 | 'dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/cntPhone/voiceNum',
65 | '_dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/voiceNum', # If not in cntPhone
66 | 'dist_email': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/eMailAdd',
67 | 'dist_liability': 'dataIdInfo/resConst/LegConsts/othConsts',
68 | 'processing_fees': 'distInfo/distributor/distorOrdPrc/resFees',
69 | 'processing_instrs': 'distInfo/distributor/distorOrdPrc/ordInstr',
70 | 'resource_desc': 'dataIdInfo/idSpecUse/specUsage',
71 | 'tech_prerequisites': 'dataIdInfo/envirDesc',
72 | ATTRIBUTES: 'eainfo/detailed/attr/{ad_path}', # Same as in FGDC (and for good reason)
73 | 'attribute_accuracy': 'dqInfo/report/measDesc',
74 | BOUNDING_BOX: 'dataIdInfo/dataExt/geoEle/GeoBndBox/{bbox_path}',
75 | 'dataset_completeness': 'dqInfo/report/measDesc',
76 | DIGITAL_FORMS: 'distInfo/distFormat/{df_path}',
77 | '_access_desc': 'distInfo/distTranOps/onLineSrc/orDesc',
78 | '_access_instrs': 'distInfo/distTranOps/onLineSrc/protocol',
79 | '_network_resource': 'distInfo/distTranOps/onLineSrc/linkage',
80 | PROCESS_STEPS: 'dqInfo/dataLineage/prcStep/{ps_path}',
81 | LARGER_WORKS: 'dataIdInfo/aggrInfo/aggrDSName/{lw_path}',
82 | RASTER_INFO: 'spatRepInfo/GridSpatRep/axisDimension/{ri_path}',
83 | '_ri_num_dims': 'spatRepInfo/GridSpatRep/numDims',
84 | 'other_citation_info': 'dataIdInfo/idCitation/otherCitDet',
85 | 'use_constraints': 'dataIdInfo/resConst/Consts/useLimit',
86 | '_use_constraints': 'dataIdInfo/resConst/LegConsts/useLimit',
87 | DATES: 'dataIdInfo/dataExt/tempEle/TempExtent/exTemp/{type_path}',
88 | KEYWORDS_PLACE: 'dataIdInfo/placeKeys/keyword',
89 | KEYWORDS_STRATUM: 'dataIdInfo/stratKeys/keyword',
90 | KEYWORDS_TEMPORAL: 'dataIdInfo/tempKeys/keyword',
91 | KEYWORDS_THEME: 'dataIdInfo/themeKeys/keyword',
92 |
93 | # Other ArcGIS keywords not supported by other standards
94 | 'discipline_keywords': 'dataIdInfo/discKeys/keyword',
95 | 'other_keywords': 'dataIdInfo/otherKeys/keyword',
96 | 'product_keywords': 'dataIdInfo/productKeys/keyword',
97 | 'search_keywords': 'dataIdInfo/searchKeys/keyword',
98 | 'topic_category_keywords': 'dataIdInfo/subTopicCatKeys/keyword'
99 | })
100 |
101 |
102 | class ArcGISParser(MetadataParser):
103 | """ A class to parse metadata files generated by ArcGIS """
104 |
105 | def _init_data_map(self):
106 | """ OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions """
107 |
108 | if self._data_map is not None:
109 | return # Initiation happens once
110 |
111 | # Parse and validate the ArcGIS metadata root
112 |
113 | if self._xml_tree is None:
114 | agis_root = ARCGIS_ROOTS[0] # Default to uncapitalized
115 | else:
116 | agis_root = get_element_name(self._xml_tree)
117 |
118 | if agis_root not in ARCGIS_ROOTS:
119 | raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=agis_root)
120 |
121 | agis_data_map = {'_root': agis_root}
122 | agis_data_map.update(ARCGIS_TAG_FORMATS)
123 |
124 | agis_data_structures = {}
125 |
126 | # Capture and format complex XPATHs
127 |
128 | ad_format = agis_data_map[ATTRIBUTES]
129 | agis_data_structures[ATTRIBUTES] = format_xpaths(
130 | COMPLEX_DEFINITIONS[ATTRIBUTES],
131 | label=ad_format.format(ad_path='attrlabl'),
132 | aliases=ad_format.format(ad_path='attalias'),
133 | definition=ad_format.format(ad_path='attrdef'),
134 | definition_src=ad_format.format(ad_path='attrdefs')
135 | )
136 |
137 | bb_format = agis_data_map[BOUNDING_BOX]
138 | agis_data_structures[BOUNDING_BOX] = format_xpaths(
139 | COMPLEX_DEFINITIONS[BOUNDING_BOX],
140 | east=bb_format.format(bbox_path='eastBL'),
141 | south=bb_format.format(bbox_path='southBL'),
142 | west=bb_format.format(bbox_path='westBL'),
143 | north=bb_format.format(bbox_path='northBL')
144 | )
145 |
146 | ct_format = agis_data_map[CONTACTS]
147 | agis_data_structures[CONTACTS] = format_xpaths(
148 | COMPLEX_DEFINITIONS[CONTACTS],
149 | name=ct_format.format(ct_path='rpIndName'),
150 | organization=ct_format.format(ct_path='rpOrgName'),
151 | position=ct_format.format(ct_path='rpPosName'),
152 | email=ct_format.format(ct_path='rpCntInfo/cntAddress/eMailAdd')
153 | )
154 |
155 | dt_format = agis_data_map[DATES]
156 | agis_data_structures[DATES] = {
157 | DATE_TYPE_MULTIPLE: dt_format.format(type_path='TM_Instant/tmPosition'),
158 | '_' + DATE_TYPE_MULTIPLE: dt_format.format(type_path='TM_Instant/tmPosition/@date'),
159 | DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TM_Period/tmBegin'),
160 | '_' + DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TM_Period/tmBegin/@date'),
161 | DATE_TYPE_RANGE_END: dt_format.format(type_path='TM_Period/tmEnd'),
162 | '_' + DATE_TYPE_RANGE_END: dt_format.format(type_path='TM_Period/tmEnd/@date'),
163 |
164 | # Same as multiple dates, but will contain only one
165 | DATE_TYPE_SINGLE: dt_format.format(type_path='TM_Instant/tmPosition'),
166 | '_' + DATE_TYPE_SINGLE: dt_format.format(type_path='TM_Instant/tmPosition/@date')
167 | }
168 | agis_data_structures[DATES][DATE_TYPE_RANGE] = [
169 | agis_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
170 | agis_data_structures[DATES][DATE_TYPE_RANGE_END]
171 | ]
172 | agis_data_structures[DATES]['_' + DATE_TYPE_RANGE] = [
173 | agis_data_structures[DATES]['_' + DATE_TYPE_RANGE_BEGIN],
174 | agis_data_structures[DATES]['_' + DATE_TYPE_RANGE_END]
175 | ]
176 |
177 | df_format = agis_data_map[DIGITAL_FORMS]
178 | agis_data_structures[DIGITAL_FORMS] = format_xpaths(
179 | COMPLEX_DEFINITIONS[DIGITAL_FORMS],
180 | name=df_format.format(df_path='formatName'),
181 | content=df_format.format(df_path='formatInfo'),
182 | decompression=df_format.format(df_path='fileDecmTech'),
183 | version=df_format.format(df_path='formatVer'),
184 | specification=df_format.format(df_path='formatSpec'),
185 | access_desc=agis_data_map['_access_desc'],
186 | access_instrs=agis_data_map['_access_instrs'],
187 | network_resource=agis_data_map['_network_resource']
188 | )
189 |
190 | lw_format = agis_data_map[LARGER_WORKS]
191 | agis_data_structures[LARGER_WORKS] = format_xpaths(
192 | COMPLEX_DEFINITIONS[LARGER_WORKS],
193 | title=lw_format.format(lw_path='resTitle'),
194 | edition=lw_format.format(lw_path='resEd'),
195 | origin=lw_format.format(lw_path='citRespParty/rpIndName'),
196 | online_linkage=lw_format.format(lw_path='citRespParty/rpCntInfo/cntOnlineRes/linkage'),
197 | other_citation=lw_format.format(lw_path='otherCitDet'),
198 | date=lw_format.format(lw_path='date/pubDate'),
199 | place=lw_format.format(lw_path='citRespParty/rpCntInfo/cntAddress/city'),
200 | info=lw_format.format(lw_path='citRespParty/rpOrgName')
201 | )
202 |
203 | ps_format = agis_data_map[PROCESS_STEPS]
204 | agis_data_structures[PROCESS_STEPS] = format_xpaths(
205 | COMPLEX_DEFINITIONS[PROCESS_STEPS],
206 | description=ps_format.format(ps_path='stepDesc'),
207 | date=ps_format.format(ps_path='stepDateTm'),
208 | sources=ps_format.format(ps_path='stepSrc/srcDesc')
209 | )
210 |
211 | ri_format = agis_data_map[RASTER_INFO]
212 | agis_data_structures[RASTER_INFO] = format_xpaths(
213 | COMPLEX_DEFINITIONS[RASTER_DIMS],
214 | type=ri_format.format(ri_path='@type'),
215 | size=ri_format.format(ri_path='dimSize'),
216 | value=ri_format.format(ri_path='dimResol/value'),
217 | units=ri_format.format(ri_path='dimResol/value/@uom')
218 | )
219 |
220 | # Assign XPATHS and gis_metadata.utils.ParserProperties to data map
221 |
222 | for prop, xpath in dict(agis_data_map).items():
223 | if prop in (ATTRIBUTES, CONTACTS, PROCESS_STEPS):
224 | agis_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)
225 |
226 | elif prop in (BOUNDING_BOX, LARGER_WORKS):
227 | agis_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)
228 |
229 | elif prop in ('attribute_accuracy', 'dataset_completeness'):
230 | agis_data_map[prop] = ParserProperty(self._parse_report_item, self._update_report_item)
231 |
232 | elif prop == DATES:
233 | agis_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)
234 |
235 | elif prop == DIGITAL_FORMS:
236 | agis_data_map[prop] = ParserProperty(self._parse_digital_forms, self._update_digital_forms)
237 |
238 | elif prop == RASTER_INFO:
239 | agis_data_map[prop] = ParserProperty(self._parse_raster_info, self._update_raster_info)
240 |
241 | else:
242 | agis_data_map[prop] = xpath
243 |
244 | self._data_map = agis_data_map
245 | self._data_structures = agis_data_structures
246 |
247 | def _parse_digital_forms(self, prop=DIGITAL_FORMS):
248 | """ Concatenates a list of Digital Form data structures parsed from the metadata """
249 |
250 | xpath_map = self._data_structures[prop]
251 |
252 | # Parse base digital form fields: 'name', 'content', 'decompression', 'version', 'specification'
253 | xpath_root = self._data_map['_digital_forms_root']
254 | digital_forms = parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop)
255 |
256 | # Parse digital form transfer option fields: 'access_desc', 'access_instrs', 'network_resource'
257 | xpath_root = self._data_map['_transfer_options_root']
258 | transfer_opts = parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop)
259 |
260 | # Combine digital forms and transfer options into a single complex struct
261 |
262 | df_len = len(digital_forms)
263 | to_len = len(transfer_opts)
264 | parsed_forms = []
265 |
266 | for idx in range(0, max(df_len, to_len)):
267 | digital_form = {}.fromkeys(COMPLEX_DEFINITIONS[prop], u'')
268 |
269 | if idx < df_len:
270 | digital_form.update(i for i in digital_forms[idx].items() if i[1])
271 | if idx < to_len:
272 | digital_form.update(i for i in transfer_opts[idx].items() if i[1])
273 |
274 | if any(digital_form.values()):
275 | parsed_forms.append(digital_form)
276 |
277 | return get_default_for_complex(prop, parsed_forms)
278 |
279 | def _parse_report_item(self, prop):
280 | """ :return: the text for each element at the configured path if type attribute matches"""
281 |
282 | item_type = None
283 |
284 | if prop == 'attribute_accuracy':
285 | item_type = 'DQQuanAttAcc'
286 | elif prop == 'dataset_completeness':
287 | item_type = 'DQCompOm'
288 |
289 | xroot = self._get_xroot_for(prop)
290 |
291 | parsed = (element_to_dict(e) for e in get_elements(self._xml_tree, xroot))
292 | parsed = flatten_items(e['children'] for e in parsed if e['attributes'].get('type') == item_type)
293 |
294 | return reduce_value([p['text'] for p in parsed if p and p['name'] == 'measDesc'])
295 |
296 | def _parse_raster_info(self, prop=RASTER_INFO):
297 | """ Collapses multiple dimensions into a single raster_info complex struct """
298 |
299 | raster_info = {}.fromkeys(COMPLEX_DEFINITIONS[prop], u'')
300 |
301 | # Ensure conversion of lists to newlines is in place
302 | raster_info['dimensions'] = get_default_for_complex_sub(
303 | prop=prop,
304 | subprop='dimensions',
305 | value=parse_property(self._xml_tree, None, self._data_map, '_ri_num_dims'),
306 | xpath=self._data_map['_ri_num_dims']
307 | )
308 |
309 | xpath_root = self._get_xroot_for(prop)
310 | xpath_map = self._data_structures[prop]
311 |
312 | for dimension in parse_complex_list(self._xml_tree, xpath_root, xpath_map, RASTER_DIMS):
313 | dimension_type = dimension['type'].lower()
314 |
315 | if dimension_type == 'vertical':
316 | raster_info['vertical_count'] = dimension['size']
317 |
318 | elif dimension_type == 'column':
319 | raster_info['column_count'] = dimension['size']
320 | raster_info['x_resolution'] = u' '.join(dimension[k] for k in ['value', 'units']).strip()
321 |
322 | elif dimension_type == 'row':
323 | raster_info['row_count'] = dimension['size']
324 | raster_info['y_resolution'] = u' '.join(dimension[k] for k in ['value', 'units']).strip()
325 |
326 | return raster_info if any(raster_info[k] for k in raster_info) else {}
327 |
328 | def _update_digital_forms(self, **update_props):
329 | """
330 | Update operation for ArcGIS Digital Forms metadata
331 | :see: gis_metadata.utils.COMPLEX_DEFINITIONS[DIGITAL_FORMS]
332 | """
333 |
334 | digital_forms = wrap_value(update_props['values'])
335 |
336 | # Update all Digital Form properties: distFormat*
337 |
338 | xpath_map = self._data_structures[update_props['prop']]
339 |
340 | dist_format_props = ('name', 'content', 'decompression', 'version', 'specification')
341 | dist_format_xroot = self._data_map['_digital_forms_root']
342 | dist_format_xmap = {prop: xpath_map[prop] for prop in dist_format_props}
343 | dist_formats = []
344 |
345 | for digital_form in digital_forms:
346 | dist_formats.append({prop: digital_form[prop] for prop in dist_format_props})
347 |
348 | update_props['values'] = dist_formats
349 | dist_formats = update_complex_list(
350 | xpath_root=dist_format_xroot, xpath_map=dist_format_xmap, **update_props
351 | )
352 |
353 | # Update all Network Resources: distTranOps+
354 |
355 | trans_option_props = ('access_desc', 'access_instrs', 'network_resource')
356 | trans_option_xroot = self._data_map['_transfer_options_root']
357 | trans_option_xmap = {prop: self._data_map['_' + prop] for prop in trans_option_props}
358 |
359 | trans_options = []
360 | for digital_form in digital_forms:
361 | trans_options.append({prop: digital_form[prop] for prop in trans_option_props})
362 |
363 | update_props['values'] = trans_options
364 | trans_options = update_complex_list(
365 | xpath_root=trans_option_xroot, xpath_map=trans_option_xmap, **update_props
366 | )
367 |
368 | return {
369 | 'distribution_formats': dist_formats,
370 | 'transfer_options': trans_options
371 | }
372 |
373 | def _update_dates(self, **update_props):
374 | """
375 | Update operation for ArcGIS Dates metadata
376 | :see: gis_metadata.utils.COMPLEX_DEFINITIONS[DATES]
377 | """
378 |
379 | tree_to_update = update_props['tree_to_update']
380 | xpath_root = self._data_map['_dates_root']
381 |
382 | if self.dates:
383 | date_type = self.dates[DATE_TYPE]
384 |
385 | # First remove all date info from common root
386 | remove_element(tree_to_update, xpath_root)
387 |
388 | if date_type == DATE_TYPE_MULTIPLE:
389 | xpath_root += '/TempExtent/TM_Instant'
390 | elif date_type == DATE_TYPE_RANGE:
391 | xpath_root += '/TempExtent/TM_Period'
392 |
393 | return super(ArcGISParser, self)._update_dates(xpath_root, **update_props)
394 |
395 | def _update_report_item(self, **update_props):
396 | """ Update the text for each element at the configured path if attribute matches """
397 |
398 | tree_to_update = update_props['tree_to_update']
399 | prop = update_props['prop']
400 | values = wrap_value(update_props['values'])
401 | xroot = self._get_xroot_for(prop)
402 |
403 | attr_key = 'type'
404 | attr_val = u''
405 |
406 | if prop == 'attribute_accuracy':
407 | attr_val = 'DQQuanAttAcc'
408 | elif prop == 'dataset_completeness':
409 | attr_val = 'DQCompOm'
410 |
411 | # Clear (make empty) all elements of the appropriate type
412 | for elem in get_elements(tree_to_update, xroot):
413 | if get_element_attributes(elem).get(attr_key) == attr_val:
414 | clear_element(elem)
415 |
416 | # Remove all empty elements, including those previously cleared
417 | remove_empty_element(tree_to_update, xroot)
418 |
419 | # Insert elements with correct attributes for each new value
420 |
421 | attrs = {attr_key: attr_val}
422 | updated = []
423 |
424 | for idx, value in enumerate(values):
425 | elem = insert_element(tree_to_update, idx, xroot, **attrs)
426 | updated.append(insert_element(elem, idx, 'measDesc', value))
427 |
428 | return updated
429 |
    def _update_raster_info(self, **update_props):
        """ Derives multiple dimensions from a single raster_info complex struct """

        tree_to_update = update_props['tree_to_update']
        prop = update_props['prop']
        # Pop 'values' so the raw struct isn't forwarded to update_complex_list below
        values = update_props.pop('values')

        # Update number of dimensions at raster_info root (applies to all dimensions below)

        xroot, xpath = None, self._data_map['_ri_num_dims']
        raster_info = [update_property(tree_to_update, xroot, xpath, prop, values.get('dimensions', u''))]

        # Derive vertical, longitude, and latitude dimensions from raster_info

        xpath_root = self._get_xroot_for(prop)
        xpath_map = self._data_structures[prop]

        # Each dimension dict is only populated when its source values exist;
        # NOTE(review): empty dicts are still passed to update_complex_list below —
        # presumably it skips them; verify against gis_metadata.utils
        v_dimension = {}
        if values.get('vertical_count'):
            v_dimension = v_dimension.fromkeys(xpath_map, u'')
            v_dimension['type'] = 'vertical'
            v_dimension['size'] = values.get('vertical_count', u'')

        x_dimension = {}
        if values.get('column_count') or values.get('x_resolution'):
            x_dimension = x_dimension.fromkeys(xpath_map, u'')
            x_dimension['type'] = 'column'
            x_dimension['size'] = values.get('column_count', u'')
            x_dimension['value'] = values.get('x_resolution', u'')

        y_dimension = {}
        if values.get('row_count') or values.get('y_resolution'):
            y_dimension = y_dimension.fromkeys(xpath_map, u'')
            y_dimension['type'] = 'row'
            y_dimension['size'] = values.get('row_count', u'')
            y_dimension['value'] = values.get('y_resolution', u'')

        # Update derived dimensions as complex list, and append affected elements for return

        update_props['prop'] = RASTER_DIMS
        update_props['values'] = [v_dimension, x_dimension, y_dimension]

        raster_info += update_complex_list(xpath_root=xpath_root, xpath_map=xpath_map, **update_props)

        return raster_info
475 |
--------------------------------------------------------------------------------
/gis_metadata/exceptions.py:
--------------------------------------------------------------------------------
1 | """ A module to define metadata parsing exceptions """
2 |
3 |
class ParserError(Exception):
    """ The base class for all metadata parsing exceptions """

    def __init__(self, msg_format, **kwargs):
        """
        Initialize Exception with a message produced by interpolating the
        named keyword arguments into msg_format via str.format.
        """

        formatted_message = msg_format.format(**kwargs)
        super(ParserError, self).__init__(formatted_message)
14 |
15 |
class ConfigurationError(ParserError):
    """
    Represents problems with a parser's configuration
    :raised: during a parsing operation when a parser is misconfigured
    """
21 |
22 |
class InvalidContent(ParserError):
    """
    Represents problems with XML parsing of metadata content
    :raised: while reading raw data into the XML tree before parsing
    """
28 |
29 |
class NoContent(ParserError):
    """
    Represents issues with empty metadata content
    :raised: while reading raw data into the XML tree before parsing
    """
35 |
36 |
class ValidationError(ParserError):
    """
    Represents validation exceptions
    :raised: after updates when validating, updating the tree, or serializing
    """

    def __init__(self, msg_format, invalid=None, missing=None, **kwargs):
        """ Capture invalid and missing fields/values for handling downstream """

        # Keep error details on the instance so callers can inspect them
        self.invalid = invalid if invalid is not None else {}
        self.missing = missing if missing is not None else []

        super(ValidationError, self).__init__(msg_format, **kwargs)
51 |
--------------------------------------------------------------------------------
/gis_metadata/fgdc_metadata_parser.py:
--------------------------------------------------------------------------------
1 | """ A module to contain utility FGDC metadata parsing helpers """
2 |
3 | from frozendict import frozendict
4 | from parserutils.elements import get_element_name, remove_element
5 |
6 | from gis_metadata.exceptions import InvalidContent
7 | from gis_metadata.metadata_parser import MetadataParser
8 | from gis_metadata.utils import DATE_TYPE, DATE_TYPE_SINGLE, DATE_TYPE_MULTIPLE
9 | from gis_metadata.utils import DATE_TYPE_RANGE, DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END
10 | from gis_metadata.utils import ATTRIBUTES
11 | from gis_metadata.utils import BOUNDING_BOX
12 | from gis_metadata.utils import CONTACTS
13 | from gis_metadata.utils import DATES
14 | from gis_metadata.utils import DIGITAL_FORMS
15 | from gis_metadata.utils import KEYWORDS_PLACE, KEYWORDS_STRATUM, KEYWORDS_TEMPORAL, KEYWORDS_THEME
16 | from gis_metadata.utils import LARGER_WORKS
17 | from gis_metadata.utils import PROCESS_STEPS
18 | from gis_metadata.utils import RASTER_INFO
19 | from gis_metadata.utils import COMPLEX_DEFINITIONS, ParserProperty
20 | from gis_metadata.utils import format_xpaths, update_complex
21 |
22 |
# The XML root element of all FGDC-CSDGM metadata documents
FGDC_ROOT = 'metadata'

# Define backup locations for contact and raster_info sub-properties
# (underscore-prefixed keys mirror their primary counterparts as fallback XPATHs)
FGDC_DEFINITIONS = dict({k: dict(v) for k, v in dict(COMPLEX_DEFINITIONS).items()})
FGDC_DEFINITIONS[CONTACTS].update({
    '_name': '{_name}',
    '_organization': '{_organization}'
})
FGDC_DEFINITIONS[RASTER_INFO].update({
    '_x_resolution': '{_x_resolution}',
    '_y_resolution': '{_y_resolution}'
})
# Freeze the definitions so they cannot be mutated after module load
FGDC_DEFINITIONS = frozendict({k: frozendict(v) for k, v in FGDC_DEFINITIONS.items()})

# XPATH templates for all supported properties; '{...}' placeholders
# are filled in by FgdcParser._init_data_map via str.format
FGDC_TAG_FORMATS = frozendict({
    '_attributes_root': 'eainfo/detailed/attr',
    '_bounding_box_root': 'idinfo/spdom/bounding',
    '_contacts_root': 'idinfo/ptcontac',
    '_dates_root': 'idinfo/timeperd/timeinfo',
    '_digital_forms_root': 'distinfo/stdorder/digform',
    '_larger_works_root': 'idinfo/citation/citeinfo/lworkcit/citeinfo',
    '_process_steps_root': 'dataqual/lineage/procstep',

    '_raster_info_root': 'spdoinfo/rastinfo',
    '__raster_res_root': 'spref/horizsys',

    '_raster_resolution': 'spref/horizsys/planar/planci/coordrep',
    '__raster_resolution': 'spref/horizsys/geograph',

    'title': 'idinfo/citation/citeinfo/title',
    'abstract': 'idinfo/descript/abstract',
    'purpose': 'idinfo/descript/purpose',
    'supplementary_info': 'idinfo/descript/supplinf',
    'online_linkages': 'idinfo/citation/citeinfo/onlink',
    'originators': 'idinfo/citation/citeinfo/origin',
    'publish_date': 'idinfo/citation/citeinfo/pubdate',
    'data_credits': 'idinfo/datacred',
    CONTACTS: 'idinfo/ptcontac/cntinfo/{ct_path}',
    'dist_contact_org': 'distinfo/distrib/cntinfo/cntperp/cntorg',
    '_dist_contact_org': 'distinfo/distrib/cntinfo/cntorgp/cntorg',  # If not in cntperp
    'dist_contact_person': 'distinfo/distrib/cntinfo/cntperp/cntper',
    '_dist_contact_person': 'distinfo/distrib/cntinfo/cntorgp/cntper',  # If not in cntperp
    'dist_address_type': 'distinfo/distrib/cntinfo/cntaddr/addrtype',
    'dist_address': 'distinfo/distrib/cntinfo/cntaddr/address',
    'dist_city': 'distinfo/distrib/cntinfo/cntaddr/city',
    'dist_state': 'distinfo/distrib/cntinfo/cntaddr/state',
    'dist_postal': 'distinfo/distrib/cntinfo/cntaddr/postal',
    'dist_country': 'distinfo/distrib/cntinfo/cntaddr/country',
    'dist_phone': 'distinfo/distrib/cntinfo/cntvoice',
    'dist_email': 'distinfo/distrib/cntinfo/cntemail',
    'dist_liability': 'distinfo/distliab',
    'processing_fees': 'distinfo/stdorder/fees',
    'processing_instrs': 'distinfo/stdorder/ordering',
    'resource_desc': 'distinfo/resdesc',
    'tech_prerequisites': 'distinfo/techpreq',
    ATTRIBUTES: 'eainfo/detailed/attr/{ad_path}',
    'attribute_accuracy': 'dataqual/attracc/attraccr',
    BOUNDING_BOX: 'idinfo/spdom/bounding/{bbox_path}',
    'dataset_completeness': 'dataqual/complete',
    DIGITAL_FORMS: 'distinfo/stdorder/digform/{df_path}',
    PROCESS_STEPS: 'dataqual/lineage/procstep/{ps_path}',
    LARGER_WORKS: 'idinfo/citation/citeinfo/lworkcit/citeinfo/{lw_path}',
    RASTER_INFO: 'spdoinfo/rastinfo/{ri_path}',
    'other_citation_info': 'idinfo/citation/citeinfo/othercit',
    'use_constraints': 'idinfo/useconst',
    DATES: 'idinfo/timeperd/timeinfo/{type_path}',
    KEYWORDS_PLACE: 'idinfo/keywords/place/placekey',
    KEYWORDS_STRATUM: 'idinfo/keywords/stratum/stratkey',
    KEYWORDS_TEMPORAL: 'idinfo/keywords/temporal/tempkey',
    KEYWORDS_THEME: 'idinfo/keywords/theme/themekey'
})
94 |
95 |
class FgdcParser(MetadataParser):
    """ A class to parse metadata files conforming to the FGDC standard """

    def _init_data_map(self):
        """ OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions """

        if self._data_map is not None:
            return  # Initiation happens once

        # Parse and validate the FGDC metadata root

        if self._xml_tree is None:
            fgdc_root = FGDC_ROOT
        else:
            fgdc_root = get_element_name(self._xml_tree)

        if fgdc_root != FGDC_ROOT:
            # Fixed: this message previously referenced the ISO-19115 standard (copy-paste error)
            raise InvalidContent('Invalid XML root for FGDC-CSDGM standard: {root}', root=fgdc_root)

        fgdc_data_map = {'_root': FGDC_ROOT}
        fgdc_data_structures = {}

        # Capture and format other complex XPATHs

        ad_format = FGDC_TAG_FORMATS[ATTRIBUTES]
        fgdc_data_structures[ATTRIBUTES] = format_xpaths(
            FGDC_DEFINITIONS[ATTRIBUTES],
            label=ad_format.format(ad_path='attrlabl'),
            aliases=ad_format.format(ad_path='attalias'),
            definition=ad_format.format(ad_path='attrdef'),
            definition_src=ad_format.format(ad_path='attrdefs')
        )

        bb_format = FGDC_TAG_FORMATS[BOUNDING_BOX]
        fgdc_data_structures[BOUNDING_BOX] = format_xpaths(
            FGDC_DEFINITIONS[BOUNDING_BOX],
            east=bb_format.format(bbox_path='eastbc'),
            south=bb_format.format(bbox_path='southbc'),
            west=bb_format.format(bbox_path='westbc'),
            north=bb_format.format(bbox_path='northbc')
        )

        ct_format = FGDC_TAG_FORMATS[CONTACTS]
        fgdc_data_structures[CONTACTS] = format_xpaths(
            FGDC_DEFINITIONS[CONTACTS],

            name=ct_format.format(ct_path='cntperp/cntper'),
            _name=ct_format.format(ct_path='cntorgp/cntper'),  # If not in cntperp

            organization=ct_format.format(ct_path='cntperp/cntorg'),
            _organization=ct_format.format(ct_path='cntorgp/cntorg'),  # If not in cntperp

            position=ct_format.format(ct_path='cntpos'),
            email=ct_format.format(ct_path='cntemail')
        )

        dt_format = FGDC_TAG_FORMATS[DATES]
        fgdc_data_structures[DATES] = {
            DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'),
            DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'),
            DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
            DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
        }
        # A date range combines the begin and end date XPATHs
        fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
        ]

        df_format = FGDC_TAG_FORMATS[DIGITAL_FORMS]
        fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
            FGDC_DEFINITIONS[DIGITAL_FORMS],
            name=df_format.format(df_path='digtinfo/formname'),
            content=df_format.format(df_path='digtinfo/formcont'),
            decompression=df_format.format(df_path='digtinfo/filedec'),
            version=df_format.format(df_path='digtinfo/formvern'),
            specification=df_format.format(df_path='digtinfo/formspec'),
            access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'),
            access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'),
            network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr')
        )

        lw_format = FGDC_TAG_FORMATS[LARGER_WORKS]
        fgdc_data_structures[LARGER_WORKS] = format_xpaths(
            FGDC_DEFINITIONS[LARGER_WORKS],
            title=lw_format.format(lw_path='title'),
            edition=lw_format.format(lw_path='edition'),
            origin=lw_format.format(lw_path='origin'),
            online_linkage=lw_format.format(lw_path='onlink'),
            other_citation=lw_format.format(lw_path='othercit'),
            date=lw_format.format(lw_path='pubdate'),
            place=lw_format.format(lw_path='pubinfo/pubplace'),
            info=lw_format.format(lw_path='pubinfo/publish')
        )

        ps_format = FGDC_TAG_FORMATS[PROCESS_STEPS]
        fgdc_data_structures[PROCESS_STEPS] = format_xpaths(
            FGDC_DEFINITIONS[PROCESS_STEPS],
            description=ps_format.format(ps_path='procdesc'),
            date=ps_format.format(ps_path='procdate'),
            sources=ps_format.format(ps_path='srcused')
        )

        ri_format = FGDC_TAG_FORMATS[RASTER_INFO]
        fgdc_data_structures[RASTER_INFO] = format_xpaths(
            FGDC_DEFINITIONS[RASTER_INFO],

            dimensions=ri_format.format(ri_path='rasttype'),
            row_count=ri_format.format(ri_path='rowcount'),
            column_count=ri_format.format(ri_path='colcount'),
            vertical_count=ri_format.format(ri_path='vrtcount'),

            # Resolution lives under spref rather than spdoinfo; the underscored
            # keys are the geographic (lat/lon) fallbacks to the planar XPATHs
            x_resolution=FGDC_TAG_FORMATS['_raster_resolution'] + '/absres',
            _x_resolution=FGDC_TAG_FORMATS['__raster_resolution'] + '/longres',
            y_resolution=FGDC_TAG_FORMATS['_raster_resolution'] + '/ordres',
            _y_resolution=FGDC_TAG_FORMATS['__raster_resolution'] + '/latres',
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map

        for prop, xpath in FGDC_TAG_FORMATS.items():
            if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

            elif prop in (BOUNDING_BOX, LARGER_WORKS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

            elif prop == DATES:
                fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

            elif prop == RASTER_INFO:
                fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_raster_info)

            else:
                fgdc_data_map[prop] = xpath

        self._data_map = fgdc_data_map
        self._data_structures = fgdc_data_structures

    def _update_dates(self, **update_props):
        """
        Update operation for FGDC Dates metadata
        :see: gis_metadata.utils.COMPLEX_DEFINITIONS[DATES]
        """

        tree_to_update = update_props['tree_to_update']
        xpath_root = self._data_map['_dates_root']

        if self.dates:
            date_type = self.dates[DATE_TYPE]

            if date_type == DATE_TYPE_MULTIPLE:
                xpath_root += '/mdattim/sngdate'

            elif date_type == DATE_TYPE_RANGE:
                xpath_root = ''  # /rngdates/begdate and enddate are siblings, not cousins
                remove_element(tree_to_update, self._data_map['_dates_root'])

        return super(FgdcParser, self)._update_dates(xpath_root, **update_props)

    def _update_raster_info(self, **update_props):
        """
        Ensures complete removal of raster_info, which is split between two roots:
        '_raster_info_root' (spdoinfo) and '__raster_res_root' (spref).
        """

        xpath_map = self._data_structures[update_props['prop']]

        # Apply the update under both roots so stale values are removed everywhere
        return [
            update_complex(xpath_root=self._data_map.get('_raster_info_root'), xpath_map=xpath_map, **update_props),
            update_complex(xpath_root=self._data_map.get('__raster_res_root'), xpath_map=xpath_map, **update_props)
        ]
264 |
--------------------------------------------------------------------------------
/gis_metadata/metadata_parser.py:
--------------------------------------------------------------------------------
1 | """ A module to contain utility metadata parsing helpers """
2 |
3 | from copy import deepcopy
4 |
5 | from parserutils.elements import create_element_tree, element_exists, element_to_string
6 | from parserutils.elements import get_element_name, get_element_tree, remove_element, write_element
7 | from parserutils.strings import DEFAULT_ENCODING
8 |
9 | from gis_metadata.exceptions import InvalidContent, NoContent
10 | from gis_metadata.utils import DATES, DATE_TYPE, DATE_VALUES
11 | from gis_metadata.utils import DATE_TYPE_RANGE, DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END
12 | from gis_metadata.utils import SUPPORTED_PROPS
13 | from gis_metadata.utils import parse_complex, parse_complex_list, parse_dates, parse_property
14 | from gis_metadata.utils import update_complex, update_complex_list, update_property, validate_any, validate_properties
15 |
16 |
# Place holders for lazy, one-time FGDC & ISO imports
# These module-level globals stay None until _import_parsers() rebinds them,
# which breaks the circular dependency between this module and the parser modules

ArcGISParser, ARCGIS_ROOTS, ARCGIS_NODES = None, None, None
FgdcParser, FGDC_ROOT = None, None
IsoParser, ISO_ROOTS = None, None
VALID_ROOTS = None
23 |
24 |
def convert_parser_to(parser, parser_or_type, metadata_props=None):
    """
    Convert one parser (or parseable content) into another parser type,
    copying over the supported properties and applying them via update().

    :param parser: the parser (or content or parser type) to convert from
    :param parser_or_type: a parser (or content) or type of parser to return
    :param metadata_props: optional iterable of property names to copy; defaults to SUPPORTED_PROPS
    :return: a parser of type parser_or_type, initialized with the properties of parser.
        If parser_or_type is a type, an instance of it must have an update method, and that
        method must process the properties supported by MetadataParser for the conversion
        to have any effect.
    :see: get_metadata_parser(metadata_container) for more on how parser_or_type is treated
    """

    source = parser if isinstance(parser, MetadataParser) else get_metadata_parser(parser)
    target = get_metadata_parser(parser_or_type)

    for prop in (metadata_props or SUPPORTED_PROPS):
        setattr(target, prop, deepcopy(getattr(source, prop, u'')))

    target.update()

    return target
44 |
45 |
def get_metadata_parser(metadata_container, **metadata_defaults):
    """
    Takes a metadata_container, which may be a type or instance of a parser, a dict, string, or file.
    :return: a new instance of a parser corresponding to the standard represented by metadata_container
    :see: get_parsed_content(metadata_content) for more on types of content that can be parsed
    """

    parser_type = None

    if isinstance(metadata_container, MetadataParser):
        parser_type = type(metadata_container)

    elif isinstance(metadata_container, type):
        parser_type = metadata_container
        # Instantiate the type to produce content for get_parsed_content below
        # NOTE(review): assumes update() returns parseable content — confirm for custom parser types
        metadata_container = metadata_container().update(**metadata_defaults)

    xml_root, xml_tree = get_parsed_content(metadata_container)

    # The get_parsed_content method ensures only these roots will be returned
    # (it also populates the lazy globals ISO_ROOTS, ARCGIS_*, FGDC_ROOT used below)

    parser = None

    if parser_type is not None:
        parser = parser_type(xml_tree, **metadata_defaults)
    elif xml_root in ISO_ROOTS:
        parser = IsoParser(xml_tree, **metadata_defaults)
    else:
        # FGDC and ArcGIS may share a root element: ArcGIS-specific nodes disambiguate
        has_arcgis_data = any(element_exists(xml_tree, e) for e in ARCGIS_NODES)

        if xml_root == FGDC_ROOT and not has_arcgis_data:
            parser = FgdcParser(xml_tree, **metadata_defaults)
        elif xml_root in ARCGIS_ROOTS:
            parser = ArcGISParser(xml_tree, **metadata_defaults)

    return parser
81 |
82 |
def get_parsed_content(metadata_content):
    """
    Parses any of the following types of content:
    1. XML string or file object: parses XML content
    2. MetadataParser instance: deep copies xml_tree
    3. Dictionary with nested objects containing:
        - name (required): the name of the element tag
        - text: the text contained by element
        - tail: text immediately following the element
        - attributes: a Dictionary containing element attributes
        - children: a List of converted child elements

    :raises InvalidContent: if the XML is invalid or does not conform to a supported metadata standard
    :raises NoContent: If the content passed in is null or otherwise empty

    :return: the XML root along with an XML Tree parsed by and compatible with element_utils
    """

    _import_parsers()  # Prevents circular dependencies between modules

    if metadata_content is None:
        raise NoContent('Metadata has no data')

    if isinstance(metadata_content, MetadataParser):
        xml_tree = deepcopy(metadata_content._xml_tree)
    elif isinstance(metadata_content, dict):
        xml_tree = get_element_tree(metadata_content)
    else:
        try:
            # Strip name spaces from file or XML content
            xml_tree = get_element_tree(metadata_content)
        except Exception:
            xml_tree = None  # Several exceptions possible, outcome is the same

    if xml_tree is None:
        raise InvalidContent(
            'Cannot instantiate a {parser_type} parser with invalid content to parse',
            parser_type=type(metadata_content).__name__
        )

    xml_root = get_element_name(xml_tree)

    if xml_root is None:
        raise NoContent('Metadata contains no data')
    elif xml_root not in VALID_ROOTS:
        raise InvalidContent(
            'Invalid root element for {content}: {xml_root}',
            content=type(metadata_content).__name__, xml_root=xml_root
        )

    return xml_root, xml_tree
134 |
135 |
def _import_parsers():
    """ Lazy imports to prevent circular dependencies between this module and utils """

    global ARCGIS_NODES
    global ARCGIS_ROOTS
    global ArcGISParser

    global FGDC_ROOT
    global FgdcParser

    global ISO_ROOTS
    global IsoParser

    global VALID_ROOTS

    # Because of the global declarations above, each 'from ... import name'
    # rebinds the corresponding module-level placeholder in place
    if ARCGIS_NODES is None or ARCGIS_ROOTS is None or ArcGISParser is None:
        from gis_metadata.arcgis_metadata_parser import ARCGIS_NODES
        from gis_metadata.arcgis_metadata_parser import ARCGIS_ROOTS
        from gis_metadata.arcgis_metadata_parser import ArcGISParser

    if FGDC_ROOT is None or FgdcParser is None:
        from gis_metadata.fgdc_metadata_parser import FGDC_ROOT
        from gis_metadata.fgdc_metadata_parser import FgdcParser

    if ISO_ROOTS is None or IsoParser is None:
        from gis_metadata.iso_metadata_parser import ISO_ROOTS
        from gis_metadata.iso_metadata_parser import IsoParser

    if VALID_ROOTS is None:
        # ARCGIS_ROOTS + ISO_ROOTS concatenates the two sequences of root tag names
        VALID_ROOTS = {FGDC_ROOT}.union(ARCGIS_ROOTS + ISO_ROOTS)
166 |
167 |
168 | class MetadataParser(object):
169 | """
170 | A class to parent all XML metadata parsing classes. To add more fields for parsing and updating:
171 |
172 | I. If the new field contains a String or a List of Strings, do the following and skip to step III
173 |
174 | Update the dictionary of formatted tags in each child parser that needs to read in the value.
175 | Nothing more is needed, because the _init_data_map methods should be written to put all XPATHs
176 | into the data map as they are, overriding the values for only complex XML content. If an XPATH
177 | is in the data map, it will be read and written at parsing time and updating time respectively.
178 |
179 | II. If the new field contains complex XML content:
180 |
181 | A. Add the new complex definition to utils
182 | :see: gis_metadata.utils.COMPLEX_DEFINITIONS for examples of complex XML content
183 |
184 | B. Define the necessary property parsing and updating methods in the child parsers
185 |
186 | By default, XPATH values in a data map Dictionary handle Strings or Lists of Strings.
187 | If the new property requires conversion to-and-from a Dictionary, then:
188 |
189 | 1. A parse and update method will need to be defined in the child parser
190 | - Parse methods should take zero arguments and return the value in the desired format
191 | - Update methods take a **kwargs parameter and return the updated element
192 | 2. A ParserProperties must be instantiated with them and put in data map
193 |
194 | C. Update _init_data_map() to instantiate a ParserProperty for the new field
195 |
196 | The result of _init_data_map is that _data_map is defined for use in _init_metadata.
197 | The _data_map dictionary will contain identifying property names as keys, and either
198 | XPATHs or ParserProperties as values.
199 |
200 | III. If the new content is required across standards, update utils.SUPPORTED_PROPS as needed
201 |
202 | Requiring new content does not mean a value is required from the incoming metadata. Rather,
203 | it means all MetadataParser children must provide an XPATH for parsing the value, even if
204 | the XPATH provided is blank. This ensures an initialized parser will have a property named
205 | after the identifying property name, even if its value is an empty String.
206 |
207 | """
208 |
    def __init__(self, metadata_to_parse=None, out_file_or_path=None, metadata_props=None, **metadata_defaults):
        """
        Initialize new parser with valid content as defined by get_parsed_content

        :param metadata_to_parse: XML string/file, dict, or parser instance; None builds a template tree
        :param out_file_or_path: optional default destination used when serializing
        :param metadata_props: optional iterable of supported property names; defaults to SUPPORTED_PROPS
        :param metadata_defaults: default values applied when generating a template tree
        :see: get_parsed_content(metadata_content) for more on what constitutes valid content
        """

        self.has_data = False
        self.out_file_or_path = out_file_or_path

        self._xml_tree = None
        self._data_map = None
        self._data_structures = None
        self._metadata_props = set(metadata_props or SUPPORTED_PROPS)

        if metadata_to_parse is not None:
            self._xml_root, self._xml_tree = get_parsed_content(metadata_to_parse)
        else:
            # _get_template initializes the data map as a side effect, so '_root' is available after
            self._xml_tree = self._get_template(**metadata_defaults)
            self._xml_root = self._data_map['_root']

        self._init_metadata()
230 |
231 | def _init_metadata(self):
232 | """
233 | Dynamically sets attributes from a Dictionary passed in by children.
234 | The Dictionary will contain the name of each attribute as keys, and
235 | either an XPATH mapping to a text value in _xml_tree, or a function
236 | that takes no parameters and returns the intended value.
237 | """
238 |
239 | if self._data_map is None:
240 | self._init_data_map()
241 |
242 | validate_properties(self._data_map, self._metadata_props)
243 |
244 | # Parse attribute values and assign them: key = parse(val)
245 |
246 | for prop in self._data_map:
247 | setattr(self, prop, parse_property(self._xml_tree, None, self._data_map, prop))
248 |
249 | self.has_data = any(getattr(self, prop) for prop in self._data_map)
250 |
251 | def _init_data_map(self):
252 | """ Default data map initialization: MUST be overridden in children """
253 |
254 | if self._data_map is None:
255 | self._data_map = {'_root': None}
256 | self._data_map.update({}.fromkeys(self._metadata_props))
257 |
    def _get_template(self, root=None, **metadata_defaults):
        """ Iterate over items metadata_defaults {prop: val, ...} to populate template """

        if root is None:
            # Initialize the data map if needed so the configured root is available
            if self._data_map is None:
                self._init_data_map()

            root = self._xml_root = self._data_map['_root']

        template_tree = self._xml_tree = create_element_tree(root)

        # Only apply defaults for properties with both a configured XPATH and a truthy value
        for prop, val in metadata_defaults.items():
            path = self._data_map.get(prop)
            if path and val:
                setattr(self, prop, val)
                update_property(template_tree, None, path, prop, val)

        return template_tree
276 |
277 | def _get_xpath_for(self, prop):
278 | """ :return: the configured xpath for a given property """
279 |
280 | xpath = self._data_map.get(prop)
281 | return getattr(xpath, 'xpath', xpath) # May be a ParserProperty
282 |
283 | def _get_xroot_for(self, prop):
284 | """ :return: the configured root for a given property based on the property name """
285 |
286 | return self._get_xpath_for(f'_{prop}_root')
287 |
288 | def _parse_complex(self, prop):
289 | """ Default parsing operation for a complex struct """
290 |
291 | xpath_root = None
292 | xpath_map = self._data_structures[prop]
293 |
294 | return parse_complex(self._xml_tree, xpath_root, xpath_map, prop)
295 |
296 | def _parse_complex_list(self, prop):
297 | """ Default parsing operation for lists of complex structs """
298 |
299 | xpath_root = self._get_xroot_for(prop)
300 | xpath_map = self._data_structures[prop]
301 |
302 | return parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop)
303 |
304 | def _parse_dates(self, prop=DATES):
305 | """ Creates and returns a Date Types data structure parsed from the metadata """
306 |
307 | return parse_dates(self._xml_tree, self._data_structures[prop])
308 |
309 | def _update_complex(self, **update_props):
310 | """ Default update operation for a complex struct """
311 |
312 | prop = update_props['prop']
313 | xpath_root = self._get_xroot_for(prop)
314 | xpath_map = self._data_structures[prop]
315 |
316 | return update_complex(xpath_root=xpath_root, xpath_map=xpath_map, **update_props)
317 |
318 | def _update_complex_list(self, **update_props):
319 | """ Default update operation for lists of complex structs """
320 |
321 | prop = update_props['prop']
322 | xpath_root = self._get_xroot_for(prop)
323 | xpath_map = self._data_structures[prop]
324 |
325 | return update_complex_list(xpath_root=xpath_root, xpath_map=xpath_map, **update_props)
326 |
    def _update_dates(self, xpath_root=None, **update_props):
        """
        Default update operation for Dates metadata
        :see: gis_metadata.utils.COMPLEX_DEFINITIONS[DATES]
        :param xpath_root: optional root beneath which date elements live; when given,
            existing elements under it are removed before the new values are written
        :param update_props: must include 'tree_to_update', 'prop' and 'values'
        """

        tree_to_update = update_props['tree_to_update']
        prop = update_props['prop']
        # Unwrap the list of date values from the complex struct; default to empty text
        values = (update_props['values'] or {}).get(DATE_VALUES) or u''
        xpaths = self._data_structures[prop]

        if not self.dates:
            # No dates on the instance: target only the root (may be None)
            date_xpaths = xpath_root
        elif self.dates[DATE_TYPE] != DATE_TYPE_RANGE:
            # single/multiple/missing date types map to one configured xpath
            date_xpaths = xpaths.get(self.dates[DATE_TYPE], u'')
        else:
            # Range dates write begin and end values to two distinct xpaths
            date_xpaths = [
                xpaths[DATE_TYPE_RANGE_BEGIN],
                xpaths[DATE_TYPE_RANGE_END]
            ]

        if xpath_root:
            # Clear previously written dates so stale values do not linger in the tree
            remove_element(tree_to_update, xpath_root)

        return update_property(tree_to_update, xpath_root, date_xpaths, prop, values)
352 |
353 | def convert_to(self, new_parser_or_type):
354 | """
355 | :return: a parser initialized with this parser's data. If new_parser_or_type is to be treated
356 | as a parser, it must have
357 | :param new_parser_or_type: a new parser to initialize, or parser type to instantiate
358 | """
359 |
360 | try:
361 | to_dict = issubclass(new_parser_or_type, dict)
362 | except TypeError:
363 | to_dict = isinstance(new_parser_or_type, dict)
364 |
365 | if to_dict:
366 | return {p: getattr(self, p) for p in self._metadata_props if p[0] != '_'}
367 | else:
368 | return convert_parser_to(self, new_parser_or_type, self._metadata_props)
369 |
370 | def serialize(self, use_template=False):
371 | """
372 | Validates instance properties, writes them to an XML tree, and returns the content as a string.
373 | :param use_template: if True, updates a new template XML tree; otherwise the original XML tree
374 | """
375 | return element_to_string(self.update(use_template))
376 |
377 | def write(self, use_template=False, out_file_or_path=None, encoding=DEFAULT_ENCODING):
378 | """
379 | Validates instance properties, updates an XML tree with them, and writes the content to a file.
380 | :param use_template: if True, updates a new template XML tree; otherwise the original XML tree
381 | :param out_file_or_path: optionally override self.out_file_or_path with a custom file path
382 | :param encoding: optionally use another encoding instead of UTF-8
383 | """
384 |
385 | if not out_file_or_path:
386 | out_file_or_path = self.out_file_or_path
387 |
388 | if not out_file_or_path:
389 | raise IOError('Output file path has not been provided')
390 |
391 | write_element(self.update(use_template), out_file_or_path, encoding)
392 |
393 | def update(self, use_template=False, **metadata_defaults):
394 | """
395 | Validates instance properties and updates either a template or the original XML tree with them.
396 | :param use_template: if True, updates a new template XML tree; otherwise the original XML tree
397 | """
398 |
399 | self.validate()
400 |
401 | tree_to_update = self._xml_tree if not use_template else self._get_template(**metadata_defaults)
402 | supported_props = self._metadata_props
403 |
404 | for prop, xpath in self._data_map.items():
405 | if not prop.startswith('_') or prop.strip('_') in supported_props:
406 | # Send only public or alternate properties
407 | update_property(
408 | tree_to_update, self._get_xroot_for(prop), xpath, prop, getattr(self, prop, u''), supported_props
409 | )
410 |
411 | return tree_to_update
412 |
413 | def validate(self):
414 | """ Default validation for updated properties: MAY be overridden in children """
415 |
416 | validate_properties(self._data_map, self._metadata_props)
417 |
418 | for prop in self._data_map:
419 | validate_any(prop, getattr(self, prop), self._data_structures.get(prop))
420 |
421 | return self
422 |
--------------------------------------------------------------------------------
/gis_metadata/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/consbio/gis-metadata-parser/a3cac07857bc425185d43ec819aece1a9533ec8c/gis_metadata/tests/__init__.py
--------------------------------------------------------------------------------
/gis_metadata/tests/data/arcgis_metadata.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Attributes Label 1
7 | Attributes Alias 1
8 | Attributes Definition 1
9 | Attributes Definition Source 1
10 |
11 |
12 |
13 | Attributes Label 2
14 | Attributes Alias 2
15 | Attributes Definition 2
16 | Attributes Definition Source 2
17 |
18 |
19 |
20 | Attributes Label 3
21 | Attributes Alias 3
22 | Attributes Definition 3
23 | Attributes Definition Source 3
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | FGDC CSDGM Metadata
35 | 1.0
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 | ArcGIS Metadata
46 | 1.0
47 |
48 |
49 |
50 | Test Distribution Org
51 | Test Distribution Person
52 |
53 |
54 | Test Distribution Phone
55 |
56 |
57 | Test Distribution Address
58 | Test Distribution City
59 | OR
60 | 12345
61 | US
62 | Test Distribution Email
63 |
64 | 8:00 AM - 5:00 PM PST
65 | Backup Distribution Phone
66 |
67 |
68 |
69 |
70 | Test Processing Fees
71 |
72 | Test Processing Instructions
73 |
74 |
75 |
76 |
77 | Digital Form Name 1
78 | Digital Form Version 1
79 | Digital Form Content 1
80 | Digital Form Specification 1
81 | Digital Form Decompression 1
82 |
83 |
84 |
85 | Digital Form Resource 1
86 | Digital Form Access Instructions 1
87 | Digital Form Access Description 1
88 |
89 |
90 |
91 | Digital Form Name 2
92 | Digital Form Version 2
93 | Digital Form Content 2
94 | Digital Form Specification 2
95 | Digital Form Decompression 2
96 |
97 |
98 |
99 | Digital Form Resource 2
100 | Digital Form Access Instructions 2
101 | Digital Form Access Description 2
102 |
103 |
104 |
105 |
106 |
107 |
108 | Larger Works Title
109 | Larger Works Edition
110 | Larger Works Other Citation
111 |
112 | Larger Works Originator
113 | Larger Works Info
114 |
115 |
116 | Larger Works Place
117 |
118 |
119 | http://test.largerworks.online.linkage.com
120 |
121 |
122 |
123 |
124 | Larger Works Date
125 |
126 |
127 |
128 |
129 | Test Title
130 |
131 | Test Originators
132 |
133 |
134 | Test Other Citation Info
135 |
136 |
137 |
138 | http://test.onlinelinkages.org
139 |
140 |
141 |
142 |
143 |
144 | http://backup.onlinelinkages.org
145 |
146 |
147 | Test Resource Description
148 |
149 |
150 | Test Publish Date
151 |
152 |
153 | vector digital data
154 |
155 |
156 | Test Abstract
157 | Test Purpose
158 | Test Data Credits
159 |
160 | Test Resource Description
161 |
162 |
163 |
164 | Contact Name 1
165 | Contact Organization 1
166 | Contact Position 1
167 |
168 |
169 | Contact Email 1
170 |
171 |
172 |
173 |
174 |
175 | Contact Name 2
176 | Contact Organization 2
177 | Contact Position 2
178 |
179 |
180 | Contact Email 2
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 | Oregon
190 | Washington
191 |
192 |
193 | Layer One
194 | Layer Two
195 |
196 |
197 | Now
198 | Later
199 |
200 |
201 | Ecoregion
202 |
203 |
204 | Risk
205 | Threat
206 | Habitat
207 |
208 |
209 | ArcGIS Discipline One
210 | ArcGIS Discipline Two
211 |
212 |
213 | ArcGIS Other One
214 | ArcGIS Other Two
215 |
216 |
217 | ArcGIS Product One
218 | ArcGIS Product Two
219 |
220 |
221 | ArcGIS Search One
222 | ArcGIS Search Two
223 |
224 |
225 | ArcGIS Topical One
226 | ArcGIS Topical Two
227 |
228 |
229 |
230 | Test Distribution Liability
231 |
232 |
233 |
234 |
235 | Test Use Constraints
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 | Multiple Date 1
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 | Multiple Date 2
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 | Multiple Date 3
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 | Test Supplementary Info
277 | Test Technical Prerequisites
278 |
279 |
280 |
281 |
282 | 1
283 | -179.99999999998656
284 | 179.99999999998656
285 | 87.81211601444309
286 | -86.78249642712764
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 | Process Step Description 1
295 | Process Step Date 1
296 |
297 | Process Step Sources 1.1
298 |
299 |
300 | Process Step Sources 1.2
301 |
302 |
303 |
304 | Process Step Description 2
305 |
306 |
307 |
308 | Process Step Date 3
309 |
310 |
311 |
312 | Process Step Description 4
313 |
314 | Process Step Sources 4.1
315 |
316 |
317 | Process Step Sources 4.2
318 |
319 |
320 |
321 |
322 | Test Attribute Accuracy
323 |
324 |
325 | Test Dataset Completeness
326 |
327 |
328 |
329 |
330 | dataset
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 | Test Vertical Count
339 |
340 |
341 | Test Column Count
342 |
343 | Test X Resolution
344 |
345 |
346 |
347 | Test Row Count
348 |
349 | Test Y Resolution
350 |
351 |
352 | Test # Dimensions
353 |
354 |
355 |
356 |
--------------------------------------------------------------------------------
/gis_metadata/tests/data/fgdc_metadata.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Test Abstract
6 | Test Purpose
7 | Test Supplementary Info
8 |
9 |
10 |
11 | Test Originators
12 | Test Publish Date
13 | Test Title
14 | http://test.onlinelinkages.org
15 | Test Other Citation Info
16 |
17 |
18 |
19 |
20 |
21 |
22 | Larger Works Originator
23 | Larger Works Date
24 | Larger Works Title
25 | Larger Works Edition
26 | Larger Works Other Citation
27 | http://test.largerworks.online.linkage.com
28 |
29 | Larger Works Info
30 | Larger Works Place
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 | Date Range Start
40 | Date Range End
41 |
42 |
43 |
44 | Multiple Date 1
45 |
46 |
47 | Multiple Date 2
48 |
49 |
50 | Multiple Date 3
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 | -179.99999999998656
63 | 179.99999999998656
64 | 87.81211601444309
65 | -86.78249642712764
66 |
67 |
68 |
69 |
70 | Oregon
71 | Washington
72 |
73 |
74 | Layer One
75 | Layer Two
76 |
77 |
78 | Now
79 | Later
80 |
81 |
82 | Ecoregion
83 | Risk
84 | Threat
85 | Habitat
86 |
87 |
88 | Test Use Constraints
89 | Test Data Credits
90 |
91 |
92 | Contact Email 1
93 | Contact Position 1
94 |
95 | Contact Name 1
96 | Contact Organization 1
97 |
98 |
99 |
100 |
101 |
102 | Contact Email 2
103 | Contact Position 2
104 |
105 | Contact Name 2
106 | Contact Organization 2
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 | Test Distribution Org
116 | Test Distribution Person
117 |
118 |
119 | Test Distribution Address Type
120 | Test Distribution Address
121 | Test Distribution City
122 | OR
123 | 12345
124 | US
125 |
126 | Test Distribution Phone
127 | Test Distribution Email
128 |
129 |
130 | Test Resource Description
131 | Test Distribution Liability
132 |
133 |
134 |
135 | Digital Form Name 1
136 | Digital Form Version 1
137 | Digital Form Content 1
138 | Digital Form Specification 1
139 | Digital Form Decompression 1
140 |
141 |
142 |
143 |
144 |
145 | Digital Form Resource 1
146 |
147 |
148 | Digital Form Access Instructions 1
149 | Digital Form Access Description 1
150 |
151 |
152 |
153 |
154 |
155 | Digital Form Name 2
156 | Digital Form Version 2
157 | Digital Form Content 2
158 | Digital Form Specification 2
159 | Digital Form Decompression 2
160 |
161 |
162 |
163 |
164 |
165 | Digital Form Resource 2
166 |
167 |
168 | Digital Form Access Instructions 2
169 | Digital Form Access Description 2
170 |
171 |
172 |
173 | Test Processing Fees
174 | Test Processing Instructions
175 |
176 | Test Technical Prerequisites
177 |
178 |
179 |
180 |
181 | Attributes Label 1
182 | Attributes Alias 1
183 | Attributes Definition 1
184 | Attributes Definition Source 1
185 |
186 |
187 |
188 | Attributes Label 2
189 | Attributes Alias 2
190 | Attributes Definition 2
191 | Attributes Definition Source 2
192 |
193 |
194 |
195 | Attributes Label 3
196 | Attributes Alias 3
197 | Attributes Definition 3
198 | Attributes Definition Source 3
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 | 20070210
207 |
208 |
209 |
210 | 20070418
211 |
212 |
213 |
214 | Process Step Description 1
215 | Process Step Date 1
216 | Process Step Sources 1.1
217 | Process Step Sources 1.2
218 |
219 |
220 | Process Step Description 2
221 |
222 |
223 |
224 | Process Step Date 3
225 |
226 |
227 |
228 |
229 | Process Step Description 4
230 |
231 | Process Step Sources 4.1
232 | Process Step Sources 4.2
233 |
234 |
235 | Test Dataset Completeness
236 |
237 | Test Attribute Accuracy
238 |
239 |
240 |
241 |
242 | Test Column Count
243 | Test Vertical Count
244 | Test Row Count
245 | Test # Dimensions
246 |
247 |
248 |
249 |
250 |
251 |
252 | Test Backup Y Resolution
253 | Test Backup X Resolution
254 |
255 |
256 |
257 | Custom Projection
258 |
259 | 7
260 | 8
261 | 9
262 | 22
263 | 11
264 |
265 |
266 |
267 |
268 | Test Y Resolution
269 | Test X Resolution
270 |
271 |
272 |
273 |
274 |
275 |
276 |
--------------------------------------------------------------------------------
/gis_metadata/tests/data/iso_citation_href.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | HREF Attributes Alias 1
9 |
10 |
11 | HREF Attributes Definition 1
12 |
13 |
14 | HREF Attributes Label 1
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | HREF Attributes Definition Source 1
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 | HREF Attributes Alias 2
41 |
42 |
43 | HREF Attributes Definition 2
44 |
45 |
46 | HREF Attributes Label 2
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 | HREF Attributes Definition Source 2
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 | HREF Attributes Alias 3
77 |
78 |
79 | HREF Attributes Definition 3
80 |
81 |
82 | HREF Attributes Label 3
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | HREF Attributes Definition Source 3
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/gis_metadata/tests/data/iso_citation_linkage.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | LINKAGE Attributes Alias 1
9 |
10 |
11 | LINKAGE Attributes Definition 1
12 |
13 |
14 | LINKAGE Attributes Label 1
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | LINKAGE Attributes Definition Source 1
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 | LINKAGE Attributes Alias 2
45 |
46 |
47 | LINKAGE Attributes Definition 2
48 |
49 |
50 | LINKAGE Attributes Label 2
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 | LINKAGE Attributes Definition Source 2
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 | LINKAGE Attributes Alias 3
77 |
78 |
79 | LINKAGE Attributes Definition 3
80 |
81 |
82 | LINKAGE Attributes Label 3
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | LINKAGE Attributes Definition Source 3
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
--------------------------------------------------------------------------------
/gis_metadata/tests/data/iso_metadata.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | eng
5 | esp
6 |
7 |
8 |
9 |
10 | Custom Contact Name
11 |
12 |
13 | Custom Contact Organization
14 |
15 |
16 | Custom Contact Position
17 |
18 |
19 |
20 |
21 |
22 |
23 | Custom Contact Phone
24 |
25 |
26 |
27 |
28 |
29 |
30 | Custom Contact Email
31 |
32 |
33 |
34 |
35 |
36 |
37 | pointOfContact
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 | Attributes Title (unused)
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 | ftp://ftp.ncddc.noaa.gov/pub/Metadata//ISO/87ffdfd0-775a-11e0-a1f0-0800200c9a66.xml
56 |
57 |
58 | ftp
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 | Attributes Alias 1
76 |
77 |
78 | Attributes Definition 1
79 |
80 |
81 | Attributes Label 1
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 | Attributes Definition Source 1
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 | Attributes Alias 2
108 |
109 |
110 | Attributes Definition 2
111 |
112 |
113 | Attributes Label 2
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 | Attributes Definition Source 2
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 | Attributes Alias 3
140 |
141 |
142 | Attributes Definition 3
143 |
144 |
145 | Attributes Label 3
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 | Attributes Definition Source 3
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 | Larger Works Title
178 |
179 |
180 | Larger Works Other Citation
181 |
182 |
183 | Larger Works Collective Title (not currently used)
184 |
185 |
186 | Larger Works Edition
187 |
188 |
189 | Larger Works Date
190 |
191 |
192 |
193 |
194 | Larger Works Originator
195 |
196 |
197 | Larger Works Info
198 |
199 |
200 |
201 |
202 |
203 |
204 | Larger Works Place
205 |
206 |
207 |
208 |
209 |
210 |
211 | http://test.largerworks.online.linkage.com
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 | Test Title
227 |
228 |
229 | Test Other Citation Info
230 |
231 |
232 |
233 |
234 | Test Publish Date
235 |
236 |
237 | Test Publish Date Type
238 |
239 |
240 |
241 |
242 |
243 |
244 | Test Originators
245 |
246 |
247 | originator
248 |
249 |
250 |
251 |
252 |
253 |
254 | http://test.onlinelinkages.org
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 | Test Abstract
266 |
267 |
268 | Test Technical Prerequisites
269 |
270 |
271 | Test Purpose
272 |
273 |
274 | Test Data Credits
275 |
276 |
277 |
278 |
279 | Contact Name 1
280 |
281 |
282 | Contact Organization 1
283 |
284 |
285 | Contact Position 1
286 |
287 |
288 |
289 |
290 |
291 |
292 | Contact Email 1
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 | Contact Name 2
304 |
305 |
306 | Contact Organization 2
307 |
308 |
309 | Contact Position 2
310 |
311 |
312 |
313 |
314 |
315 |
316 | Contact Email 2
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 | Test Use Constraints
328 |
329 |
330 |
331 |
332 |
333 |
334 | Test Distribution Liability
335 |
336 |
337 |
338 |
339 |
340 |
341 | Test Resource Description
342 |
343 |
344 |
345 |
346 |
347 |
348 | Ecoregion
349 |
350 |
351 | Risk
352 |
353 |
354 | Threat
355 |
356 |
357 | Habitat
358 |
359 |
360 | theme
361 |
362 |
363 |
364 |
365 |
366 |
367 | Layer One
368 |
369 |
370 | Layer Two
371 |
372 |
373 | stratum
374 |
375 |
376 |
377 |
378 |
379 |
380 | Now
381 |
382 |
383 | Later
384 |
385 |
386 | temporal
387 |
388 |
389 |
390 |
391 |
392 |
393 | Oregon
394 |
395 |
396 | place
397 |
398 |
399 |
400 |
401 |
402 |
403 | Washington
404 |
405 |
406 | place
407 |
408 |
409 |
410 |
411 |
412 |
413 | publication date
414 |
415 |
416 |
417 |
418 |
419 | Multiple Date 1
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 | Multiple Date 2
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 | Multiple Date 3
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 | -179.99999999998656
450 |
451 |
452 | 179.99999999998656
453 |
454 |
455 | -86.78249642712764
456 |
457 |
458 | 87.81211601444309
459 |
460 |
461 |
462 |
463 |
464 |
465 | Test Supplementary Info
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 | Digital Form Name 1
475 |
476 |
477 | Digital Form Decompression 1
478 |
479 |
480 |
481 | Digital Form Specification 1
482 | @------------------------------@
483 | Digital Form Content 1
484 |
485 |
486 |
487 | Digital Form Version 1
488 |
489 |
490 |
491 |
492 |
493 |
494 | Digital Form Name 2
495 |
496 |
497 | Digital Form Decompression 2
498 |
499 |
500 |
501 | Digital Form Specification 2
502 |
503 | @------------------------------@
504 |
505 | Digital Form Content 2
506 |
507 |
508 |
509 | Digital Form Version 2
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 | Digital Form Access Instructions 1
519 |
520 |
521 | Digital Form Resource 1
522 |
523 |
524 | Digital Form Access Description 1
525 |
526 |
527 |
528 |
529 |
530 |
531 | Digital Form Resource 2
532 |
533 |
534 | Digital Form Access Instructions 2
535 |
536 |
537 | Digital Form Access Description 2
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 | Test Distribution Person
549 |
550 |
551 | Test Distribution Org
552 |
553 |
554 |
555 |
556 |
557 |
558 | Test Distribution Phone
559 |
560 |
561 |
562 |
563 |
564 |
565 | Test Distribution Address
566 |
567 |
568 | Test Distribution City
569 |
570 |
571 | OR
572 |
573 |
574 | 12345
575 |
576 |
577 | US
578 |
579 |
580 | Test Distribution Email
581 |
582 |
583 |
584 |
585 |
586 |
587 | distributor
588 |
589 |
590 |
591 |
592 |
593 |
594 | Test Processing Fees
595 |
596 |
597 | Test Processing Instructions
598 |
599 |
600 |
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 | Test Dataset Completeness
611 |
612 |
613 |
614 |
615 |
616 |
617 | Test Attribute Accuracy
618 |
619 |
620 |
621 |
622 |
623 |
624 |
625 |
626 | Process Step Description 1
627 |
628 |
629 | Process Step Date 1
630 |
631 |
632 |
633 |
634 |
635 |
636 | Process Step Sources 1 (not used)
637 |
638 |
639 | Process Step Sources 1.1
640 |
641 |
642 | Process Step Sources 1.2
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 | Process Step Description 2
654 |
655 |
656 |
657 |
658 |
659 |
660 | Process Step Date 3
661 |
662 |
663 |
664 |
665 |
666 |
667 | Process Step Description 4
668 |
669 |
670 |
671 |
672 |
673 |
674 | Process Step Sources 4.1
675 |
676 |
677 | Process Step Sources 4.2
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 | Test Vertical Count
699 |
700 |
701 |
702 |
703 |
704 |
705 | Test X Resolution
706 |
707 |
708 | column
709 |
710 |
711 | Test Column Count
712 |
713 |
714 |
715 |
716 |
717 |
718 | Test Y Resolution
719 |
720 |
721 |
722 |
723 |
724 | Test Row Count
725 |
726 |
727 |
728 |
729 | Test # Dimensions
730 |
731 |
732 |
733 |
734 |
--------------------------------------------------------------------------------
/gis_metadata/tests/data/utility_metadata.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Test Abstract
6 | Test Purpose
7 | Test Supplementary Info
8 |
9 |
10 |
11 | Test Originators
12 | Test Publish Date
13 | Test Title
14 | http://test.onlinelinkages.org
15 | Test Other Citation Info
16 |
17 |
18 |
19 |
20 |
21 |
22 | Larger Works Originator
23 | Larger Works Date
24 | Larger Works Title
25 | Larger Works Edition
26 | Larger Works Other Citation
27 | http://test.largerworks.online.linkage.com
28 |
29 | Larger Works Info
30 | Larger Works Place
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 | Date Range Start 1
40 | Date Range Start 2
41 | Date Range End 1
42 | Date Range End 2
43 |
44 |
45 |
46 | Multiple Date 1
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 | -179.99999999998656
59 | 179.99999999998656
60 | 87.81211601444309
61 | -86.78249642712764
62 |
63 |
64 |
65 |
66 | Oregon
67 | Washington
68 |
69 |
70 | Layer One
71 | Layer Two
72 |
73 |
74 | Now
75 | Later
76 |
77 |
78 | Ecoregion
79 | Risk
80 | Threat
81 | Habitat
82 |
83 |
84 | Test Use Constraints
85 | Test Data Credits
86 |
87 |
88 | Contact Email 1
89 | Contact Position 1
90 |
91 | Contact Name 1
92 | Contact Organization 1
93 |
94 |
95 |
96 |
97 |
98 | Contact Email 2
99 | Contact Position 2
100 |
101 | Contact Name 2
102 | Contact Organization 2
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 | Test Distribution Org
112 | Test Distribution Person
113 |
114 |
115 | Test Distribution Address Type
116 | Test Distribution Address
117 | Test Distribution City
118 | OR
119 | 12345
120 | US
121 |
122 | Test Distribution Phone
123 | Test Distribution Email
124 |
125 |
126 | Test Resource Description
127 | Test Distribution Liability
128 |
129 |
130 |
131 | Digital Form Name 1
132 | Digital Form Version 1
133 | Digital Form Content 1
134 | Digital Form Specification 1
135 | Digital Form Decompression 1
136 |
137 |
138 |
139 |
140 |
141 | Digital Form Resource 1
142 |
143 |
144 | Digital Form Access Instructions 1
145 | Digital Form Access Description 1
146 |
147 |
148 |
149 |
150 |
151 | Digital Form Name 2
152 | Digital Form Version 2
153 | Digital Form Content 2
154 | Digital Form Specification 2
155 | Digital Form Decompression 2
156 |
157 |
158 |
159 |
160 |
161 | Digital Form Resource 2
162 |
163 |
164 | Digital Form Access Instructions 2
165 | Digital Form Access Description 2
166 |
167 |
168 |
169 | Test Processing Fees
170 | Test Processing Instructions
171 |
172 | Test Technical Prerequisites
173 |
174 |
175 |
176 |
177 | Attributes Label 1
178 | Attributes Alias 1
179 | Attributes Definition 1
180 | Attributes Definition Source 1
181 |
182 |
183 |
184 | Attributes Label 2
185 | Attributes Alias 2
186 | Attributes Definition 2
187 | Attributes Definition Source 2
188 |
189 |
190 |
191 | Attributes Label 3
192 | Attributes Alias 3
193 | Attributes Definition 3
194 | Attributes Definition Source 3
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 | 20070210
203 |
204 |
205 |
206 | 20070418
207 |
208 |
209 |
210 | Process Step Description 1
211 | Process Step Date 1
212 | Process Step Sources 1.1
213 | Process Step Sources 1.2
214 |
215 |
216 | Process Step Description 2
217 |
218 |
219 |
220 | Process Step Date 3
221 |
222 |
223 |
224 |
225 | Process Step Description 4
226 |
227 | Process Step Sources 4.1
228 | Process Step Sources 4.2
229 |
230 |
231 | Test Dataset Completeness
232 |
233 | Test Attribute Accuracy
234 |
235 |
236 |
237 |
238 | Test Column Count
239 | Test Vertical Count
240 | Test Row Count
241 | Test # Dimensions
242 |
243 |
244 |
245 |
246 |
247 |
248 | Test Backup Y Resolution
249 | Test Backup X Resolution
250 |
251 |
252 |
253 |
254 | Test Y Resolution
255 | Test X Resolution
256 |
257 |
258 |
259 |
260 |
261 |
262 |
--------------------------------------------------------------------------------
/gis_metadata/utils.py:
--------------------------------------------------------------------------------
1 | """ Data structures and functionality used by all Metadata Parsers """
2 |
3 | from frozendict import frozendict
4 |
5 | from parserutils.collections import filter_empty, flatten_items, reduce_value, wrap_value
6 | from parserutils.elements import get_element, get_elements, get_elements_attributes, get_elements_text
7 | from parserutils.elements import insert_element, remove_element
8 | from parserutils.elements import remove_element_attributes, set_element_attributes
9 | from parserutils.elements import XPATH_DELIM
10 |
11 | from gis_metadata.exceptions import ConfigurationError, ValidationError
12 |
13 |
# Generic identifying property name constants

KEYWORDS_PLACE = 'place_keywords'
KEYWORDS_STRATUM = 'stratum_keywords'
KEYWORDS_TEMPORAL = 'temporal_keywords'
KEYWORDS_THEME = 'thematic_keywords'


# Identifying property name constants for all complex definitions

ATTRIBUTES = 'attributes'
BOUNDING_BOX = 'bounding_box'
CONTACTS = 'contacts'
DATES = 'dates'
DIGITAL_FORMS = 'digital_forms'
LARGER_WORKS = 'larger_works'
PROCESS_STEPS = 'process_steps'
RASTER_INFO = 'raster_info'
RASTER_DIMS = '_raster_dims'  # leading underscore: internal-only (captures dimension data for RASTER_INFO)
33 |
34 |
# Grouping property name constants for complex definitions

_COMPLEX_DELIM = '\n'  # delimiter between serialized complex values — usage not shown in this module section
_COMPLEX_LISTS = frozenset({
    # Properties whose values are lists (of complex structs or keyword strings)
    ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS,
    KEYWORDS_PLACE, KEYWORDS_STRATUM, KEYWORDS_TEMPORAL, KEYWORDS_THEME,
})
# Properties whose values are a single complex struct (not a list)
_COMPLEX_STRUCTS = frozenset({BOUNDING_BOX, DATES, LARGER_WORKS, RASTER_INFO})
_COMPLEX_WITH_MULTI = frozendict({
    # Complex properties mapped to the sub-keys that hold multiple values
    DATES: {'values'},
    LARGER_WORKS: {'origin'},
    PROCESS_STEPS: {'sources'}
})
48 |
49 |
# Date specific constants for the DATES complex structure

DATE_TYPE_MISSING = ''
DATE_TYPE_MULTIPLE = 'multiple'
DATE_TYPE_RANGE = 'range'
DATE_TYPE_RANGE_BEGIN = 'range_begin'
DATE_TYPE_RANGE_END = 'range_end'
DATE_TYPE_SINGLE = 'single'

# All recognized date types; RANGE_BEGIN/RANGE_END are components of RANGE, not types themselves
DATE_TYPES = (
    DATE_TYPE_MISSING, DATE_TYPE_SINGLE, DATE_TYPE_MULTIPLE, DATE_TYPE_RANGE
)

# Keys into the DATES complex structure
DATE_TYPE = 'type'
DATE_VALUES = 'values'
65 |
66 |
# To add a new complex definition field:
# 1. Create a new constant representing the property name for the field
# 2. Create a new item in COMPLEX_DEFINITIONS that represents the structure of the field
# 3. If required by all metadata parsers, add the constant to SUPPORTED_PROPS
# 4. Update the target metadata parsers with a parse and update method for the new field
# 5. Update the target metadata parsers' _init_data_map method to instantiate a ParserProperty
# 6. Create a new validation method for the type if validate_complex or validate_complex_list won't suffice
# 7. Update validate_any to recognize the constant and call the intended validation method

# Structure of each complex property: sub-key -> '{format_key}' placeholder, intended to be
# filled in with concrete xpaths via format_xpaths; "Text []" marks multi-valued sub-keys

COMPLEX_DEFINITIONS = frozendict({
    ATTRIBUTES: frozendict({
        'label': '{label}',  # Text
        'aliases': '{aliases}',  # Text
        'definition': '{definition}',  # Text
        'definition_source': '{definition_src}'  # Text
    }),
    BOUNDING_BOX: frozendict({
        'east': '{east}',  # Text
        'south': '{south}',  # Text
        'west': '{west}',  # Text
        'north': '{north}'  # Text
    }),
    CONTACTS: frozendict({
        'name': '{name}',  # Text
        'email': '{email}',  # Text
        'organization': '{organization}',  # Text
        'position': '{position}'  # Text
    }),
    DATES: frozendict({
        DATE_TYPE: '{type}',  # Text
        DATE_VALUES: '{values}'  # Text []
    }),
    DIGITAL_FORMS: frozendict({
        'name': '{name}',  # Text
        'content': '{content}',  # Text
        'decompression': '{decompression}',  # Text
        'version': '{version}',  # Text
        'specification': '{specification}',  # Text
        'access_desc': '{access_desc}',  # Text
        'access_instrs': '{access_instrs}',  # Text
        'network_resource': '{network_resource}'  # Text
    }),
    LARGER_WORKS: frozendict({
        'title': '{title}',  # Text
        'edition': '{edition}',  # Text
        'origin': '{origin}',  # Text []
        'online_linkage': '{online_linkage}',  # Text
        'other_citation': '{other_citation}',  # Text
        'publish_date': '{date}',  # Text
        'publish_place': '{place}',  # Text
        'publish_info': '{info}'  # Text
    }),
    PROCESS_STEPS: frozendict({
        'description': '{description}',  # Text
        'date': '{date}',  # Text
        'sources': '{sources}'  # Text []
    }),
    RASTER_INFO: frozendict({
        'dimensions': '{dimensions}',  # Text
        'row_count': '{row_count}',  # Text
        'column_count': '{column_count}',  # Text
        'vertical_count': '{vertical_count}',  # Text
        'x_resolution': '{x_resolution}',  # Text
        'y_resolution': '{y_resolution}',  # Text
    }),
    RASTER_DIMS: frozendict({
        # Captures dimension data for raster_info
        'type': '{type}',  # Text
        'size': '{size}',  # Text
        'value': '{value}',  # Text
        'units': '{units}'  # Text
    })
})
140 |
# A set of identifying property names that must be supported by all parsers
# (note: RASTER_DIMS is deliberately absent — it is internal-only)

SUPPORTED_PROPS = frozenset({
    'title', 'abstract', 'purpose', 'other_citation_info', 'supplementary_info',
    'online_linkages', 'originators', 'publish_date', 'data_credits', 'digital_forms',
    'dist_contact_org', 'dist_contact_person', 'dist_email', 'dist_phone',
    'dist_address', 'dist_address_type', 'dist_city', 'dist_state', 'dist_postal', 'dist_country',
    'dist_liability', 'processing_fees', 'processing_instrs', 'resource_desc', 'tech_prerequisites',
    ATTRIBUTES, 'attribute_accuracy', BOUNDING_BOX, CONTACTS, 'dataset_completeness',
    LARGER_WORKS, PROCESS_STEPS, RASTER_INFO, 'use_constraints',
    DATES, KEYWORDS_PLACE, KEYWORDS_STRATUM, KEYWORDS_TEMPORAL, KEYWORDS_THEME
})
153 |
154 |
def format_xpaths(xpath_map, *args, **kwargs):
    """ :return: a new dict mirroring xpath_map, with each XPATH formatted using the given values """

    return {prop: template.format(*args, **kwargs) for prop, template in xpath_map.items()}
164 |
165 |
def get_xpath_root(xpath):
    """ :return: everything in the XPATH preceding any format key or attribute reference """

    if not xpath:
        return xpath  # Empty string or None passes through unchanged

    if xpath.startswith('@'):
        return ''  # A bare attribute reference has no root

    # Cut at the first attribute reference, or failing that, the first format key
    marker = '/@' if '@' in xpath else '/{'
    cut = xpath.find(marker)

    return xpath if cut < 0 else xpath[:cut]
177 |
178 |
def get_xpath_branch(xroot, xpath):
    """ :return: the relative portion of the XPATH extending beyond the provided root """

    if not xroot or not xpath or not xpath.startswith(xroot):
        return xpath  # Without a matching root prefix, the XPATH is already relative

    return xpath[len(xroot):].lstrip(XPATH_DELIM)
187 |
188 |
def get_xpath_tuple(xpath):
    """ :return: a two-tuple: the XPATH base, plus any trailing format key or attribute name """

    xroot = get_xpath_root(xpath)

    if xroot == xpath:
        return (xroot, None)  # Nothing extends beyond the base

    return (xroot, get_xpath_branch(xroot, xpath).strip('@'))
199 |
200 |
def get_default_for(prop, value):
    """ Ensures complex property types have the correct default values """

    key = prop.strip('_')      # Alternate props carry leading underscores
    val = reduce_value(value)  # Value filtering happens here

    if key in _COMPLEX_LISTS:
        return wrap_value(val)
    if key in _COMPLEX_STRUCTS:
        return val or {}

    # Simple properties default to an empty string
    return u'' if val is None else val
213 |
214 |
def get_default_for_complex(prop, value, xpath=''):
    """ Wraps complex values as dicts with normalized sub-properties: lists stay lists """

    defaults = []

    # Sub-props of complex structs/lists that accept multiple values must be list-wrapped
    for struct in wrap_value(value):
        defaults.append({
            key: get_default_for_complex_sub(prop, key, val, xpath) for key, val in struct.items()
        })

    return defaults if prop in _COMPLEX_LISTS else reduce_value(defaults, {})
224 |
225 |
def get_default_for_complex_sub(prop, subprop, value, xpath):
    """ Normalizes a single sub-property value within a complex structure """

    # Alternate props carry leading underscores
    prop, subprop = prop.strip('_'), subprop.strip('_')

    wrapped = wrap_value(value)

    if subprop in _COMPLEX_WITH_MULTI.get(prop, ''):
        return wrapped  # Sub-properties that allow lists stay wrapped

    # Element attribute values join on comma; element text values join on newline
    delim = ',' if '@' in xpath else _COMPLEX_DELIM
    return delim.join(wrapped)
238 |
239 |
def has_property(elem_to_parse, xpath):
    """
    Parse xpath for any attribute reference "path/@attr" and check for root and presence of attribute.
    :return: True if xpath is present in the element along with any attribute referenced, otherwise False
    """

    xroot, attr = get_xpath_tuple(xpath)

    if not (xroot or attr):
        return False

    if attr:
        return bool(get_elements_attributes(elem_to_parse, xroot, attr))

    return bool(get_elements_text(elem_to_parse, xroot))
254 |
255 |
def parse_complex(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Parses a single complex data structure from the metadata into a dict.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of the structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes the structure
    :return: the parsed dict, or {} when every sub-property is empty
    """

    parsed_struct = {}

    for prop in COMPLEX_DEFINITIONS.get(complex_key, xpath_map):
        # Normalize complex values: values containing newlines behave like separate elements
        raw_value = parse_property(tree_to_parse, xpath_root, xpath_map, prop)
        split_values = (raw.split(_COMPLEX_DELIM) for raw in wrap_value(raw_value))

        parsed_struct[prop] = get_default_for_complex_sub(
            complex_key, prop, reduce_value(flatten_items(split_values)), xpath_map[prop]
        )

    return parsed_struct if any(parsed_struct.values()) else {}
275 |
276 |
def parse_complex_list(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Parses repeated complex data structures from the metadata into a list of dicts.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of each structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes each structure
    """

    parsed_structs = (
        parse_complex(element, xpath_root, xpath_map, complex_key)
        for element in get_elements(tree_to_parse, xpath_root)
    )

    # Structures that parsed to empty are excluded from the result
    return [parsed for parsed in parsed_structs if parsed]
294 |
295 |
def parse_dates(tree_to_parse, xpath_map):
    """
    Creates and returns a Dates Dictionary data structure given the parameters provided
    :param tree_to_parse: the XML tree from which to construct the Dates data structure
    :param xpath_map: a map containing the following type-specific XPATHs:
        multiple, range, range_begin, range_end, and single
    """

    # Try the single-date and multiple-date locations first: either may yield one or more values

    for date_xpath in (DATE_TYPE_SINGLE, DATE_TYPE_MULTIPLE):
        values = wrap_value(parse_property(tree_to_parse, None, xpath_map, date_xpath))

        if len(values) == 1:
            return {DATE_TYPE: DATE_TYPE_SINGLE, DATE_VALUES: values}
        elif len(values) > 1:
            return {DATE_TYPE: DATE_TYPE_MULTIPLE, DATE_VALUES: values}

    # Fall back to the range endpoints: exactly two values constitute a range

    values = flatten_items(
        d for x in (DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END)
        for d in wrap_value(parse_property(tree_to_parse, None, xpath_map, x))
    )

    if len(values) == 1:
        return {DATE_TYPE: DATE_TYPE_SINGLE, DATE_VALUES: values}
    elif len(values) == 2:
        return {DATE_TYPE: DATE_TYPE_RANGE, DATE_VALUES: values}
    elif len(values) > 2:
        return {DATE_TYPE: DATE_TYPE_MULTIPLE, DATE_VALUES: values}

    return {}
330 |
331 |
def parse_property(tree_to_parse, xpath_root, xpath_map, prop):
    """
    Defines the default parsing behavior for metadata values.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: used to determine the relative XPATH location within the parent element
    :param xpath_map: a dict of XPATHs that may contain alternate locations for a property
    :param prop: the property to parse: corresponds to a key in xpath_map
    """

    xpath = xpath_map[prop]

    if isinstance(xpath, ParserProperty):
        if xpath.xpath is None:
            return xpath.get_prop(prop)  # A custom getter with no XPATH handles parsing entirely

        xpath = xpath.xpath

    if xpath_root:
        xpath = get_xpath_branch(xpath_root, xpath)

    if has_property(tree_to_parse, xpath):
        if '@' in xpath:
            xroot, xattr = get_xpath_tuple(xpath)
            parsed = get_elements_attributes(tree_to_parse, xroot, xattr)
        else:
            parsed = get_elements_text(tree_to_parse, xpath)
    else:
        parsed = None

        # Element has no text: try the next alternate location (leading-underscore key)
        alternate = '_' + prop
        if alternate in xpath_map:
            return parse_property(tree_to_parse, xpath_root, xpath_map, alternate)

    return get_default_for(prop, parsed)
368 |
369 |
def update_property(tree_to_update, xpath_root, xpaths, prop, values, supported=None):
    """
    Either update the tree the default way, or call the custom updater

    Default Way: Existing values in the tree are overwritten. If xpaths contains a single path,
    then each value is written to the tree at that path. If xpaths contains a list of xpaths,
    then the values corresponding to each xpath index are written to their respective locations.
    In either case, empty values are ignored.

    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location shared by all the xpaths passed in
    :param xpaths: a string or a list of strings representing the XPATH location(s) to which to write values
    :param prop: the name of the property of the parser containing the value(s) with which to update the tree
    :param values: a single value, or a list of values to write to the specified XPATHs
    :param supported: optional collection of supported property names: when prop is an alternate
        (prefixed with underscores) of a supported property, values are blanked so the alternate
        elements are removed and data is written only to the primary location

    :see: ParserProperty for more on custom updaters

    :return: a list of all elements updated by this operation
    """

    if supported and prop.startswith('_') and prop.strip('_') in supported:
        values = u''  # Remove alternate elements: write values only to primary location
    else:
        values = get_default_for(prop, values)  # Enforce defaults as required per property

    if not xpaths:
        return []
    elif not isinstance(xpaths, ParserProperty):
        return _update_property(tree_to_update, xpath_root, xpaths, values)
    else:
        # Call ParserProperty.set_prop without xpath_root (managed internally)
        return xpaths.set_prop(tree_to_update=tree_to_update, prop=prop, values=values)
402 |
403 |
def _update_property(tree_to_update, xpath_root, xpaths, values):
    """
    Default update operation for a single parser property. If xpaths contains one xpath,
    then one element per value will be inserted at that location in the tree_to_update;
    otherwise, the number of values must match the number of xpaths.

    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location shared by all the xpaths passed in
    :param xpaths: a single XPATH string, or a list of XPATH strings
    :param values: a single value or a list of values to write (empty values are filtered out)
    :return: a list of elements inserted, or of elements removed when values is empty
    """

    # Inner function to update a specific XPATH with the values provided

    def update_element(elem, idx, root, path, vals):
        """ Internal helper function to encapsulate single item update """

        # has_root is True only when path extends beyond a non-empty root prefix
        has_root = bool(root and len(path) > len(root) and path.startswith(root))
        path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

        # First clear out whatever exists at the target location
        if attr:
            removed = [get_element(elem, path)]
            remove_element_attributes(removed[0], attr)
        elif not has_root:
            removed = wrap_value(remove_element(elem, path))
        else:
            path = get_xpath_branch(root, path)
            # Branch removal happens only for the first call (idx == 0), so elements
            # inserted by earlier calls in a multi-xpath update are not clobbered
            removed = [] if idx != 0 else [remove_element(e, path, True) for e in get_elements(elem, root)]

        if not vals:
            return removed  # Nothing to write: report what was removed

        items = []

        for i, val in enumerate(wrap_value(vals)):
            elem_to_update = elem

            if has_root:
                # Insert a fresh root element per value, offset by the caller's index
                elem_to_update = insert_element(elem, (i + idx), root)

            # NOTE(review): assumes non-str values are bytes-like; a non-str, non-bytes
            # value (e.g. int) would raise AttributeError here — confirm callers
            val = val.decode('utf-8') if not isinstance(val, str) else val
            if not attr:
                # Write the value as element text
                items.append(insert_element(elem_to_update, i, path, val))
            elif path:
                # Write the value as an attribute on a newly inserted element
                items.append(insert_element(elem_to_update, i, path, **{attr: val}))
            else:
                # No path remains: set the attribute directly on the current element
                set_element_attributes(elem_to_update, **{attr: val})
                items.append(elem_to_update)

        return items

    # Code to update each of the XPATHs with each of the values

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    if isinstance(xpaths, str):
        # Single XPATH: write every value to that one location
        return update_element(tree_to_update, 0, xpath_root, xpaths, values)
    else:
        each = []

        # Multiple XPATHs: values are matched to XPATHs positionally by index
        for index, xpath in enumerate(xpaths):
            value = values[index] if values else None
            each.extend(update_element(tree_to_update, index, xpath_root, xpath, value))

        return each
465 |
466 |
def update_complex(tree_to_update, xpath_root, xpath_map, prop, values):
    """
    Updates and returns the updated complex Element parsed from tree_to_update.
    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location of the root of the complex Element
    :param xpath_map: a Dictionary of XPATHs corresponding to the complex structure definition
    :param prop: the property identifying the complex structure to be serialized
    :param values: a Dictionary representing the complex structure to be updated
    """

    remove_element(tree_to_update, xpath_root, True)

    values = reduce_value(values, {})

    if not values:
        # Nothing to write: return the elements removed from the tree
        return update_property(tree_to_update, xpath_root, xpath_root, prop, values)

    for subprop, subvalue in values.items():
        subxpath = xpath_map[subprop]
        normalized = get_default_for_complex_sub(prop, subprop, subvalue, subxpath)
        update_property(tree_to_update, None, subxpath, subprop, normalized)

    return get_element(tree_to_update, xpath_root)
492 |
493 |
def update_complex_list(tree_to_update, xpath_root, xpath_map, prop, values):
    """
    Updates and returns the list of updated complex Elements parsed from tree_to_update.
    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location of each complex Element
    :param xpath_map: a Dictionary of XPATHs corresponding to the complex structure definition
    :param prop: the property identifying the complex structure to be serialized
    :param values: a List containing the updated complex structures as Dictionaries
    """

    remove_element(tree_to_update, xpath_root, True)

    if not values:
        # Nothing to write: collect the elements removed from the tree
        return [update_property(tree_to_update, xpath_root, xpath_root, prop, values)]

    updated = []

    for idx, complex_struct in enumerate(wrap_value(values)):

        # Each dict in the list gets its own newly inserted complex element root
        complex_element = insert_element(tree_to_update, idx, xpath_root)

        for subprop, subvalue in complex_struct.items():
            subxpath = get_xpath_branch(xpath_root, xpath_map[subprop])
            normalized = get_default_for_complex_sub(prop, subprop, subvalue, subxpath)
            updated.append(update_property(complex_element, None, subxpath, subprop, normalized))

    return updated
523 |
524 |
def validate_any(prop, value, xpath_map=None):
    """ Validates any metadata property, complex or simple (string or array) """

    if value is None:
        return  # Missing values are handled by defaults elsewhere

    if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS):
        validate_complex_list(prop, value, xpath_map)

    elif prop in (BOUNDING_BOX, LARGER_WORKS, RASTER_INFO):
        validate_complex(prop, value, xpath_map)

    elif prop == DATES:
        validate_dates(prop, value, xpath_map)

    elif prop == PROCESS_STEPS:
        validate_process_steps(prop, value)

    elif prop not in SUPPORTED_PROPS and xpath_map is not None:
        # Custom data structures validate as complex lists by default
        validate_complex_list(prop, value, xpath_map)

    else:
        # Simple properties must be strings or lists thereof
        for val in wrap_value(value, include_empty=True):
            validate_type(prop, val, (str, list))
548 |
549 |
def validate_complex(prop, value, xpath_map=None):
    """ Default validation for single complex data structure """

    if value is None:
        return

    validate_type(prop, value, dict)

    # Known definitions win; otherwise fall back to the given XPATH map's keys
    if prop in COMPLEX_DEFINITIONS:
        complex_keys = COMPLEX_DEFINITIONS[prop]
    elif xpath_map is None:
        complex_keys = {}
    else:
        complex_keys = xpath_map

    for complex_prop, complex_val in value.items():
        if complex_prop not in complex_keys:
            _validation_error(prop, None, value, ('keys: {0}'.format(','.join(complex_keys))))

        validate_type('.'.join((prop, complex_prop)), complex_val, (str, list))
568 |
569 |
def validate_complex_list(prop, value, xpath_map=None):
    """ Default validation for Attribute Details data structure """

    if value is None:
        return

    validate_type(prop, value, (dict, list))

    # Known definitions win; otherwise fall back to the given XPATH map's keys
    if prop in COMPLEX_DEFINITIONS:
        complex_keys = COMPLEX_DEFINITIONS[prop]
    elif xpath_map is None:
        complex_keys = {}
    else:
        complex_keys = xpath_map

    for idx, complex_struct in enumerate(wrap_value(value)):
        cs_idx = '{0}[{1}]'.format(prop, idx)
        validate_type(cs_idx, complex_struct, dict)

        for cs_prop, cs_val in complex_struct.items():
            cs_key = '.'.join((cs_idx, cs_prop))

            if cs_prop not in complex_keys:
                _validation_error(prop, None, value, ('keys: {0}'.format(','.join(complex_keys))))

            if isinstance(cs_val, list):
                # List values: each item must itself be a string
                for list_idx, list_val in enumerate(cs_val):
                    validate_type('{0}[{1}]'.format(cs_key, list_idx), list_val, str)
            else:
                validate_type(cs_key, cs_val, (str, list))
597 |
598 |
def validate_dates(prop, value, xpath_map=None):
    """ Default validation for Date Types data structure """

    if value is None:
        return

    validate_type(prop, value, dict)

    if not set(value):
        return  # An empty dates dict is valid

    # Both the type and values keys are required when any keys are present
    if DATE_TYPE not in value or DATE_VALUES not in value:
        if prop in COMPLEX_DEFINITIONS:
            complex_keys = COMPLEX_DEFINITIONS[prop]
        elif xpath_map is None:
            complex_keys = COMPLEX_DEFINITIONS[DATES]
        else:
            complex_keys = xpath_map

        _validation_error(prop, None, value, ('keys: {0}'.format(','.join(complex_keys))))

    date_type = value[DATE_TYPE]

    if date_type not in DATE_TYPES:
        _validation_error('dates.type', None, date_type, DATE_TYPES)

    date_vals = value[DATE_VALUES]

    validate_type('dates.values', date_vals, list)

    dates_len = len(date_vals)

    # Each date type constrains exactly how many values are allowed
    for fixed_type, fixed_len in ((DATE_TYPE_MISSING, 0), (DATE_TYPE_SINGLE, 1), (DATE_TYPE_RANGE, 2)):
        if date_type == fixed_type and dates_len != fixed_len:
            _validation_error('len(dates.values)', None, dates_len, fixed_len)

    if date_type == DATE_TYPE_MULTIPLE and dates_len < 2:
        _validation_error('len(dates.values)', None, dates_len, 'at least two')

    for idx, date in enumerate(date_vals):
        validate_type('dates.value[{0}]'.format(idx), date, str)
642 |
643 |
def validate_process_steps(prop, value):
    """ Default validation for Process Steps data structure """

    if value is None:
        return

    validate_type(prop, value, (dict, list))

    procstep_keys = COMPLEX_DEFINITIONS[prop]

    for idx, procstep in enumerate(wrap_value(value)):
        ps_idx = '{0}[{1}]'.format(prop, idx)
        validate_type(ps_idx, procstep, dict)

        for ps_prop, ps_val in procstep.items():
            ps_key = '.'.join((ps_idx, ps_prop))

            if ps_prop not in procstep_keys:
                _validation_error(prop, None, value, ('keys: {0}'.format(','.join(procstep_keys))))

            if ps_prop != 'sources':
                validate_type(ps_key, ps_val, str)
                continue

            # The "sources" sub-property may hold one or more string values
            validate_type(ps_key, ps_val, (str, list))

            for src_idx, src_val in enumerate(wrap_value(ps_val)):
                validate_type('{0}[{1}]'.format(ps_key, src_idx), src_val, str)
670 |
671 |
def validate_properties(props, required):
    """
    Ensures the key set contains the base supported properties for a Parser.

    :param props: an iterable of property names to validate against those required
    :param required: an iterable of required property names; falls back to SUPPORTED_PROPS
        when empty or None
    :raises ValidationError: if any required property names are missing from props
    """

    props = set(props)
    required = set(required or SUPPORTED_PROPS)

    # Idiomatic subset check replaces the intersection-length comparison
    if not required.issubset(props):
        missing = required - props
        raise ValidationError(
            'Missing property names: {props}', props=','.join(missing), missing=missing
        )
686 |
687 |
def validate_type(prop, value, expected):
    """ Default validation for all types """

    # None is ignored here: defaults for missing values are handled elsewhere
    if value is None:
        return

    if not isinstance(value, expected):
        _validation_error(prop, type(value).__name__, None, expected)
694 |
695 |
def _validation_error(prop, prop_type, prop_value, expected):
    """ Raises a ValidationError describing an invalid property type or value """

    # A missing type means the value itself was invalid
    if prop_type is None:
        attrib, assigned = 'value', prop_value
    else:
        attrib, assigned = 'type', prop_type

    raise ValidationError(
        'Invalid property {attrib} for {prop}:\n\t{attrib}: {assigned}\n\texpected: {expected}',
        attrib=attrib, prop=prop, assigned=assigned, expected=expected,
        invalid={prop: prop_value} if attrib == 'value' else {}
    )
711 |
712 |
class ParserProperty(object):
    """
    Manages dynamic getters and setters for Parser properties.
    A plain XPATH is usually enough to define reads and writes, but
    complex data structures require custom processing via callables.
    """

    def __init__(self, prop_parser, prop_updater, xpath=None):
        """ Initialize with callables for getting and setting """

        # The getter may be omitted as long as an XPATH is supplied instead
        if hasattr(prop_parser, '__call__'):
            self._parser = prop_parser
        elif xpath is not None:
            self._parser = None
        else:
            raise ConfigurationError(
                'Invalid property getter:\n\tpassed in: {param}\n\texpected: {expected}',
                param=type(prop_parser), expected=' or provide XPATH'
            )

        # The setter is always required
        if hasattr(prop_updater, '__call__'):
            self._updater = prop_updater
        else:
            raise ConfigurationError(
                'Invalid property setter:\n\tpassed in: {param}\n\texpected: {expected}',
                param=type(prop_updater), expected=''
            )

        self.xpath = xpath

    def get_prop(self, prop):
        """ Calls the getter with no arguments and returns its value """

        if self._parser is None:
            raise ConfigurationError('Cannot call ParserProperty."get_prop" with no parser configured')

        if prop:
            return self._parser(prop)

        return self._parser()

    def set_prop(self, **setter_args):
        """
        Calls the setter with the specified keyword arguments for flexibility.
        :param setter_args: must contain tree_to_update, prop, values
        :return: None, or the value updated for complex values
        """

        if self.xpath:
            # The managed XPATH always overrides any xpaths passed in
            setter_args['xpaths'] = self.xpath

        return self._updater(**setter_args)
762 |
--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
1 | [[package]]
2 | name = "defusedxml"
3 | version = "0.7.1"
4 | description = "XML bomb protection for Python stdlib modules"
5 | category = "main"
6 | optional = false
7 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
8 |
9 | [[package]]
10 | name = "frozendict"
11 | version = "2.1.2"
12 | description = "A simple immutable dictionary"
13 | category = "main"
14 | optional = false
15 | python-versions = ">=3.6"
16 |
17 | [[package]]
18 | name = "mock"
19 | version = "4.0.3"
20 | description = "Rolling backport of unittest.mock for all Pythons"
21 | category = "dev"
22 | optional = false
23 | python-versions = ">=3.6"
24 |
25 | [package.extras]
26 | build = ["twine", "wheel", "blurb"]
27 | docs = ["sphinx"]
28 | test = ["pytest (<5.4)", "pytest-cov"]
29 |
30 | [[package]]
31 | name = "parserutils"
32 | version = "2.0.1"
33 | description = "A collection of performant parsing utilities"
34 | category = "main"
35 | optional = false
36 | python-versions = ">=3.6,<4.0"
37 |
38 | [package.dependencies]
39 | defusedxml = ">=0.7.1,<0.8.0"
40 | python-dateutil = ">=2.8.2,<3.0.0"
41 |
42 | [[package]]
43 | name = "pipdeptree"
44 | version = "2.2.0"
45 | description = "Command line utility to show dependency tree of packages"
46 | category = "dev"
47 | optional = false
48 | python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*"
49 |
50 | [package.extras]
51 | graphviz = ["graphviz"]
52 |
53 | [[package]]
54 | name = "python-dateutil"
55 | version = "2.8.2"
56 | description = "Extensions to the standard Python datetime module"
57 | category = "main"
58 | optional = false
59 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
60 |
61 | [package.dependencies]
62 | six = ">=1.5"
63 |
64 | [[package]]
65 | name = "six"
66 | version = "1.16.0"
67 | description = "Python 2 and 3 compatibility utilities"
68 | category = "main"
69 | optional = false
70 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
71 |
72 | [metadata]
73 | lock-version = "1.1"
74 | python-versions = "^3.6"
75 | content-hash = "1f23dcbcdbe374f021649194134cc936d575bd924d115b583a8e0ad1c7e4efb6"
76 |
77 | [metadata.files]
78 | defusedxml = [
79 | {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"},
80 | {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
81 | ]
82 | frozendict = [
83 | {file = "frozendict-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3cbb8bd5ddbdd1db1caa670586b50f9e665e60b1c095add5aa04d9e2bedf5f00"},
84 | {file = "frozendict-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:513a3f3a8ff2767492570c78c158845a1cc7b4c954fc9f78ed313b2463727cae"},
85 | {file = "frozendict-2.1.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3b4ac7aafd613959a055818c3e9844822196c874b9c8bf118b6c1419ff1fbbdc"},
86 | {file = "frozendict-2.1.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb1ebca998c68c3bf28ce70f14a392431bbe01bfa610e3e9e635176d9ecbba71"},
87 | {file = "frozendict-2.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c0e09b7159fee7c6dafd727dec2ea7a135f47458dd3996705f9a64e3fca3bf73"},
88 | {file = "frozendict-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79666516d594ced95a294869988dba2daff89d9d274061f7138e1c49e49428ce"},
89 | {file = "frozendict-2.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3cdc8602e398d28887484226a493ae6e96ff3532b4368d4d49ab96c5ee7eb61d"},
90 | {file = "frozendict-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cfe6d89d2af7b97726a5f789d73b5298eb067a9348a8cc8f0834fdc5349b125"},
91 | {file = "frozendict-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:afaaa4fc0343af15f1153d9e09660f6c388ab1d6d01147bbd7f7979e723258df"},
92 | {file = "frozendict-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f45f7a724e93653f0d446e0cc99e09bda4835f16de08eb1de3b007537e18daa1"},
93 | {file = "frozendict-2.1.2-py3-none-any.whl", hash = "sha256:a0650a673ce6e320e8b25a38f4620f42382c081b2088051c81073a67f14bac32"},
94 | {file = "frozendict-2.1.2.tar.gz", hash = "sha256:2eb92fbe8dde37075ed0e5dd3dac88a850a04ccfc646d20f3412b72220ddbaf2"},
95 | ]
96 | mock = [
97 | {file = "mock-4.0.3-py3-none-any.whl", hash = "sha256:122fcb64ee37cfad5b3f48d7a7d51875d7031aaf3d8be7c42e2bee25044eee62"},
98 | {file = "mock-4.0.3.tar.gz", hash = "sha256:7d3fbbde18228f4ff2f1f119a45cdffa458b4c0dee32eb4d2bb2f82554bac7bc"},
99 | ]
100 | parserutils = [
101 | {file = "parserutils-2.0.1-py3-none-any.whl", hash = "sha256:19fd3086fd360b3b53322400a236baebcd8109f0483f7c6396f4855e5d3515b1"},
102 | {file = "parserutils-2.0.1.tar.gz", hash = "sha256:f927e69779d81db508db98e6e8ec331f90ff31ff2868161d1cce30fffe92bec3"},
103 | ]
104 | pipdeptree = [
105 | {file = "pipdeptree-2.2.0-py2-none-any.whl", hash = "sha256:e31bcb4db905fe3df15e7c41bc015a3587ef4bafdd5119b011aae32948c4a371"},
106 | {file = "pipdeptree-2.2.0-py3-none-any.whl", hash = "sha256:95fb603e46343651342583c337a0ee68d3ccade7a81f5cf6b2fbd8151b79ed80"},
107 | {file = "pipdeptree-2.2.0.tar.gz", hash = "sha256:21a89e77d6eae635685e8af5ecd56561f092f8216bb290e7ae5362885d611f60"},
108 | ]
109 | python-dateutil = [
110 | {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
111 | {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
112 | ]
113 | six = [
114 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
115 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
116 | ]
117 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "gis-metadata-parser"
3 | version = "2.0.1"
4 | description = "Parser for GIS metadata standards including ArcGIS, FGDC and ISO-19115"
5 | authors = ["dharvey-consbio "]
6 | keywords = ["arcgis", "fgdc", "iso", "ISO-19115", "ISO-19139", "gis", "metadata", "parser", "xml", "gis_metadata", "gis_metadata_parser"]
7 | readme = "README.md"
8 | homepage = "https://github.com/consbio/gis-metadata-parser/"
9 | repository = "https://github.com/consbio/gis-metadata-parser/"
10 | license = "BSD"
11 | packages = [
12 | { include = "gis_metadata" },
13 | ]
14 |
15 | [tool.poetry.dependencies]
16 | python = "^3.6"
17 | frozendict = "^2.0"
18 | parserutils = "^2.0.1"
19 |
20 | [tool.poetry.dev-dependencies]
21 | mock = "*"
22 | pipdeptree = "*"
23 |
24 | [build-system]
25 | requires = ["poetry-core>=1.0.0"]
26 | build-backend = "poetry.core.masonry.api"
27 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import sys
3 |
4 | from setuptools import Command, setup
5 |
6 |
class RunTests(Command):
    """ Custom setuptools command: runs the package test suite via unittest """

    user_options = []  # No command-line options are accepted

    def initialize_options(self):
        """ Required by the setuptools Command interface; nothing to initialize """
        pass

    def finalize_options(self):
        """ Required by the setuptools Command interface; nothing to finalize """
        pass

    def run(self):
        """ Runs the tests in a subprocess and exits with its return code """
        errno = subprocess.call([sys.executable, '-m', 'unittest', 'gis_metadata.tests.tests'])
        raise SystemExit(errno)
19 |
20 |
# Read the PyPI long description from the README; explicit UTF-8 avoids
# platform-dependent default encodings breaking the build
with open('README.md', encoding='utf-8') as readme:
    long_description = readme.read()


setup(
    name='gis-metadata-parser',
    # Description kept consistent with pyproject.toml ("ArcGIS, FGDC and ISO-19115")
    description='Parser for GIS metadata standards including ArcGIS, FGDC and ISO-19115',
    long_description=long_description,
    long_description_content_type='text/markdown',
    keywords='arcgis,fgdc,iso,ISO-19115,ISO-19139,gis,metadata,parser,xml,gis_metadata,gis_metadata_parser',
    version='2.0.1',
    packages=['gis_metadata'],
    install_requires=[
        'frozendict>=2.0', 'parserutils>=2.0.1'
    ],
    tests_require=['mock'],
    url='https://github.com/consbio/gis-metadata-parser',
    license='BSD',
    cmdclass={'test': RunTests}
)
41 |
--------------------------------------------------------------------------------