├── .coveragerc ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── docs ├── arcgis-metadata.dtd ├── fgdc-std.dtd └── iso-19139-std.dtd ├── gis_metadata ├── __init__.py ├── arcgis_metadata_parser.py ├── exceptions.py ├── fgdc_metadata_parser.py ├── iso_metadata_parser.py ├── metadata_parser.py ├── tests │ ├── __init__.py │ ├── data │ │ ├── arcgis_metadata.xml │ │ ├── fgdc_metadata.xml │ │ ├── iso_citation_href.xml │ │ ├── iso_citation_linkage.xml │ │ ├── iso_metadata.xml │ │ └── utility_metadata.xml │ └── tests.py └── utils.py ├── poetry.lock ├── pyproject.toml └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = gis_metadata 3 | data_file = .coverage 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .cache/ 3 | .coverage 4 | __pycache__/ 5 | build/ 6 | dist/ 7 | gis-metadata-parser.egg-info/ 8 | gis_metadata_parser.egg-info/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.9" 4 | - "3.8" 5 | - "3.7" 6 | - "3.6" 7 | 8 | install: 9 | - "pip install mock" 10 | - "pip install parserutils" 11 | - "pip install frozendict" 12 | - "pip install coveralls" 13 | 14 | script: 15 | coverage run --omit=gis_metadata/tests/*.py --source=gis_metadata -m unittest gis_metadata.tests.tests 16 | 17 | after_success: 18 | coveralls -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Conservation Biology Institute 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of gis-metadata-parser nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gis-metadata-parser 2 | 3 | XML parsers for GIS metadata that are designed to read in, validate, update and output a core set of properties that have been mapped between the most common standards, currently: 4 | 5 | * FGDC 6 | * ISO-19139 (and ISO-19115) 7 | * ArcGIS (tested with ArcGIS format 1.0). 8 | 9 | This library is tested against Python versions 3.6 through 3.9. 10 | 11 | [![Build Status](https://api.travis-ci.com/consbio/gis-metadata-parser.png?branch=main)](https://app.travis-ci.com/github/consbio/gis-metadata-parser) 12 | [![Coverage Status](https://coveralls.io/repos/github/consbio/gis-metadata-parser/badge.svg?branch=main)](https://coveralls.io/github/consbio/gis-metadata-parser?branch=main) 13 | 14 | ## Installation 15 | Install with `pip install gis-metadata-parser`. 16 | 17 | ## Usage 18 | 19 | Parsers can be instantiated from files, XML strings or URLs. They can be converted from one standard to another as well.
20 | ```python 21 | from gis_metadata.arcgis_metadata_parser import ArcGISParser 22 | from gis_metadata.fgdc_metadata_parser import FgdcParser 23 | from gis_metadata.iso_metadata_parser import IsoParser 24 | from gis_metadata.metadata_parser import get_metadata_parser 25 | 26 | # From file objects 27 | with open(r'/path/to/metadata.xml') as metadata: 28 | fgdc_from_file = FgdcParser(metadata) 29 | 30 | with open(r'/path/to/metadata.xml') as metadata: 31 | iso_from_file = IsoParser(metadata) 32 | 33 | # Detect standard based on root element, metadata 34 | fgdc_from_string = get_metadata_parser( 35 | """ 36 | 37 | 38 | 39 | 40 | 41 | """ 42 | ) 43 | 44 | # Detect ArcGIS standard based on root element and its nodes 45 | iso_from_string = get_metadata_parser( 46 | """ 47 | 48 | 49 | 50 | 51 | 52 | 53 | """ 54 | ) 55 | 56 | # Detect ISO standard based on root element, MD_Metadata or MI_Metadata 57 | iso_from_string = get_metadata_parser( 58 | """ 59 | 60 | 61 | 62 | 63 | 64 | """ 65 | ) 66 | 67 | # Convert from one standard to another 68 | fgdc_converted = iso_from_file.convert_to(FgdcParser) 69 | iso_converted = fgdc_from_file.convert_to(IsoParser) 70 | arcgis_converted = iso_converted.convert_to(ArcGISParser) 71 | 72 | # Output supported properties as key value pairs (dict) 73 | fgdc_key_vals = fgdc_from_file.convert_to(dict) 74 | iso_key_vals = iso_from_file.convert_to(dict) 75 | ``` 76 | 77 | Finally, the properties of the parser can be updated, validated, applied and output: 78 | ```python 79 | with open(r'/path/to/metadata.xml') as metadata: 80 | fgdc_from_file = FgdcParser(metadata) 81 | 82 | # Example simple properties 83 | fgdc_from_file.title 84 | fgdc_from_file.abstract 85 | fgdc_from_file.place_keywords 86 | fgdc_from_file.thematic_keywords 87 | 88 | # :see: gis_metadata.utils.SUPPORTED_PROPS for list of all supported properties 89 | 90 | # Complex properties 91 | fgdc_from_file.attributes 92 | fgdc_from_file.bounding_box 93 | fgdc_from_file.contacts 94 | 
fgdc_from_file.dates 95 | fgdc_from_file.digital_forms 96 | fgdc_from_file.larger_works 97 | fgdc_from_file.process_steps 98 | fgdc_from_file.raster_info 99 | 100 | # :see: gis_metadata.utils.COMPLEX_DEFINITIONS for structure of all complex properties 101 | 102 | # Update properties 103 | fgdc_from_file.title = 'New Title' 104 | fgdc_from_file.dates = {'type': 'single', 'values': '1/1/2016'} 105 | 106 | # Apply updates 107 | fgdc_from_file.validate() # Ensure updated properties are valid 108 | fgdc_from_file.serialize() # Output updated XML as a string 109 | fgdc_from_file.write() # Output updated XML to existing file 110 | fgdc_from_file.write(out_file_or_path='/path/to/updated.xml') # Output updated XML to new file 111 | ``` 112 | 113 | ## Extending and Customizing 114 | 115 | ### Tips 116 | 117 | There are a few unwritten (until now) rules about the way the metadata parsers are wired to work: 118 | 119 | 1. Properties are generally defined by XPATH in each `parser._data_map` 2. Simple parser properties accept only values of `string` and `list`'s of `string`'s 121 | 3. XPATH's configured in the data map support references to element attributes: `'path/to/element/@attr'` 122 | 4. Complex parser properties are defined by custom parser/updater functions instead of by XPATH 123 | 5. Complex parser properties accept values of type `dict` containing simple properties, or a list of said `dict`'s 124 | 6. XPATH keys in the data map with leading underscores are parsed, but not validated or written out 125 | 7. XPATH keys in the data map that "shadow" other properties but with a leading underscore serve as secondary values 126 | 8. Secondary values are used in the absence of a primary value if primary location (element or attribute) is missing 127 | 9. Additional underscores indicate further locations to check for missing values, i.e.
`title`, `_title`, `__title` 128 | 129 | Some examples of existing secondary properties are as follows: 130 | ```python 131 | # In the ArcGIS parser for distribution contact phone: 132 | 133 | ARCGIS_TAG_FORMATS = frozendict({ 134 | ... 135 | 'dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/cntPhone/voiceNum', 136 | '_dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/voiceNum', # If not in cntPhone 137 | ... 138 | }) 139 | 140 | # In the FGDC parser for sub-properties in the contacts definition: 141 | 142 | FGDC_DEFINITIONS = dict({k: dict(v) for k, v in iteritems(COMPLEX_DEFINITIONS)}) 143 | FGDC_DEFINITIONS[CONTACTS].update({ 144 | '_name': '{_name}', 145 | '_organization': '{_organization}' 146 | }) 147 | ... 148 | class FgdcParser(MetadataParser): 149 | ... 150 | def _init_data_map(self): 151 | ... 152 | ct_format = FGDC_TAG_FORMATS[CONTACTS] 153 | fgdc_data_structures[CONTACTS] = format_xpaths( 154 | ... 155 | name=ct_format.format(ct_path='cntperp/cntper'), 156 | _name=ct_format.format(ct_path='cntorgp/cntper'), # If not in cntperp 157 | organization=ct_format.format(ct_path='cntperp/cntorg'), 158 | _organization=ct_format.format(ct_path='cntorgp/cntorg'), # If not in cntperp 159 | ) 160 | 161 | # Also see the ISO parser for secondary and tertiary sub-properties in the attributes definition: 162 | 163 | ISO_DEFINITIONS = dict({k: dict(v) for k, v in iteritems(COMPLEX_DEFINITIONS)}) 164 | ISO_DEFINITIONS[ATTRIBUTES].update({ 165 | '_definition_source': '{_definition_src}', 166 | '__definition_source': '{__definition_src}', 167 | '___definition_source': '{___definition_src}' 168 | }) 169 | ``` 170 | 171 | 172 | ### Examples 173 | 174 | Any of the supported parsers can be extended to include more of a standard's supported data. 
In this example we'll add two new properties to the `IsoParser`: 175 | 176 | * `metadata_language`: a simple string field describing the language of the metadata file itself (not the dataset) 177 | * `metadata_contacts`: a complex structure with contact info leveraging and enhancing the existing contact structure 178 | 179 | This example will cover: 180 | 181 | 1. Adding a new simple property 182 | 2. Configuring a secondary location for a property value 183 | 3. Referencing an element attribute in an XPATH 184 | 4. Adding a new complex property 185 | 5. Customizing the complex property to include a new sub-property 186 | 187 | Also, this example is specifically covered by unit tests. 188 | 189 | ```python 190 | from gis_metadata.iso_metadata_parser import IsoParser 191 | from gis_metadata.utils import COMPLEX_DEFINITIONS, CONTACTS, format_xpaths, ParserProperty 192 | 193 | 194 | class CustomIsoParser(IsoParser): 195 | 196 | def _init_data_map(self): 197 | super(CustomIsoParser, self)._init_data_map() 198 | 199 | # 1. Basic property: text or list (with secondary location referencing `codeListValue` attribute) 200 | 201 | lang_prop = 'metadata_language' 202 | self._data_map[lang_prop] = 'language/CharacterString' # Parse from here if present 203 | self._data_map['_' + lang_prop] = 'language/LanguageCode/@codeListValue' # Otherwise, try from here 204 | 205 | # 2. 
Complex structure (reuse of contacts structure plus phone) 206 | 207 | # 2.1 Define some basic variables 208 | ct_prop = 'metadata_contacts' 209 | ct_xpath = 'contact/CI_ResponsibleParty/{ct_path}' 210 | ct_definition = dict(COMPLEX_DEFINITIONS[CONTACTS])  # copy before adding keys 211 | ct_definition['phone'] = '{phone}' 212 | 213 | # 2.2 Reuse CONTACT structure to specify locations per prop (adapted from parent to add `phone`) 214 | self._data_structures[ct_prop] = format_xpaths( 215 | ct_definition, 216 | name=ct_xpath.format(ct_path='individualName/CharacterString'), 217 | organization=ct_xpath.format(ct_path='organisationName/CharacterString'), 218 | position=ct_xpath.format(ct_path='positionName/CharacterString'), 219 | phone=ct_xpath.format( 220 | ct_path='contactInfo/CI_Contact/phone/CI_Telephone/voice/CharacterString' 221 | ), 222 | email=ct_xpath.format( 223 | ct_path='contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString' 224 | ) 225 | ) 226 | 227 | # 2.3 Set the contact root to insert new elements at "contact" level given the defined path: 228 | # 'contact/CI_ResponsibleParty/...' 229 | # By default we would get multiple "CI_ResponsibleParty" elements under a single "contact" 230 | # This way we get multiple "contact" elements, each with its own single "CI_ResponsibleParty" 231 | self._data_map['_{prop}_root'.format(prop=ct_prop)] = 'contact' 232 | 233 | # 2.4 Leverage the default methods for parsing complex properties (or write your own parser/updater) 234 | self._data_map[ct_prop] = ParserProperty(self._parse_complex_list, self._update_complex_list) 235 | 236 | # 3.
And finally, let the parent validation logic know about the two new custom properties 237 | 238 | self._metadata_props.add(lang_prop) 239 | self._metadata_props.add(ct_prop) 240 | 241 | 242 | with open(r'/path/to/metadata.xml') as metadata: 243 | iso_from_file = CustomIsoParser(metadata) 244 | 245 | iso_from_file.metadata_language 246 | iso_from_file.metadata_contacts 247 | ``` 248 | -------------------------------------------------------------------------------- /docs/fgdc-std.dtd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 
260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | -------------------------------------------------------------------------------- /gis_metadata/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/consbio/gis-metadata-parser/a3cac07857bc425185d43ec819aece1a9533ec8c/gis_metadata/__init__.py -------------------------------------------------------------------------------- /gis_metadata/arcgis_metadata_parser.py: -------------------------------------------------------------------------------- 1 | """ A module to contain utility ArcGIS metadata parsing helpers """ 2 | 3 | from frozendict import frozendict 4 | from parserutils.collections import flatten_items, reduce_value, wrap_value 5 | from parserutils.elements import get_elements, get_element_name, get_element_attributes 6 | from parserutils.elements import clear_element, element_to_dict, insert_element, remove_element, remove_empty_element 7 | 8 | from gis_metadata.exceptions import InvalidContent 9 | from gis_metadata.metadata_parser import MetadataParser 10 | from gis_metadata.utils import DATE_TYPE, DATE_TYPE_SINGLE, DATE_TYPE_MULTIPLE 11 | from gis_metadata.utils import 
DATE_TYPE_RANGE, DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END 12 | from gis_metadata.utils import ATTRIBUTES 13 | from gis_metadata.utils import BOUNDING_BOX 14 | from gis_metadata.utils import CONTACTS 15 | from gis_metadata.utils import DATES 16 | from gis_metadata.utils import DIGITAL_FORMS 17 | from gis_metadata.utils import KEYWORDS_PLACE, KEYWORDS_STRATUM, KEYWORDS_TEMPORAL, KEYWORDS_THEME 18 | from gis_metadata.utils import LARGER_WORKS 19 | from gis_metadata.utils import PROCESS_STEPS 20 | from gis_metadata.utils import RASTER_DIMS, RASTER_INFO 21 | from gis_metadata.utils import COMPLEX_DEFINITIONS, ParserProperty 22 | from gis_metadata.utils import format_xpaths, get_default_for_complex, get_default_for_complex_sub 23 | from gis_metadata.utils import parse_complex_list, parse_property, update_complex_list, update_property 24 | 25 | 26 | ARCGIS_ROOTS = ('metadata', 'Metadata') 27 | ARCGIS_NODES = ('dataIdInfo', 'distInfo', 'dqInfo', 'Esri') 28 | 29 | ARCGIS_TAG_FORMATS = frozendict({ 30 | '_attribute_accuracy_root': 'dqInfo/report', 31 | '_attributes_root': 'eainfo/detailed/attr', 32 | '_bounding_box_root': 'dataIdInfo/dataExt/geoEle', 33 | '_contacts_root': 'dataIdInfo/idPoC', 34 | '_dataset_completeness_root': 'dqInfo/report', 35 | '_dates_root': 'dataIdInfo/dataExt/tempEle', 36 | '_digital_forms_root': 'distInfo/distFormat', 37 | '_dist_liability_root': 'dataIdInfo/resConst', 38 | '_transfer_options_root': 'distInfo/distTranOps/onLineSrc', 39 | '_larger_works_root': 'dataIdInfo/aggrInfo/aggrDSName', 40 | '_process_steps_root': 'dqInfo/dataLineage/prcStep', 41 | '_raster_info_root': 'spatRepInfo/GridSpatRep/axisDimension', 42 | '_use_constraints_root': 'dataIdInfo/resConst', 43 | 44 | '_srinfo_grid_rep': 'spatRepInfo/GridSpatRep', 45 | 46 | 'title': 'dataIdInfo/idCitation/resTitle', 47 | 'abstract': 'dataIdInfo/idAbs', 48 | 'purpose': 'dataIdInfo/idPurp', 49 | 'supplementary_info': 'dataIdInfo/suppInfo', 50 | 'online_linkages': 
'dataIdInfo/idCitation/citRespParty/rpCntInfo/cntOnlineRes/linkage', 51 | '_online_linkages': 'dataIdInfo/idCitation/citOnlineRes/linkage', # If not in citRespParty 52 | 'originators': 'dataIdInfo/idCitation/citRespParty/rpOrgName', 53 | 'publish_date': 'dataIdInfo/idCitation/date/pubDate', 54 | 'data_credits': 'dataIdInfo/idCredit', 55 | CONTACTS: 'dataIdInfo/idPoC/{ct_path}', 56 | 'dist_contact_org': 'distInfo/distributor/distorCont/rpOrgName', 57 | 'dist_contact_person': 'distInfo/distributor/distorCont/rpIndName', 58 | 'dist_address_type': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/@addressType', 59 | 'dist_address': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/delPoint', 60 | 'dist_city': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/city', 61 | 'dist_state': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/adminArea', 62 | 'dist_postal': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/postCode', 63 | 'dist_country': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/country', 64 | 'dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/cntPhone/voiceNum', 65 | '_dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/voiceNum', # If not in cntPhone 66 | 'dist_email': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/eMailAdd', 67 | 'dist_liability': 'dataIdInfo/resConst/LegConsts/othConsts', 68 | 'processing_fees': 'distInfo/distributor/distorOrdPrc/resFees', 69 | 'processing_instrs': 'distInfo/distributor/distorOrdPrc/ordInstr', 70 | 'resource_desc': 'dataIdInfo/idSpecUse/specUsage', 71 | 'tech_prerequisites': 'dataIdInfo/envirDesc', 72 | ATTRIBUTES: 'eainfo/detailed/attr/{ad_path}', # Same as in FGDC (and for good reason) 73 | 'attribute_accuracy': 'dqInfo/report/measDesc', 74 | BOUNDING_BOX: 'dataIdInfo/dataExt/geoEle/GeoBndBox/{bbox_path}', 75 | 'dataset_completeness': 'dqInfo/report/measDesc', 76 | DIGITAL_FORMS: 'distInfo/distFormat/{df_path}', 77 | '_access_desc': 'distInfo/distTranOps/onLineSrc/orDesc', 
78 | '_access_instrs': 'distInfo/distTranOps/onLineSrc/protocol', 79 | '_network_resource': 'distInfo/distTranOps/onLineSrc/linkage', 80 | PROCESS_STEPS: 'dqInfo/dataLineage/prcStep/{ps_path}', 81 | LARGER_WORKS: 'dataIdInfo/aggrInfo/aggrDSName/{lw_path}', 82 | RASTER_INFO: 'spatRepInfo/GridSpatRep/axisDimension/{ri_path}', 83 | '_ri_num_dims': 'spatRepInfo/GridSpatRep/numDims', 84 | 'other_citation_info': 'dataIdInfo/idCitation/otherCitDet', 85 | 'use_constraints': 'dataIdInfo/resConst/Consts/useLimit', 86 | '_use_constraints': 'dataIdInfo/resConst/LegConsts/useLimit', 87 | DATES: 'dataIdInfo/dataExt/tempEle/TempExtent/exTemp/{type_path}', 88 | KEYWORDS_PLACE: 'dataIdInfo/placeKeys/keyword', 89 | KEYWORDS_STRATUM: 'dataIdInfo/stratKeys/keyword', 90 | KEYWORDS_TEMPORAL: 'dataIdInfo/tempKeys/keyword', 91 | KEYWORDS_THEME: 'dataIdInfo/themeKeys/keyword', 92 | 93 | # Other ArcGIS keywords not supported by other standards 94 | 'discipline_keywords': 'dataIdInfo/discKeys/keyword', 95 | 'other_keywords': 'dataIdInfo/otherKeys/keyword', 96 | 'product_keywords': 'dataIdInfo/productKeys/keyword', 97 | 'search_keywords': 'dataIdInfo/searchKeys/keyword', 98 | 'topic_category_keywords': 'dataIdInfo/subTopicCatKeys/keyword' 99 | }) 100 | 101 | 102 | class ArcGISParser(MetadataParser): 103 | """ A class to parse metadata files generated by ArcGIS """ 104 | 105 | def _init_data_map(self): 106 | """ OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions """ 107 | 108 | if self._data_map is not None: 109 | return # Initiation happens once 110 | 111 | # Parse and validate the ArcGIS metadata root 112 | 113 | if self._xml_tree is None: 114 | agis_root = ARCGIS_ROOTS[0] # Default to uncapitalized 115 | else: 116 | agis_root = get_element_name(self._xml_tree) 117 | 118 | if agis_root not in ARCGIS_ROOTS: 119 | raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=agis_root) 120 | 121 | agis_data_map = {'_root': agis_root} 122 | 
agis_data_map.update(ARCGIS_TAG_FORMATS) 123 | 124 | agis_data_structures = {} 125 | 126 | # Capture and format complex XPATHs 127 | 128 | ad_format = agis_data_map[ATTRIBUTES] 129 | agis_data_structures[ATTRIBUTES] = format_xpaths( 130 | COMPLEX_DEFINITIONS[ATTRIBUTES], 131 | label=ad_format.format(ad_path='attrlabl'), 132 | aliases=ad_format.format(ad_path='attalias'), 133 | definition=ad_format.format(ad_path='attrdef'), 134 | definition_src=ad_format.format(ad_path='attrdefs') 135 | ) 136 | 137 | bb_format = agis_data_map[BOUNDING_BOX] 138 | agis_data_structures[BOUNDING_BOX] = format_xpaths( 139 | COMPLEX_DEFINITIONS[BOUNDING_BOX], 140 | east=bb_format.format(bbox_path='eastBL'), 141 | south=bb_format.format(bbox_path='southBL'), 142 | west=bb_format.format(bbox_path='westBL'), 143 | north=bb_format.format(bbox_path='northBL') 144 | ) 145 | 146 | ct_format = agis_data_map[CONTACTS] 147 | agis_data_structures[CONTACTS] = format_xpaths( 148 | COMPLEX_DEFINITIONS[CONTACTS], 149 | name=ct_format.format(ct_path='rpIndName'), 150 | organization=ct_format.format(ct_path='rpOrgName'), 151 | position=ct_format.format(ct_path='rpPosName'), 152 | email=ct_format.format(ct_path='rpCntInfo/cntAddress/eMailAdd') 153 | ) 154 | 155 | dt_format = agis_data_map[DATES] 156 | agis_data_structures[DATES] = { 157 | DATE_TYPE_MULTIPLE: dt_format.format(type_path='TM_Instant/tmPosition'), 158 | '_' + DATE_TYPE_MULTIPLE: dt_format.format(type_path='TM_Instant/tmPosition/@date'), 159 | DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TM_Period/tmBegin'), 160 | '_' + DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TM_Period/tmBegin/@date'), 161 | DATE_TYPE_RANGE_END: dt_format.format(type_path='TM_Period/tmEnd'), 162 | '_' + DATE_TYPE_RANGE_END: dt_format.format(type_path='TM_Period/tmEnd/@date'), 163 | 164 | # Same as multiple dates, but will contain only one 165 | DATE_TYPE_SINGLE: dt_format.format(type_path='TM_Instant/tmPosition'), 166 | '_' + DATE_TYPE_SINGLE: 
dt_format.format(type_path='TM_Instant/tmPosition/@date') 167 | } 168 | agis_data_structures[DATES][DATE_TYPE_RANGE] = [ 169 | agis_data_structures[DATES][DATE_TYPE_RANGE_BEGIN], 170 | agis_data_structures[DATES][DATE_TYPE_RANGE_END] 171 | ] 172 | agis_data_structures[DATES]['_' + DATE_TYPE_RANGE] = [ 173 | agis_data_structures[DATES]['_' + DATE_TYPE_RANGE_BEGIN], 174 | agis_data_structures[DATES]['_' + DATE_TYPE_RANGE_END] 175 | ] 176 | 177 | df_format = agis_data_map[DIGITAL_FORMS] 178 | agis_data_structures[DIGITAL_FORMS] = format_xpaths( 179 | COMPLEX_DEFINITIONS[DIGITAL_FORMS], 180 | name=df_format.format(df_path='formatName'), 181 | content=df_format.format(df_path='formatInfo'), 182 | decompression=df_format.format(df_path='fileDecmTech'), 183 | version=df_format.format(df_path='formatVer'), 184 | specification=df_format.format(df_path='formatSpec'), 185 | access_desc=agis_data_map['_access_desc'], 186 | access_instrs=agis_data_map['_access_instrs'], 187 | network_resource=agis_data_map['_network_resource'] 188 | ) 189 | 190 | lw_format = agis_data_map[LARGER_WORKS] 191 | agis_data_structures[LARGER_WORKS] = format_xpaths( 192 | COMPLEX_DEFINITIONS[LARGER_WORKS], 193 | title=lw_format.format(lw_path='resTitle'), 194 | edition=lw_format.format(lw_path='resEd'), 195 | origin=lw_format.format(lw_path='citRespParty/rpIndName'), 196 | online_linkage=lw_format.format(lw_path='citRespParty/rpCntInfo/cntOnlineRes/linkage'), 197 | other_citation=lw_format.format(lw_path='otherCitDet'), 198 | date=lw_format.format(lw_path='date/pubDate'), 199 | place=lw_format.format(lw_path='citRespParty/rpCntInfo/cntAddress/city'), 200 | info=lw_format.format(lw_path='citRespParty/rpOrgName') 201 | ) 202 | 203 | ps_format = agis_data_map[PROCESS_STEPS] 204 | agis_data_structures[PROCESS_STEPS] = format_xpaths( 205 | COMPLEX_DEFINITIONS[PROCESS_STEPS], 206 | description=ps_format.format(ps_path='stepDesc'), 207 | date=ps_format.format(ps_path='stepDateTm'), 208 | 
sources=ps_format.format(ps_path='stepSrc/srcDesc') 209 | ) 210 | 211 | ri_format = agis_data_map[RASTER_INFO] 212 | agis_data_structures[RASTER_INFO] = format_xpaths( 213 | COMPLEX_DEFINITIONS[RASTER_DIMS], 214 | type=ri_format.format(ri_path='@type'), 215 | size=ri_format.format(ri_path='dimSize'), 216 | value=ri_format.format(ri_path='dimResol/value'), 217 | units=ri_format.format(ri_path='dimResol/value/@uom') 218 | ) 219 | 220 | # Assign XPATHS and gis_metadata.utils.ParserProperties to data map 221 | 222 | for prop, xpath in dict(agis_data_map).items(): 223 | if prop in (ATTRIBUTES, CONTACTS, PROCESS_STEPS): 224 | agis_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list) 225 | 226 | elif prop in (BOUNDING_BOX, LARGER_WORKS): 227 | agis_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex) 228 | 229 | elif prop in ('attribute_accuracy', 'dataset_completeness'): 230 | agis_data_map[prop] = ParserProperty(self._parse_report_item, self._update_report_item) 231 | 232 | elif prop == DATES: 233 | agis_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates) 234 | 235 | elif prop == DIGITAL_FORMS: 236 | agis_data_map[prop] = ParserProperty(self._parse_digital_forms, self._update_digital_forms) 237 | 238 | elif prop == RASTER_INFO: 239 | agis_data_map[prop] = ParserProperty(self._parse_raster_info, self._update_raster_info) 240 | 241 | else: 242 | agis_data_map[prop] = xpath 243 | 244 | self._data_map = agis_data_map 245 | self._data_structures = agis_data_structures 246 | 247 | def _parse_digital_forms(self, prop=DIGITAL_FORMS): 248 | """ Concatenates a list of Digital Form data structures parsed from the metadata """ 249 | 250 | xpath_map = self._data_structures[prop] 251 | 252 | # Parse base digital form fields: 'name', 'content', 'decompression', 'version', 'specification' 253 | xpath_root = self._data_map['_digital_forms_root'] 254 | digital_forms = parse_complex_list(self._xml_tree, xpath_root, 
xpath_map, prop) 255 | 256 | # Parse digital form transfer option fields: 'access_desc', 'access_instrs', 'network_resource' 257 | xpath_root = self._data_map['_transfer_options_root'] 258 | transfer_opts = parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop) 259 | 260 | # Combine digital forms and transfer options into a single complex struct 261 | 262 | df_len = len(digital_forms) 263 | to_len = len(transfer_opts) 264 | parsed_forms = [] 265 | 266 | for idx in range(0, max(df_len, to_len)): 267 | digital_form = {}.fromkeys(COMPLEX_DEFINITIONS[prop], u'') 268 | 269 | if idx < df_len: 270 | digital_form.update(i for i in digital_forms[idx].items() if i[1]) 271 | if idx < to_len: 272 | digital_form.update(i for i in transfer_opts[idx].items() if i[1]) 273 | 274 | if any(digital_form.values()): 275 | parsed_forms.append(digital_form) 276 | 277 | return get_default_for_complex(prop, parsed_forms) 278 | 279 | def _parse_report_item(self, prop): 280 | """ :return: the text for each element at the configured path if type attribute matches""" 281 | 282 | item_type = None 283 | 284 | if prop == 'attribute_accuracy': 285 | item_type = 'DQQuanAttAcc' 286 | elif prop == 'dataset_completeness': 287 | item_type = 'DQCompOm' 288 | 289 | xroot = self._get_xroot_for(prop) 290 | 291 | parsed = (element_to_dict(e) for e in get_elements(self._xml_tree, xroot)) 292 | parsed = flatten_items(e['children'] for e in parsed if e['attributes'].get('type') == item_type) 293 | 294 | return reduce_value([p['text'] for p in parsed if p and p['name'] == 'measDesc']) 295 | 296 | def _parse_raster_info(self, prop=RASTER_INFO): 297 | """ Collapses multiple dimensions into a single raster_info complex struct """ 298 | 299 | raster_info = {}.fromkeys(COMPLEX_DEFINITIONS[prop], u'') 300 | 301 | # Ensure conversion of lists to newlines is in place 302 | raster_info['dimensions'] = get_default_for_complex_sub( 303 | prop=prop, 304 | subprop='dimensions', 305 | 
value=parse_property(self._xml_tree, None, self._data_map, '_ri_num_dims'), 306 | xpath=self._data_map['_ri_num_dims'] 307 | ) 308 | 309 | xpath_root = self._get_xroot_for(prop) 310 | xpath_map = self._data_structures[prop] 311 | 312 | for dimension in parse_complex_list(self._xml_tree, xpath_root, xpath_map, RASTER_DIMS): 313 | dimension_type = dimension['type'].lower() 314 | 315 | if dimension_type == 'vertical': 316 | raster_info['vertical_count'] = dimension['size'] 317 | 318 | elif dimension_type == 'column': 319 | raster_info['column_count'] = dimension['size'] 320 | raster_info['x_resolution'] = u' '.join(dimension[k] for k in ['value', 'units']).strip() 321 | 322 | elif dimension_type == 'row': 323 | raster_info['row_count'] = dimension['size'] 324 | raster_info['y_resolution'] = u' '.join(dimension[k] for k in ['value', 'units']).strip() 325 | 326 | return raster_info if any(raster_info[k] for k in raster_info) else {} 327 | 328 | def _update_digital_forms(self, **update_props): 329 | """ 330 | Update operation for ArcGIS Digital Forms metadata 331 | :see: gis_metadata.utils.COMPLEX_DEFINITIONS[DIGITAL_FORMS] 332 | """ 333 | 334 | digital_forms = wrap_value(update_props['values']) 335 | 336 | # Update all Digital Form properties: distFormat* 337 | 338 | xpath_map = self._data_structures[update_props['prop']] 339 | 340 | dist_format_props = ('name', 'content', 'decompression', 'version', 'specification') 341 | dist_format_xroot = self._data_map['_digital_forms_root'] 342 | dist_format_xmap = {prop: xpath_map[prop] for prop in dist_format_props} 343 | dist_formats = [] 344 | 345 | for digital_form in digital_forms: 346 | dist_formats.append({prop: digital_form[prop] for prop in dist_format_props}) 347 | 348 | update_props['values'] = dist_formats 349 | dist_formats = update_complex_list( 350 | xpath_root=dist_format_xroot, xpath_map=dist_format_xmap, **update_props 351 | ) 352 | 353 | # Update all Network Resources: distTranOps+ 354 | 355 | 
trans_option_props = ('access_desc', 'access_instrs', 'network_resource') 356 | trans_option_xroot = self._data_map['_transfer_options_root'] 357 | trans_option_xmap = {prop: self._data_map['_' + prop] for prop in trans_option_props} 358 | 359 | trans_options = [] 360 | for digital_form in digital_forms: 361 | trans_options.append({prop: digital_form[prop] for prop in trans_option_props}) 362 | 363 | update_props['values'] = trans_options 364 | trans_options = update_complex_list( 365 | xpath_root=trans_option_xroot, xpath_map=trans_option_xmap, **update_props 366 | ) 367 | 368 | return { 369 | 'distribution_formats': dist_formats, 370 | 'transfer_options': trans_options 371 | } 372 | 373 | def _update_dates(self, **update_props): 374 | """ 375 | Update operation for ArcGIS Dates metadata 376 | :see: gis_metadata.utils.COMPLEX_DEFINITIONS[DATES] 377 | """ 378 | 379 | tree_to_update = update_props['tree_to_update'] 380 | xpath_root = self._data_map['_dates_root'] 381 | 382 | if self.dates: 383 | date_type = self.dates[DATE_TYPE] 384 | 385 | # First remove all date info from common root 386 | remove_element(tree_to_update, xpath_root) 387 | 388 | if date_type == DATE_TYPE_MULTIPLE: 389 | xpath_root += '/TempExtent/TM_Instant' 390 | elif date_type == DATE_TYPE_RANGE: 391 | xpath_root += '/TempExtent/TM_Period' 392 | 393 | return super(ArcGISParser, self)._update_dates(xpath_root, **update_props) 394 | 395 | def _update_report_item(self, **update_props): 396 | """ Update the text for each element at the configured path if attribute matches """ 397 | 398 | tree_to_update = update_props['tree_to_update'] 399 | prop = update_props['prop'] 400 | values = wrap_value(update_props['values']) 401 | xroot = self._get_xroot_for(prop) 402 | 403 | attr_key = 'type' 404 | attr_val = u'' 405 | 406 | if prop == 'attribute_accuracy': 407 | attr_val = 'DQQuanAttAcc' 408 | elif prop == 'dataset_completeness': 409 | attr_val = 'DQCompOm' 410 | 411 | # Clear (make empty) all elements of 
the appropriate type 412 | for elem in get_elements(tree_to_update, xroot): 413 | if get_element_attributes(elem).get(attr_key) == attr_val: 414 | clear_element(elem) 415 | 416 | # Remove all empty elements, including those previously cleared 417 | remove_empty_element(tree_to_update, xroot) 418 | 419 | # Insert elements with correct attributes for each new value 420 | 421 | attrs = {attr_key: attr_val} 422 | updated = [] 423 | 424 | for idx, value in enumerate(values): 425 | elem = insert_element(tree_to_update, idx, xroot, **attrs) 426 | updated.append(insert_element(elem, idx, 'measDesc', value)) 427 | 428 | return updated 429 | 430 | def _update_raster_info(self, **update_props): 431 | """ Derives multiple dimensions from a single raster_info complex struct """ 432 | 433 | tree_to_update = update_props['tree_to_update'] 434 | prop = update_props['prop'] 435 | values = update_props.pop('values') 436 | 437 | # Update number of dimensions at raster_info root (applies to all dimensions below) 438 | 439 | xroot, xpath = None, self._data_map['_ri_num_dims'] 440 | raster_info = [update_property(tree_to_update, xroot, xpath, prop, values.get('dimensions', u''))] 441 | 442 | # Derive vertical, longitude, and latitude dimensions from raster_info 443 | 444 | xpath_root = self._get_xroot_for(prop) 445 | xpath_map = self._data_structures[prop] 446 | 447 | v_dimension = {} 448 | if values.get('vertical_count'): 449 | v_dimension = v_dimension.fromkeys(xpath_map, u'') 450 | v_dimension['type'] = 'vertical' 451 | v_dimension['size'] = values.get('vertical_count', u'') 452 | 453 | x_dimension = {} 454 | if values.get('column_count') or values.get('x_resolution'): 455 | x_dimension = x_dimension.fromkeys(xpath_map, u'') 456 | x_dimension['type'] = 'column' 457 | x_dimension['size'] = values.get('column_count', u'') 458 | x_dimension['value'] = values.get('x_resolution', u'') 459 | 460 | y_dimension = {} 461 | if values.get('row_count') or values.get('y_resolution'): 462 | 
class ParserError(Exception):
    """ Base class for all metadata parsing exceptions """

    def __init__(self, msg_format, **kwargs):
        """
        Format msg_format with the provided keyword arguments and delegate
        the resulting message to Exception.
        """

        super(ParserError, self).__init__(msg_format.format(**kwargs))


class ConfigurationError(ParserError):
    """
    Represents problems with a parser's configuration
    :raised: during parsing operation when a parser is misconfigured
    """


class InvalidContent(ParserError):
    """
    Represents problems with XML parsing of metadata content
    :raised: while reading raw data into the XML tree before parsing
    """


class NoContent(ParserError):
    """
    Represents issues with empty metadata content
    :raised: while reading raw data into the XML tree before parsing
    """


class ValidationError(ParserError):
    """
    Represents validation exceptions:
    :raised: after updates when validating, updating the tree, or serializing
    """

    def __init__(self, msg_format, invalid=None, missing=None, **kwargs):
        """ Capture invalid and missing fields before delegating message formatting """

        # Retain error details for handling downstream
        self.invalid = invalid if invalid is not None else {}
        self.missing = missing if missing is not None else []

        super(ValidationError, self).__init__(msg_format, **kwargs)
ATTRIBUTES 11 | from gis_metadata.utils import BOUNDING_BOX 12 | from gis_metadata.utils import CONTACTS 13 | from gis_metadata.utils import DATES 14 | from gis_metadata.utils import DIGITAL_FORMS 15 | from gis_metadata.utils import KEYWORDS_PLACE, KEYWORDS_STRATUM, KEYWORDS_TEMPORAL, KEYWORDS_THEME 16 | from gis_metadata.utils import LARGER_WORKS 17 | from gis_metadata.utils import PROCESS_STEPS 18 | from gis_metadata.utils import RASTER_INFO 19 | from gis_metadata.utils import COMPLEX_DEFINITIONS, ParserProperty 20 | from gis_metadata.utils import format_xpaths, update_complex 21 | 22 | 23 | FGDC_ROOT = 'metadata' 24 | 25 | # Define backup locations for contact and raster_info sub-properties 26 | FGDC_DEFINITIONS = dict({k: dict(v) for k, v in dict(COMPLEX_DEFINITIONS).items()}) 27 | FGDC_DEFINITIONS[CONTACTS].update({ 28 | '_name': '{_name}', 29 | '_organization': '{_organization}' 30 | }) 31 | FGDC_DEFINITIONS[RASTER_INFO].update({ 32 | '_x_resolution': '{_x_resolution}', 33 | '_y_resolution': '{_y_resolution}' 34 | }) 35 | FGDC_DEFINITIONS = frozendict({k: frozendict(v) for k, v in FGDC_DEFINITIONS.items()}) 36 | 37 | FGDC_TAG_FORMATS = frozendict({ 38 | '_attributes_root': 'eainfo/detailed/attr', 39 | '_bounding_box_root': 'idinfo/spdom/bounding', 40 | '_contacts_root': 'idinfo/ptcontac', 41 | '_dates_root': 'idinfo/timeperd/timeinfo', 42 | '_digital_forms_root': 'distinfo/stdorder/digform', 43 | '_larger_works_root': 'idinfo/citation/citeinfo/lworkcit/citeinfo', 44 | '_process_steps_root': 'dataqual/lineage/procstep', 45 | 46 | '_raster_info_root': 'spdoinfo/rastinfo', 47 | '__raster_res_root': 'spref/horizsys', 48 | 49 | '_raster_resolution': 'spref/horizsys/planar/planci/coordrep', 50 | '__raster_resolution': 'spref/horizsys/geograph', 51 | 52 | 'title': 'idinfo/citation/citeinfo/title', 53 | 'abstract': 'idinfo/descript/abstract', 54 | 'purpose': 'idinfo/descript/purpose', 55 | 'supplementary_info': 'idinfo/descript/supplinf', 56 | 'online_linkages': 
'idinfo/citation/citeinfo/onlink', 57 | 'originators': 'idinfo/citation/citeinfo/origin', 58 | 'publish_date': 'idinfo/citation/citeinfo/pubdate', 59 | 'data_credits': 'idinfo/datacred', 60 | CONTACTS: 'idinfo/ptcontac/cntinfo/{ct_path}', 61 | 'dist_contact_org': 'distinfo/distrib/cntinfo/cntperp/cntorg', 62 | '_dist_contact_org': 'distinfo/distrib/cntinfo/cntorgp/cntorg', # If not in cntperp 63 | 'dist_contact_person': 'distinfo/distrib/cntinfo/cntperp/cntper', 64 | '_dist_contact_person': 'distinfo/distrib/cntinfo/cntorgp/cntper', # If not in cntperp 65 | 'dist_address_type': 'distinfo/distrib/cntinfo/cntaddr/addrtype', 66 | 'dist_address': 'distinfo/distrib/cntinfo/cntaddr/address', 67 | 'dist_city': 'distinfo/distrib/cntinfo/cntaddr/city', 68 | 'dist_state': 'distinfo/distrib/cntinfo/cntaddr/state', 69 | 'dist_postal': 'distinfo/distrib/cntinfo/cntaddr/postal', 70 | 'dist_country': 'distinfo/distrib/cntinfo/cntaddr/country', 71 | 'dist_phone': 'distinfo/distrib/cntinfo/cntvoice', 72 | 'dist_email': 'distinfo/distrib/cntinfo/cntemail', 73 | 'dist_liability': 'distinfo/distliab', 74 | 'processing_fees': 'distinfo/stdorder/fees', 75 | 'processing_instrs': 'distinfo/stdorder/ordering', 76 | 'resource_desc': 'distinfo/resdesc', 77 | 'tech_prerequisites': 'distinfo/techpreq', 78 | ATTRIBUTES: 'eainfo/detailed/attr/{ad_path}', 79 | 'attribute_accuracy': 'dataqual/attracc/attraccr', 80 | BOUNDING_BOX: 'idinfo/spdom/bounding/{bbox_path}', 81 | 'dataset_completeness': 'dataqual/complete', 82 | DIGITAL_FORMS: 'distinfo/stdorder/digform/{df_path}', 83 | PROCESS_STEPS: 'dataqual/lineage/procstep/{ps_path}', 84 | LARGER_WORKS: 'idinfo/citation/citeinfo/lworkcit/citeinfo/{lw_path}', 85 | RASTER_INFO: 'spdoinfo/rastinfo/{ri_path}', 86 | 'other_citation_info': 'idinfo/citation/citeinfo/othercit', 87 | 'use_constraints': 'idinfo/useconst', 88 | DATES: 'idinfo/timeperd/timeinfo/{type_path}', 89 | KEYWORDS_PLACE: 'idinfo/keywords/place/placekey', 90 | KEYWORDS_STRATUM: 
'idinfo/keywords/stratum/stratkey', 91 | KEYWORDS_TEMPORAL: 'idinfo/keywords/temporal/tempkey', 92 | KEYWORDS_THEME: 'idinfo/keywords/theme/themekey' 93 | }) 94 | 95 | 96 | class FgdcParser(MetadataParser): 97 | """ A class to parse metadata files conforming to the FGDC standard """ 98 | 99 | def _init_data_map(self): 100 | """ OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions """ 101 | 102 | if self._data_map is not None: 103 | return # Initiation happens once 104 | 105 | # Parse and validate the FGDC metadata root 106 | 107 | if self._xml_tree is None: 108 | fgdc_root = FGDC_ROOT 109 | else: 110 | fgdc_root = get_element_name(self._xml_tree) 111 | 112 | if fgdc_root != FGDC_ROOT: 113 | raise InvalidContent('Invalid XML root for ISO-19115 standard: {root}', root=fgdc_root) 114 | 115 | fgdc_data_map = {'_root': FGDC_ROOT} 116 | fgdc_data_structures = {} 117 | 118 | # Capture and format other complex XPATHs 119 | 120 | ad_format = FGDC_TAG_FORMATS[ATTRIBUTES] 121 | fgdc_data_structures[ATTRIBUTES] = format_xpaths( 122 | FGDC_DEFINITIONS[ATTRIBUTES], 123 | label=ad_format.format(ad_path='attrlabl'), 124 | aliases=ad_format.format(ad_path='attalias'), 125 | definition=ad_format.format(ad_path='attrdef'), 126 | definition_src=ad_format.format(ad_path='attrdefs') 127 | ) 128 | 129 | bb_format = FGDC_TAG_FORMATS[BOUNDING_BOX] 130 | fgdc_data_structures[BOUNDING_BOX] = format_xpaths( 131 | FGDC_DEFINITIONS[BOUNDING_BOX], 132 | east=bb_format.format(bbox_path='eastbc'), 133 | south=bb_format.format(bbox_path='southbc'), 134 | west=bb_format.format(bbox_path='westbc'), 135 | north=bb_format.format(bbox_path='northbc') 136 | ) 137 | 138 | ct_format = FGDC_TAG_FORMATS[CONTACTS] 139 | fgdc_data_structures[CONTACTS] = format_xpaths( 140 | FGDC_DEFINITIONS[CONTACTS], 141 | 142 | name=ct_format.format(ct_path='cntperp/cntper'), 143 | _name=ct_format.format(ct_path='cntorgp/cntper'), # If not in cntperp 144 | 145 | 
organization=ct_format.format(ct_path='cntperp/cntorg'), 146 | _organization=ct_format.format(ct_path='cntorgp/cntorg'), # If not in cntperp 147 | 148 | position=ct_format.format(ct_path='cntpos'), 149 | email=ct_format.format(ct_path='cntemail') 150 | ) 151 | 152 | dt_format = FGDC_TAG_FORMATS[DATES] 153 | fgdc_data_structures[DATES] = { 154 | DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'), 155 | DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'), 156 | DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'), 157 | DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate') 158 | } 159 | fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [ 160 | fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN], 161 | fgdc_data_structures[DATES][DATE_TYPE_RANGE_END] 162 | ] 163 | 164 | df_format = FGDC_TAG_FORMATS[DIGITAL_FORMS] 165 | fgdc_data_structures[DIGITAL_FORMS] = format_xpaths( 166 | FGDC_DEFINITIONS[DIGITAL_FORMS], 167 | name=df_format.format(df_path='digtinfo/formname'), 168 | content=df_format.format(df_path='digtinfo/formcont'), 169 | decompression=df_format.format(df_path='digtinfo/filedec'), 170 | version=df_format.format(df_path='digtinfo/formvern'), 171 | specification=df_format.format(df_path='digtinfo/formspec'), 172 | access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'), 173 | access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'), 174 | network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr') 175 | ) 176 | 177 | lw_format = FGDC_TAG_FORMATS[LARGER_WORKS] 178 | fgdc_data_structures[LARGER_WORKS] = format_xpaths( 179 | FGDC_DEFINITIONS[LARGER_WORKS], 180 | title=lw_format.format(lw_path='title'), 181 | edition=lw_format.format(lw_path='edition'), 182 | origin=lw_format.format(lw_path='origin'), 183 | online_linkage=lw_format.format(lw_path='onlink'), 184 | other_citation=lw_format.format(lw_path='othercit'), 185 | 
date=lw_format.format(lw_path='pubdate'), 186 | place=lw_format.format(lw_path='pubinfo/pubplace'), 187 | info=lw_format.format(lw_path='pubinfo/publish') 188 | ) 189 | 190 | ps_format = FGDC_TAG_FORMATS[PROCESS_STEPS] 191 | fgdc_data_structures[PROCESS_STEPS] = format_xpaths( 192 | FGDC_DEFINITIONS[PROCESS_STEPS], 193 | description=ps_format.format(ps_path='procdesc'), 194 | date=ps_format.format(ps_path='procdate'), 195 | sources=ps_format.format(ps_path='srcused') 196 | ) 197 | 198 | ri_format = FGDC_TAG_FORMATS[RASTER_INFO] 199 | fgdc_data_structures[RASTER_INFO] = format_xpaths( 200 | FGDC_DEFINITIONS[RASTER_INFO], 201 | 202 | dimensions=ri_format.format(ri_path='rasttype'), 203 | row_count=ri_format.format(ri_path='rowcount'), 204 | column_count=ri_format.format(ri_path='colcount'), 205 | vertical_count=ri_format.format(ri_path='vrtcount'), 206 | 207 | x_resolution=FGDC_TAG_FORMATS['_raster_resolution'] + '/absres', 208 | _x_resolution=FGDC_TAG_FORMATS['__raster_resolution'] + '/longres', 209 | y_resolution=FGDC_TAG_FORMATS['_raster_resolution'] + '/ordres', 210 | _y_resolution=FGDC_TAG_FORMATS['__raster_resolution'] + '/latres', 211 | ) 212 | 213 | # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map 214 | 215 | for prop, xpath in FGDC_TAG_FORMATS.items(): 216 | if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS): 217 | fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list) 218 | 219 | elif prop in (BOUNDING_BOX, LARGER_WORKS): 220 | fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex) 221 | 222 | elif prop == DATES: 223 | fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates) 224 | 225 | elif prop == RASTER_INFO: 226 | fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_raster_info) 227 | 228 | else: 229 | fgdc_data_map[prop] = xpath 230 | 231 | self._data_map = fgdc_data_map 232 | self._data_structures = fgdc_data_structures 
233 | 234 | def _update_dates(self, **update_props): 235 | """ 236 | Update operation for FGDC Dates metadata 237 | :see: gis_metadata.utils.COMPLEX_DEFINITIONS[DATES] 238 | """ 239 | 240 | tree_to_update = update_props['tree_to_update'] 241 | xpath_root = self._data_map['_dates_root'] 242 | 243 | if self.dates: 244 | date_type = self.dates[DATE_TYPE] 245 | 246 | if date_type == DATE_TYPE_MULTIPLE: 247 | xpath_root += '/mdattim/sngdate' 248 | 249 | elif date_type == DATE_TYPE_RANGE: 250 | xpath_root = '' # /rngdates/begdate and enddate are siblings, not cousins 251 | remove_element(tree_to_update, self._data_map['_dates_root']) 252 | 253 | return super(FgdcParser, self)._update_dates(xpath_root, **update_props) 254 | 255 | def _update_raster_info(self, **update_props): 256 | """ Ensures complete removal of raster_info given the two roots: and """ 257 | 258 | xpath_map = self._data_structures[update_props['prop']] 259 | 260 | return [ 261 | update_complex(xpath_root=self._data_map.get('_raster_info_root'), xpath_map=xpath_map, **update_props), 262 | update_complex(xpath_root=self._data_map.get('__raster_res_root'), xpath_map=xpath_map, **update_props) 263 | ] 264 | -------------------------------------------------------------------------------- /gis_metadata/metadata_parser.py: -------------------------------------------------------------------------------- 1 | """ A module to contain utility metadata parsing helpers """ 2 | 3 | from copy import deepcopy 4 | 5 | from parserutils.elements import create_element_tree, element_exists, element_to_string 6 | from parserutils.elements import get_element_name, get_element_tree, remove_element, write_element 7 | from parserutils.strings import DEFAULT_ENCODING 8 | 9 | from gis_metadata.exceptions import InvalidContent, NoContent 10 | from gis_metadata.utils import DATES, DATE_TYPE, DATE_VALUES 11 | from gis_metadata.utils import DATE_TYPE_RANGE, DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END 12 | from gis_metadata.utils import 
def convert_parser_to(parser, parser_or_type, metadata_props=None):
    """
    :return: a parser of type parser_or_type, initialized with the properties of parser. If parser_or_type
        is a type, an instance of it must contain a update method. The update method must also process
        the set of properties supported by MetadataParser for the conversion to have any affect.
    :param parser: the parser (or content or parser type) to convert to new_type
    :param parser_or_type: a parser (or content) or type of parser to return
    :see: get_metadata_parser(metadata_container) for more on how parser_or_type is treated
    """

    source = parser if isinstance(parser, MetadataParser) else get_metadata_parser(parser)
    target = get_metadata_parser(parser_or_type)

    # Deep-copy each supported property from source to target, defaulting to empty
    for prop in (metadata_props or SUPPORTED_PROPS):
        setattr(target, prop, deepcopy(getattr(source, prop, u'')))

    target.update()

    return target


def get_metadata_parser(metadata_container, **metadata_defaults):
    """
    Takes a metadata_container, which may be a type or instance of a parser, a dict, string, or file.
    :return: a new instance of a parser corresponding to the standard represented by metadata_container
    :see: get_parsed_content(metadata_content) for more on types of content that can be parsed
    """

    parser_type = None

    if isinstance(metadata_container, MetadataParser):
        parser_type = type(metadata_container)
    elif isinstance(metadata_container, type):
        # A parser type was given: instantiate it and render its template content
        parser_type = metadata_container
        metadata_container = metadata_container().update(**metadata_defaults)

    xml_root, xml_tree = get_parsed_content(metadata_container)

    # The get_parsed_content method ensures only supported roots will be returned

    if parser_type is not None:
        return parser_type(xml_tree, **metadata_defaults)

    if xml_root in ISO_ROOTS:
        return IsoParser(xml_tree, **metadata_defaults)

    # FGDC and ArcGIS share the 'metadata' root: ArcGIS-only nodes break the tie
    has_arcgis_data = any(element_exists(xml_tree, e) for e in ARCGIS_NODES)

    if xml_root == FGDC_ROOT and not has_arcgis_data:
        return FgdcParser(xml_tree, **metadata_defaults)
    if xml_root in ARCGIS_ROOTS:
        return ArcGISParser(xml_tree, **metadata_defaults)

    return None


def get_parsed_content(metadata_content):
    """
    Parses any of the following types of content:
    1. XML string or file object: parses XML content
    2. MetadataParser instance: deep copies xml_tree
    3. Dictionary with nested objects containing:
        - name (required): the name of the element tag
        - text: the text contained by element
        - tail: text immediately following the element
        - attributes: a Dictionary containing element attributes
        - children: a List of converted child elements

    :raises InvalidContent: if the XML is invalid or does not conform to a supported metadata standard
    :raises NoContent: If the content passed in is null or otherwise empty

    :return: the XML root along with an XML Tree parsed by and compatible with element_utils
    """

    _import_parsers()  # Prevents circular dependencies between modules

    if metadata_content is None:
        raise NoContent('Metadata has no data')

    if isinstance(metadata_content, MetadataParser):
        xml_tree = deepcopy(metadata_content._xml_tree)
    elif isinstance(metadata_content, dict):
        xml_tree = get_element_tree(metadata_content)
    else:
        try:
            # Strip name spaces from file or XML content
            xml_tree = get_element_tree(metadata_content)
        except Exception:
            xml_tree = None  # Several exceptions possible, outcome is the same

        if xml_tree is None:
            raise InvalidContent(
                'Cannot instantiate a {parser_type} parser with invalid content to parse',
                parser_type=type(metadata_content).__name__
            )

    xml_root = get_element_name(xml_tree)

    if xml_root is None:
        raise NoContent('Metadata contains no data')
    if xml_root not in VALID_ROOTS:
        raise InvalidContent(
            'Invalid root element for {content}: {xml_root}',
            content=type(metadata_content).__name__, xml_root=xml_root
        )

    return xml_root, xml_tree


def _import_parsers():
    """ Lazy imports to prevent circular dependencies between this module and utils """

    global ARCGIS_NODES, ARCGIS_ROOTS, ArcGISParser
    global FGDC_ROOT, FgdcParser
    global ISO_ROOTS, IsoParser
    global VALID_ROOTS

    if ARCGIS_NODES is None or ARCGIS_ROOTS is None or ArcGISParser is None:
        from gis_metadata.arcgis_metadata_parser import ARCGIS_NODES
        from gis_metadata.arcgis_metadata_parser import ARCGIS_ROOTS
        from gis_metadata.arcgis_metadata_parser import ArcGISParser

    if FGDC_ROOT is None or FgdcParser is None:
        from gis_metadata.fgdc_metadata_parser import FGDC_ROOT
        from gis_metadata.fgdc_metadata_parser import FgdcParser

    if ISO_ROOTS is None or IsoParser is None:
        from gis_metadata.iso_metadata_parser import ISO_ROOTS
        from gis_metadata.iso_metadata_parser import IsoParser

    if VALID_ROOTS is None:
        VALID_ROOTS = {FGDC_ROOT}.union(ARCGIS_ROOTS + ISO_ROOTS)
187 | If the new property requires conversion to-and-from a Dictionary, then: 188 | 189 | 1. A parse and update method will need to be defined in the child parser 190 | - Parse methods should take zero arguments and return the value in the desired format 191 | - Update methods take a **kwargs parameter and return the updated element 192 | 2. A ParserProperties must be instantiated with them and put in data map 193 | 194 | C. Update _init_data_map() to instantiate a ParserProperty for the new field 195 | 196 | The result of _init_data_map is that _data_map is defined for use in _init_metadata. 197 | The _data_map dictionary will contain identifying property names as keys, and either 198 | XPATHs or ParserProperties as values. 199 | 200 | III. If the new content is required across standards, update utils.SUPPORTED_PROPS as needed 201 | 202 | Requiring new content does not mean a value is required from the incoming metadata. Rather, 203 | it means all MetadataParser children must provide an XPATH for parsing the value, even if 204 | the XPATH provided is blank. This ensures an initialized parser will have a property named 205 | after the identifying property name, even if its value is an empty String. 
    def __init__(self, metadata_to_parse=None, out_file_or_path=None, metadata_props=None, **metadata_defaults):
        """
        Initialize new parser with valid content as defined by get_parsed_content
        :param metadata_to_parse: XML content, file object, dict or parser to read from (template when None)
        :param out_file_or_path: optional default output target for write()
        :param metadata_props: optional iterable of supported property names (defaults to SUPPORTED_PROPS)
        :see: get_parsed_content(metadata_content) for more on what constitutes valid content
        """

        self.has_data = False
        self.out_file_or_path = out_file_or_path

        self._xml_tree = None
        self._data_map = None
        self._data_structures = None
        self._metadata_props = set(metadata_props or SUPPORTED_PROPS)

        if metadata_to_parse is not None:
            self._xml_root, self._xml_tree = get_parsed_content(metadata_to_parse)
        else:
            # No content given: build a fresh template tree populated with defaults
            self._xml_tree = self._get_template(**metadata_defaults)
            self._xml_root = self._data_map['_root']

        self._init_metadata()

    def _init_metadata(self):
        """
        Dynamically sets attributes from a Dictionary passed in by children.
        The Dictionary will contain the name of each attribute as keys, and
        either an XPATH mapping to a text value in _xml_tree, or a function
        that takes no parameters and returns the intended value.
        """

        if self._data_map is None:
            self._init_data_map()

        # Ensure the child-provided data map covers all supported properties
        validate_properties(self._data_map, self._metadata_props)

        # Parse attribute values and assign them: key = parse(val)

        for prop in self._data_map:
            setattr(self, prop, parse_property(self._xml_tree, None, self._data_map, prop))

        # True if at least one mapped property parsed to a non-empty value
        self.has_data = any(getattr(self, prop) for prop in self._data_map)

    def _init_data_map(self):
        """ Default data map initialization: MUST be overridden in children """

        # Fallback: a root-less map with every supported property mapped to None
        if self._data_map is None:
            self._data_map = {'_root': None}
            self._data_map.update({}.fromkeys(self._metadata_props))

    def _get_template(self, root=None, **metadata_defaults):
        """
        Iterate over items metadata_defaults {prop: val, ...} to populate template
        :return: a new element tree rooted at the data map's root, with defaults applied
        """

        if root is None:
            if self._data_map is None:
                self._init_data_map()

            root = self._xml_root = self._data_map['_root']

        template_tree = self._xml_tree = create_element_tree(root)

        # Apply each default both as an instance attribute and into the new tree
        for prop, val in metadata_defaults.items():
            path = self._data_map.get(prop)
            if path and val:
                setattr(self, prop, val)
                update_property(template_tree, None, path, prop, val)

        return template_tree

    def _get_xpath_for(self, prop):
        """ :return: the configured xpath for a given property """

        xpath = self._data_map.get(prop)
        return getattr(xpath, 'xpath', xpath)  # May be a ParserProperty

    def _get_xroot_for(self, prop):
        """ :return: the configured root for a given property based on the property name """

        # Roots follow the '_{prop}_root' naming convention in the data map
        return self._get_xpath_for(f'_{prop}_root')

    def _parse_complex(self, prop):
        """ Default parsing operation for a complex struct """

        xpath_root = None
        xpath_map = self._data_structures[prop]

        return parse_complex(self._xml_tree, xpath_root, xpath_map, prop)

    def _parse_complex_list(self, prop):
        """ Default parsing operation for lists of complex structs """

        xpath_root = self._get_xroot_for(prop)
        xpath_map = self._data_structures[prop]

        return parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop)

    def _parse_dates(self, prop=DATES):
        """ Creates and returns a Date Types data structure parsed from the metadata """

        return parse_dates(self._xml_tree, self._data_structures[prop])

    def _update_complex(self, **update_props):
        """ Default update operation for a complex struct """

        prop = update_props['prop']
        xpath_root = self._get_xroot_for(prop)
        xpath_map = self._data_structures[prop]

        return update_complex(xpath_root=xpath_root, xpath_map=xpath_map, **update_props)

    def _update_complex_list(self, **update_props):
        """ Default update operation for lists of complex structs """

        prop = update_props['prop']
        xpath_root = self._get_xroot_for(prop)
        xpath_map = self._data_structures[prop]

        return update_complex_list(xpath_root=xpath_root, xpath_map=xpath_map, **update_props)

    def _update_dates(self, xpath_root=None, **update_props):
        """
        Default update operation for Dates metadata
        :param xpath_root: optional root beneath which children write their dates
        :see: gis_metadata.utils.COMPLEX_DEFINITIONS[DATES]
        """

        tree_to_update = update_props['tree_to_update']
        prop = update_props['prop']
        values = (update_props['values'] or {}).get(DATE_VALUES) or u''
        xpaths = self._data_structures[prop]

        # Choose target xpath(s) by date type: range dates need both begin and end
        if not self.dates:
            date_xpaths = xpath_root
        elif self.dates[DATE_TYPE] != DATE_TYPE_RANGE:
            date_xpaths = xpaths.get(self.dates[DATE_TYPE], u'')
        else:
            date_xpaths = [
                xpaths[DATE_TYPE_RANGE_BEGIN],
                xpaths[DATE_TYPE_RANGE_END]
            ]

        # Clear any existing date elements beneath the root before rewriting
        if xpath_root:
            remove_element(tree_to_update, xpath_root)

        return update_property(tree_to_update, xpath_root, date_xpaths, prop, values)

    def convert_to(self, new_parser_or_type):
        """
        :return: a parser initialized with this parser's data. If new_parser_or_type is to be treated
            as a parser, it must have
        :param new_parser_or_type: a new parser to initialize, or parser type to instantiate
        """

        # A dict (or dict subclass) target returns a plain property dict instead of a parser
        try:
            to_dict = issubclass(new_parser_or_type, dict)
        except TypeError:
            to_dict = isinstance(new_parser_or_type, dict)

        if to_dict:
            return {p: getattr(self, p) for p in self._metadata_props if p[0] != '_'}
        else:
            return convert_parser_to(self, new_parser_or_type, self._metadata_props)

    def serialize(self, use_template=False):
        """
        Validates instance properties, writes them to an XML tree, and returns the content as a string.
        :param use_template: if True, updates a new template XML tree; otherwise the original XML tree
        """
        return element_to_string(self.update(use_template))

    def write(self, use_template=False, out_file_or_path=None, encoding=DEFAULT_ENCODING):
        """
        Validates instance properties, updates an XML tree with them, and writes the content to a file.
        :param use_template: if True, updates a new template XML tree; otherwise the original XML tree
        :param out_file_or_path: optionally override self.out_file_or_path with a custom file path
        :param encoding: optionally use another encoding instead of UTF-8
        :raises IOError: if no output path was provided here or at construction time
        """

        if not out_file_or_path:
            out_file_or_path = self.out_file_or_path

        if not out_file_or_path:
            raise IOError('Output file path has not been provided')

        write_element(self.update(use_template), out_file_or_path, encoding)
396 | :param use_template: if True, updates a new template XML tree; otherwise the original XML tree 397 | """ 398 | 399 | self.validate() 400 | 401 | tree_to_update = self._xml_tree if not use_template else self._get_template(**metadata_defaults) 402 | supported_props = self._metadata_props 403 | 404 | for prop, xpath in self._data_map.items(): 405 | if not prop.startswith('_') or prop.strip('_') in supported_props: 406 | # Send only public or alternate properties 407 | update_property( 408 | tree_to_update, self._get_xroot_for(prop), xpath, prop, getattr(self, prop, u''), supported_props 409 | ) 410 | 411 | return tree_to_update 412 | 413 | def validate(self): 414 | """ Default validation for updated properties: MAY be overridden in children """ 415 | 416 | validate_properties(self._data_map, self._metadata_props) 417 | 418 | for prop in self._data_map: 419 | validate_any(prop, getattr(self, prop), self._data_structures.get(prop)) 420 | 421 | return self 422 | -------------------------------------------------------------------------------- /gis_metadata/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/consbio/gis-metadata-parser/a3cac07857bc425185d43ec819aece1a9533ec8c/gis_metadata/tests/__init__.py -------------------------------------------------------------------------------- /gis_metadata/tests/data/arcgis_metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Attributes Label 1 7 | Attributes Alias 1 8 | Attributes Definition 1 9 | Attributes Definition Source 1 10 | 11 | 12 | 13 | Attributes Label 2 14 | Attributes Alias 2 15 | Attributes Definition 2 16 | Attributes Definition Source 2 17 | 18 | 19 | 20 | Attributes Label 3 21 | Attributes Alias 3 22 | Attributes Definition 3 23 | Attributes Definition Source 3 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | FGDC CSDGM Metadata 35 | 1.0 36 | 37 | 38 | 
39 | 40 | 41 | 42 | 43 | 44 | 45 | ArcGIS Metadata 46 | 1.0 47 | 48 | 49 | 50 | Test Distribution Org 51 | Test Distribution Person 52 | 53 | 54 | Test Distribution Phone 55 | 56 | 57 | Test Distribution Address 58 | Test Distribution City 59 | OR 60 | 12345 61 | US 62 | Test Distribution Email 63 | 64 | 8:00 AM - 5:00 PM PST 65 | Backup Distribution Phone 66 | 67 | 68 | 69 | 70 | Test Processing Fees 71 | 72 | Test Processing Instructions 73 | 74 | 75 | 76 | 77 | Digital Form Name 1 78 | Digital Form Version 1 79 | Digital Form Content 1 80 | Digital Form Specification 1 81 | Digital Form Decompression 1 82 | 83 | 84 | 85 | Digital Form Resource 1 86 | Digital Form Access Instructions 1 87 | Digital Form Access Description 1 88 | 89 | 90 | 91 | Digital Form Name 2 92 | Digital Form Version 2 93 | Digital Form Content 2 94 | Digital Form Specification 2 95 | Digital Form Decompression 2 96 | 97 | 98 | 99 | Digital Form Resource 2 100 | Digital Form Access Instructions 2 101 | Digital Form Access Description 2 102 | 103 | 104 | 105 | 106 | 107 | 108 | Larger Works Title 109 | Larger Works Edition 110 | Larger Works Other Citation 111 | 112 | Larger Works Originator 113 | Larger Works Info 114 | 115 | 116 | Larger Works Place 117 | 118 | 119 | http://test.largerworks.online.linkage.com 120 | 121 | 122 | 123 | 124 | Larger Works Date 125 | 126 | 127 | 128 | 129 | Test Title 130 | 131 | Test Originators 132 | 133 | 134 | Test Other Citation Info 135 | 136 | 137 | 138 | http://test.onlinelinkages.org 139 | 140 | 141 | 142 | 143 | 144 | http://backup.onlinelinkages.org 145 | 146 | 147 | Test Resource Description 148 | 149 | 150 | Test Publish Date 151 | 152 | 153 | vector digital data 154 | 155 | 156 | Test Abstract 157 | Test Purpose 158 | Test Data Credits 159 | 160 | Test Resource Description 161 | 162 | 163 | 164 | Contact Name 1 165 | Contact Organization 1 166 | Contact Position 1 167 | 168 | 169 | Contact Email 1 170 | 171 | 172 | 173 | 174 | 175 | Contact Name 2 
176 | Contact Organization 2 177 | Contact Position 2 178 | 179 | 180 | Contact Email 2 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | Oregon 190 | Washington 191 | 192 | 193 | Layer One 194 | Layer Two 195 | 196 | 197 | Now 198 | Later 199 | 200 | 201 | Ecoregion 202 | 203 | 204 | Risk 205 | Threat 206 | Habitat 207 | 208 | 209 | ArcGIS Discipline One 210 | ArcGIS Discipline Two 211 | 212 | 213 | ArcGIS Other One 214 | ArcGIS Other Two 215 | 216 | 217 | ArcGIS Product One 218 | ArcGIS Product Two 219 | 220 | 221 | ArcGIS Search One 222 | ArcGIS Search Two 223 | 224 | 225 | ArcGIS Topical One 226 | ArcGIS Topical Two 227 | 228 | 229 | 230 | Test Distribution Liability 231 | 232 | 233 | 234 | 235 | Test Use Constraints 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | Multiple Date 1 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | Multiple Date 2 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | Multiple Date 3 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | Test Supplementary Info 277 | Test Technical Prerequisites 278 | 279 | 280 | 281 | 282 | 1 283 | -179.99999999998656 284 | 179.99999999998656 285 | 87.81211601444309 286 | -86.78249642712764 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | Process Step Description 1 295 | Process Step Date 1 296 | 297 | Process Step Sources 1.1 298 | 299 | 300 | Process Step Sources 1.2 301 | 302 | 303 | 304 | Process Step Description 2 305 | 306 | 307 | 308 | Process Step Date 3 309 | 310 | 311 | 312 | Process Step Description 4 313 | 314 | Process Step Sources 4.1 315 | 316 | 317 | Process Step Sources 4.2 318 | 319 | 320 | 321 | 322 | Test Attribute Accuracy 323 | 324 | 325 | Test Dataset Completeness 326 | 327 | 328 | 329 | 330 | dataset 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | Test Vertical Count 339 | 340 | 341 | Test Column Count 342 | 343 | Test X Resolution 344 | 345 | 346 | 347 | Test Row Count 348 | 349 | Test Y Resolution 350 | 351 | 352 | Test # 
Dimensions 353 | 354 | 355 | 356 | -------------------------------------------------------------------------------- /gis_metadata/tests/data/fgdc_metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Test Abstract 6 | Test Purpose 7 | Test Supplementary Info 8 | 9 | 10 | 11 | Test Originators 12 | Test Publish Date 13 | Test Title 14 | http://test.onlinelinkages.org 15 | Test Other Citation Info 16 | 17 | 18 | 19 | 20 | 21 | 22 | Larger Works Originator 23 | Larger Works Date 24 | Larger Works Title 25 | Larger Works Edition 26 | Larger Works Other Citation 27 | http://test.largerworks.online.linkage.com 28 | 29 | Larger Works Info 30 | Larger Works Place 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | Date Range Start 40 | Date Range End 41 | 42 | 43 | 44 | Multiple Date 1 45 | 46 | 47 | Multiple Date 2 48 | 49 | 50 | Multiple Date 3 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | -179.99999999998656 63 | 179.99999999998656 64 | 87.81211601444309 65 | -86.78249642712764 66 | 67 | 68 | 69 | 70 | Oregon 71 | Washington 72 | 73 | 74 | Layer One 75 | Layer Two 76 | 77 | 78 | Now 79 | Later 80 | 81 | 82 | Ecoregion 83 | Risk 84 | Threat 85 | Habitat 86 | 87 | 88 | Test Use Constraints 89 | Test Data Credits 90 | 91 | 92 | Contact Email 1 93 | Contact Position 1 94 | 95 | Contact Name 1 96 | Contact Organization 1 97 | 98 | 99 | 100 | 101 | 102 | Contact Email 2 103 | Contact Position 2 104 | 105 | Contact Name 2 106 | Contact Organization 2 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | Test Distribution Org 116 | Test Distribution Person 117 | 118 | 119 | Test Distribution Address Type 120 |
Test Distribution Address
121 | Test Distribution City 122 | OR 123 | 12345 124 | US 125 |
126 | Test Distribution Phone 127 | Test Distribution Email 128 |
129 |
130 | Test Resource Description 131 | Test Distribution Liability 132 | 133 | 134 | 135 | Digital Form Name 1 136 | Digital Form Version 1 137 | Digital Form Content 1 138 | Digital Form Specification 1 139 | Digital Form Decompression 1 140 | 141 | 142 | 143 | 144 | 145 | Digital Form Resource 1 146 | 147 | 148 | Digital Form Access Instructions 1 149 | Digital Form Access Description 1 150 | 151 | 152 | 153 | 154 | 155 | Digital Form Name 2 156 | Digital Form Version 2 157 | Digital Form Content 2 158 | Digital Form Specification 2 159 | Digital Form Decompression 2 160 | 161 | 162 | 163 | 164 | 165 | Digital Form Resource 2 166 | 167 | 168 | Digital Form Access Instructions 2 169 | Digital Form Access Description 2 170 | 171 | 172 | 173 | Test Processing Fees 174 | Test Processing Instructions 175 | 176 | Test Technical Prerequisites 177 |
178 | 179 | 180 | 181 | Attributes Label 1 182 | Attributes Alias 1 183 | Attributes Definition 1 184 | Attributes Definition Source 1 185 | 186 | 187 | 188 | Attributes Label 2 189 | Attributes Alias 2 190 | Attributes Definition 2 191 | Attributes Definition Source 2 192 | 193 | 194 | 195 | Attributes Label 3 196 | Attributes Alias 3 197 | Attributes Definition 3 198 | Attributes Definition Source 3 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 20070210 207 | 208 | 209 | 210 | 20070418 211 | 212 | 213 | 214 | Process Step Description 1 215 | Process Step Date 1 216 | Process Step Sources 1.1 217 | Process Step Sources 1.2 218 | 219 | 220 | Process Step Description 2 221 | 222 | 223 | 224 | Process Step Date 3 225 | 226 | 227 | 228 | 229 | Process Step Description 4 230 | 231 | Process Step Sources 4.1 232 | Process Step Sources 4.2 233 | 234 | 235 | Test Dataset Completeness 236 | 237 | Test Attribute Accuracy 238 | 239 | 240 | 241 | 242 | Test Column Count 243 | Test Vertical Count 244 | Test Row Count 245 | Test # Dimensions 246 | 247 | 248 | 249 | 250 | 251 | 252 | Test Backup Y Resolution 253 | Test Backup X Resolution 254 | 255 | 256 | 257 | Custom Projection 258 | 259 | 7 260 | 8 261 | 9 262 | 22 263 | 11 264 | 265 | 266 | 267 | 268 | Test Y Resolution 269 | Test X Resolution 270 | 271 | 272 | 273 | 274 | 275 |
276 | -------------------------------------------------------------------------------- /gis_metadata/tests/data/iso_citation_href.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | HREF Attributes Alias 1 9 | 10 | 11 | HREF Attributes Definition 1 12 | 13 | 14 | HREF Attributes Label 1 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | HREF Attributes Definition Source 1 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | HREF Attributes Alias 2 41 | 42 | 43 | HREF Attributes Definition 2 44 | 45 | 46 | HREF Attributes Label 2 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | HREF Attributes Definition Source 2 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | HREF Attributes Alias 3 77 | 78 | 79 | HREF Attributes Definition 3 80 | 81 | 82 | HREF Attributes Label 3 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | HREF Attributes Definition Source 3 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /gis_metadata/tests/data/iso_citation_linkage.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | LINKAGE Attributes Alias 1 9 | 10 | 11 | LINKAGE Attributes Definition 1 12 | 13 | 14 | LINKAGE Attributes Label 1 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | LINKAGE Attributes Definition Source 1 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | LINKAGE Attributes Alias 2 45 | 46 | 47 | LINKAGE Attributes Definition 2 48 | 49 | 50 | LINKAGE Attributes Label 2 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | LINKAGE Attributes Definition Source 2 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | LINKAGE Attributes Alias 3 77 | 78 | 79 | LINKAGE Attributes Definition 3 80 
| 81 | 82 | LINKAGE Attributes Label 3 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | LINKAGE Attributes Definition Source 3 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /gis_metadata/tests/data/iso_metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | eng 5 | esp 6 | 7 | 8 | 9 | 10 | Custom Contact Name 11 | 12 | 13 | Custom Contact Organization 14 | 15 | 16 | Custom Contact Position 17 | 18 | 19 | 20 | 21 | 22 | 23 | Custom Contact Phone 24 | 25 | 26 | 27 |
28 | 29 | 30 | Custom Contact Email 31 | 32 | 33 |
34 |
35 |
36 | 37 | pointOfContact 38 | 39 |
40 |
41 | 42 | 43 | 44 | 45 | 46 | Attributes Title (unused) 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | ftp://ftp.ncddc.noaa.gov/pub/Metadata//ISO/87ffdfd0-775a-11e0-a1f0-0800200c9a66.xml 56 | 57 | 58 | ftp 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Attributes Alias 1 76 | 77 | 78 | Attributes Definition 1 79 | 80 | 81 | Attributes Label 1 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | Attributes Definition Source 1 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | Attributes Alias 2 108 | 109 | 110 | Attributes Definition 2 111 | 112 | 113 | Attributes Label 2 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | Attributes Definition Source 2 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | Attributes Alias 3 140 | 141 | 142 | Attributes Definition 3 143 | 144 | 145 | Attributes Label 3 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | Attributes Definition Source 3 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | <CharacterString>Larger Works Title</CharacterString> 178 | 179 | 180 | Larger Works Other Citation 181 | 182 | 183 | Larger Works Collective Title (not currently used) 184 | 185 | 186 | Larger Works Edition 187 | 188 | 189 | Larger Works Date 190 | 191 | 192 | 193 | 194 | Larger Works Originator 195 | 196 | 197 | Larger Works Info 198 | 199 | 200 | 201 |
202 | 203 | 204 | Larger Works Place 205 | 206 | 207 |
208 | 209 | 210 | 211 | http://test.largerworks.online.linkage.com 212 | 213 | 214 | 215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 | 224 | 225 | 226 | <CharacterString>Test Title</CharacterString> 227 | 228 | 229 | Test Other Citation Info 230 | 231 | 232 | 233 | 234 | Test Publish Date 235 | 236 | 237 | Test Publish Date Type 238 | 239 | 240 | 241 | 242 | 243 | 244 | Test Originators 245 | 246 | 247 | originator 248 | 249 | 250 | 251 | 252 | 253 | 254 | http://test.onlinelinkages.org 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | Test Abstract 266 | 267 | 268 | Test Technical Prerequisites 269 | 270 | 271 | Test Purpose 272 | 273 | 274 | Test Data Credits 275 | 276 | 277 | 278 | 279 | Contact Name 1 280 | 281 | 282 | Contact Organization 1 283 | 284 | 285 | Contact Position 1 286 | 287 | 288 | 289 |
290 | 291 | 292 | Contact Email 1 293 | 294 | 295 |
296 |
297 |
298 |
299 |
300 | 301 | 302 | 303 | Contact Name 2 304 | 305 | 306 | Contact Organization 2 307 | 308 | 309 | Contact Position 2 310 | 311 | 312 | 313 |
314 | 315 | 316 | Contact Email 2 317 | 318 | 319 |
320 |
321 |
322 |
323 |
324 | 325 | 326 | 327 | Test Use Constraints 328 | 329 | 330 | 331 | 332 | 333 | 334 | Test Distribution Liability 335 | 336 | 337 | 338 | 339 | 340 | 341 | Test Resource Description 342 | 343 | 344 | 345 | 346 | 347 | 348 | Ecoregion 349 | 350 | 351 | Risk 352 | 353 | 354 | Threat 355 | 356 | 357 | Habitat 358 | 359 | 360 | theme 361 | 362 | 363 | 364 | 365 | 366 | 367 | Layer One 368 | 369 | 370 | Layer Two 371 | 372 | 373 | stratum 374 | 375 | 376 | 377 | 378 | 379 | 380 | Now 381 | 382 | 383 | Later 384 | 385 | 386 | temporal 387 | 388 | 389 | 390 | 391 | 392 | 393 | Oregon 394 | 395 | 396 | place 397 | 398 | 399 | 400 | 401 | 402 | 403 | Washington 404 | 405 | 406 | place 407 | 408 | 409 | 410 | 411 | 412 | 413 | publication date 414 | 415 | 416 | 417 | 418 | 419 | Multiple Date 1 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | Multiple Date 2 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | Multiple Date 3 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | -179.99999999998656 450 | 451 | 452 | 179.99999999998656 453 | 454 | 455 | -86.78249642712764 456 | 457 | 458 | 87.81211601444309 459 | 460 | 461 | 462 | 463 | 464 | 465 | Test Supplementary Info 466 | 467 |
468 |
469 | 470 | 471 | 472 | 473 | 474 | Digital Form Name 1 475 | 476 | 477 | Digital Form Decompression 1 478 | 479 | 480 | 481 | Digital Form Specification 1 482 | @------------------------------@ 483 | Digital Form Content 1 484 | 485 | 486 | 487 | Digital Form Version 1 488 | 489 | 490 | 491 | 492 | 493 | 494 | Digital Form Name 2 495 | 496 | 497 | Digital Form Decompression 2 498 | 499 | 500 | 501 | Digital Form Specification 2 502 | 503 | @------------------------------@ 504 | 505 | Digital Form Content 2 506 | 507 | 508 | 509 | Digital Form Version 2 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | Digital Form Access Instructions 1 519 | 520 | 521 | Digital Form Resource 1 522 | 523 | 524 | Digital Form Access Description 1 525 | 526 | 527 | 528 | 529 | 530 | 531 | Digital Form Resource 2 532 | 533 | 534 | Digital Form Access Instructions 2 535 | 536 | 537 | Digital Form Access Description 2 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | Test Distribution Person 549 | 550 | 551 | Test Distribution Org 552 | 553 | 554 | 555 | 556 | 557 | 558 | Test Distribution Phone 559 | 560 | 561 | 562 |
563 | 564 | 565 | Test Distribution Address 566 | 567 | 568 | Test Distribution City 569 | 570 | 571 | OR 572 | 573 | 574 | 12345 575 | 576 | 577 | US 578 | 579 | 580 | Test Distribution Email 581 | 582 | 583 |
584 |
585 |
586 | 587 | distributor 588 | 589 |
590 |
591 | 592 | 593 | 594 | Test Processing Fees 595 | 596 | 597 | Test Processing Instructions 598 | 599 | 600 | 601 |
602 |
603 |
604 |
605 | 606 | 607 | 608 | 609 | 610 | Test Dataset Completeness 611 | 612 | 613 | 614 | 615 | 616 | 617 | Test Attribute Accuracy 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | Process Step Description 1 627 | 628 | 629 | Process Step Date 1 630 | 631 | 632 | 633 | 634 | 635 | 636 | <CharacterString>Process Step Sources 1 (not used)</CharacterString> 637 | 638 | 639 | Process Step Sources 1.1 640 | 641 | 642 | Process Step Sources 1.2 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | Process Step Description 2 654 | 655 | 656 | 657 | 658 | 659 | 660 | Process Step Date 3 661 | 662 | 663 | 664 | 665 | 666 | 667 | Process Step Description 4 668 | 669 | 670 | 671 | 672 | 673 | 674 | Process Step Sources 4.1 675 | 676 | 677 | Process Step Sources 4.2 678 | 679 | 680 | 681 | 682 | 683 | 684 | 685 | 686 | 687 | 688 | 689 | 690 | 691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | Test Vertical Count 699 | 700 | 701 | 702 | 703 | 704 | 705 | Test X Resolution 706 | 707 | 708 | column 709 | 710 | 711 | Test Column Count 712 | 713 | 714 | 715 | 716 | 717 | 718 | Test Y Resolution 719 | 720 | 721 | 722 | 723 | 724 | Test Row Count 725 | 726 | 727 | 728 | 729 | Test # Dimensions 730 | 731 | 732 | 733 |
734 | -------------------------------------------------------------------------------- /gis_metadata/tests/data/utility_metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Test Abstract 6 | Test Purpose 7 | Test Supplementary Info 8 | 9 | 10 | 11 | Test Originators 12 | Test Publish Date 13 | Test Title 14 | http://test.onlinelinkages.org 15 | Test Other Citation Info 16 | 17 | 18 | 19 | 20 | 21 | 22 | Larger Works Originator 23 | Larger Works Date 24 | Larger Works Title 25 | Larger Works Edition 26 | Larger Works Other Citation 27 | http://test.largerworks.online.linkage.com 28 | 29 | Larger Works Info 30 | Larger Works Place 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | Date Range Start 1 40 | Date Range Start 2 41 | Date Range End 1 42 | Date Range End 2 43 | 44 | 45 | 46 | Multiple Date 1 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -179.99999999998656 59 | 179.99999999998656 60 | 87.81211601444309 61 | -86.78249642712764 62 | 63 | 64 | 65 | 66 | Oregon 67 | Washington 68 | 69 | 70 | Layer One 71 | Layer Two 72 | 73 | 74 | Now 75 | Later 76 | 77 | 78 | Ecoregion 79 | Risk 80 | Threat 81 | Habitat 82 | 83 | 84 | Test Use Constraints 85 | Test Data Credits 86 | 87 | 88 | Contact Email 1 89 | Contact Position 1 90 | 91 | Contact Name 1 92 | Contact Organization 1 93 | 94 | 95 | 96 | 97 | 98 | Contact Email 2 99 | Contact Position 2 100 | 101 | Contact Name 2 102 | Contact Organization 2 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | Test Distribution Org 112 | Test Distribution Person 113 | 114 | 115 | Test Distribution Address Type 116 |
Test Distribution Address
117 | Test Distribution City 118 | OR 119 | 12345 120 | US 121 |
122 | Test Distribution Phone 123 | Test Distribution Email 124 |
125 |
126 | Test Resource Description 127 | Test Distribution Liability 128 | 129 | 130 | 131 | Digital Form Name 1 132 | Digital Form Version 1 133 | Digital Form Content 1 134 | Digital Form Specification 1 135 | Digital Form Decompression 1 136 | 137 | 138 | 139 | 140 | 141 | Digital Form Resource 1 142 | 143 | 144 | Digital Form Access Instructions 1 145 | Digital Form Access Description 1 146 | 147 | 148 | 149 | 150 | 151 | Digital Form Name 2 152 | Digital Form Version 2 153 | Digital Form Content 2 154 | Digital Form Specification 2 155 | Digital Form Decompression 2 156 | 157 | 158 | 159 | 160 | 161 | Digital Form Resource 2 162 | 163 | 164 | Digital Form Access Instructions 2 165 | Digital Form Access Description 2 166 | 167 | 168 | 169 | Test Processing Fees 170 | Test Processing Instructions 171 | 172 | Test Technical Prerequisites 173 |
174 | 175 | 176 | 177 | Attributes Label 1 178 | Attributes Alias 1 179 | Attributes Definition 1 180 | Attributes Definition Source 1 181 | 182 | 183 | 184 | Attributes Label 2 185 | Attributes Alias 2 186 | Attributes Definition 2 187 | Attributes Definition Source 2 188 | 189 | 190 | 191 | Attributes Label 3 192 | Attributes Alias 3 193 | Attributes Definition 3 194 | Attributes Definition Source 3 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 20070210 203 | 204 | 205 | 206 | 20070418 207 | 208 | 209 | 210 | Process Step Description 1 211 | Process Step Date 1 212 | Process Step Sources 1.1 213 | Process Step Sources 1.2 214 | 215 | 216 | Process Step Description 2 217 | 218 | 219 | 220 | Process Step Date 3 221 | 222 | 223 | 224 | 225 | Process Step Description 4 226 | 227 | Process Step Sources 4.1 228 | Process Step Sources 4.2 229 | 230 | 231 | Test Dataset Completeness 232 | 233 | Test Attribute Accuracy 234 | 235 | 236 | 237 | 238 | Test Column Count 239 | Test Vertical Count 240 | Test Row Count 241 | Test # Dimensions 242 | 243 | 244 | 245 | 246 | 247 | 248 | Test Backup Y Resolution 249 | Test Backup X Resolution 250 | 251 | 252 | 253 | 254 | Test Y Resolution 255 | Test X Resolution 256 | 257 | 258 | 259 | 260 | 261 |
262 | -------------------------------------------------------------------------------- /gis_metadata/utils.py: -------------------------------------------------------------------------------- 1 | """ Data structures and functionality used by all Metadata Parsers """ 2 | 3 | from frozendict import frozendict 4 | 5 | from parserutils.collections import filter_empty, flatten_items, reduce_value, wrap_value 6 | from parserutils.elements import get_element, get_elements, get_elements_attributes, get_elements_text 7 | from parserutils.elements import insert_element, remove_element 8 | from parserutils.elements import remove_element_attributes, set_element_attributes 9 | from parserutils.elements import XPATH_DELIM 10 | 11 | from gis_metadata.exceptions import ConfigurationError, ValidationError 12 | 13 | 14 | # Generic identifying property name constants 15 | 16 | KEYWORDS_PLACE = 'place_keywords' 17 | KEYWORDS_STRATUM = 'stratum_keywords' 18 | KEYWORDS_TEMPORAL = 'temporal_keywords' 19 | KEYWORDS_THEME = 'thematic_keywords' 20 | 21 | 22 | # Identifying property name constants for all complex definitions 23 | 24 | ATTRIBUTES = 'attributes' 25 | BOUNDING_BOX = 'bounding_box' 26 | CONTACTS = 'contacts' 27 | DATES = 'dates' 28 | DIGITAL_FORMS = 'digital_forms' 29 | LARGER_WORKS = 'larger_works' 30 | PROCESS_STEPS = 'process_steps' 31 | RASTER_INFO = 'raster_info' 32 | RASTER_DIMS = '_raster_dims' 33 | 34 | 35 | # Grouping property name constants for complex definitions 36 | 37 | _COMPLEX_DELIM = '\n' 38 | _COMPLEX_LISTS = frozenset({ 39 | ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS, 40 | KEYWORDS_PLACE, KEYWORDS_STRATUM, KEYWORDS_TEMPORAL, KEYWORDS_THEME, 41 | }) 42 | _COMPLEX_STRUCTS = frozenset({BOUNDING_BOX, DATES, LARGER_WORKS, RASTER_INFO}) 43 | _COMPLEX_WITH_MULTI = frozendict({ 44 | DATES: {'values'}, 45 | LARGER_WORKS: {'origin'}, 46 | PROCESS_STEPS: {'sources'} 47 | }) 48 | 49 | 50 | # Date specific constants for the DATES complex structure 51 | 52 | 
DATE_TYPE_MISSING = '' 53 | DATE_TYPE_MULTIPLE = 'multiple' 54 | DATE_TYPE_RANGE = 'range' 55 | DATE_TYPE_RANGE_BEGIN = 'range_begin' 56 | DATE_TYPE_RANGE_END = 'range_end' 57 | DATE_TYPE_SINGLE = 'single' 58 | 59 | DATE_TYPES = ( 60 | DATE_TYPE_MISSING, DATE_TYPE_SINGLE, DATE_TYPE_MULTIPLE, DATE_TYPE_RANGE 61 | ) 62 | 63 | DATE_TYPE = 'type' 64 | DATE_VALUES = 'values' 65 | 66 | 67 | # To add a new complex definition field: 68 | # 1. Create a new constant representing the property name for the field 69 | # 2. Create a new item in COMPLEX_DEFINITIONS that represents the structure of the field 70 | # 3. If required by all metadata parsers, add the constant to SUPPORTED_PROPS 71 | # 4. Update the target metadata parsers with a parse and update method for the new field 72 | # 5. Update the target metadata parsers' _init_data_map method to instantiate a ParserProperty 73 | # 6. Create a new validation method for the type if validate_complex or validate_complex_list won't suffice 74 | # 7. Update validate_any to recognize the constant and call the intended validation method 75 | 76 | COMPLEX_DEFINITIONS = frozendict({ 77 | ATTRIBUTES: frozendict({ 78 | 'label': '{label}', # Text 79 | 'aliases': '{aliases}', # Text 80 | 'definition': '{definition}', # Text 81 | 'definition_source': '{definition_src}' # Text 82 | }), 83 | BOUNDING_BOX: frozendict({ 84 | 'east': '{east}', # Text 85 | 'south': '{south}', # Text 86 | 'west': '{west}', # Text 87 | 'north': '{north}' # Text 88 | }), 89 | CONTACTS: frozendict({ 90 | 'name': '{name}', # Text 91 | 'email': '{email}', # Text 92 | 'organization': '{organization}', # Text 93 | 'position': '{position}' # Text 94 | }), 95 | DATES: frozendict({ 96 | DATE_TYPE: '{type}', # Text 97 | DATE_VALUES: '{values}' # Text [] 98 | }), 99 | DIGITAL_FORMS: frozendict({ 100 | 'name': '{name}', # Text 101 | 'content': '{content}', # Text 102 | 'decompression': '{decompression}', # Text 103 | 'version': '{version}', # Text 104 | 'specification': 
'{specification}', # Text 105 | 'access_desc': '{access_desc}', # Text 106 | 'access_instrs': '{access_instrs}', # Text 107 | 'network_resource': '{network_resource}' # Text 108 | }), 109 | LARGER_WORKS: frozendict({ 110 | 'title': '{title}', # Text 111 | 'edition': '{edition}', # Text 112 | 'origin': '{origin}', # Text [] 113 | 'online_linkage': '{online_linkage}', # Text 114 | 'other_citation': '{other_citation}', # Text 115 | 'publish_date': '{date}', # Text 116 | 'publish_place': '{place}', # Text 117 | 'publish_info': '{info}' # Text 118 | }), 119 | PROCESS_STEPS: frozendict({ 120 | 'description': '{description}', # Text 121 | 'date': '{date}', # Text 122 | 'sources': '{sources}' # Text [] 123 | }), 124 | RASTER_INFO: frozendict({ 125 | 'dimensions': '{dimensions}', # Text 126 | 'row_count': '{row_count}', # Text 127 | 'column_count': '{column_count}', # Text 128 | 'vertical_count': '{vertical_count}', # Text 129 | 'x_resolution': '{x_resolution}', # Text 130 | 'y_resolution': '{y_resolution}', # Text 131 | }), 132 | RASTER_DIMS: frozendict({ 133 | # Captures dimension data for raster_info 134 | 'type': '{type}', # Text 135 | 'size': '{size}', # Text 136 | 'value': '{value}', # Text 137 | 'units': '{units}' # Text 138 | }) 139 | }) 140 | 141 | # A set of identifying property names that must be supported by all parsers 142 | 143 | SUPPORTED_PROPS = frozenset({ 144 | 'title', 'abstract', 'purpose', 'other_citation_info', 'supplementary_info', 145 | 'online_linkages', 'originators', 'publish_date', 'data_credits', 'digital_forms', 146 | 'dist_contact_org', 'dist_contact_person', 'dist_email', 'dist_phone', 147 | 'dist_address', 'dist_address_type', 'dist_city', 'dist_state', 'dist_postal', 'dist_country', 148 | 'dist_liability', 'processing_fees', 'processing_instrs', 'resource_desc', 'tech_prerequisites', 149 | ATTRIBUTES, 'attribute_accuracy', BOUNDING_BOX, CONTACTS, 'dataset_completeness', 150 | LARGER_WORKS, PROCESS_STEPS, RASTER_INFO, 'use_constraints', 151 
def format_xpaths(xpath_map, *args, **kwargs):
    """
    Apply str.format to every XPATH in the map.
    :param xpath_map: a dict of XPATHs that may contain "{key}" format placeholders
    :return: a copy of xpath_map, but with XPATHs formatted with ordered or keyword values
    """

    # Build the copy directly: the original "{}.fromkeys" dict was discarded key-by-key anyway
    return {key: xpath.format(*args, **kwargs) for key, xpath in xpath_map.items()}


def get_xpath_root(xpath):
    """ :return: the base of an XPATH: the part preceding any format keys or attribute references """

    if xpath:
        if xpath.startswith('@'):
            xpath = ''  # A pure attribute reference has no element root
        else:
            # Truncate at the first attribute ("/@") or format key ("/{") segment
            index = xpath.find('/@' if '@' in xpath else '/{')
            xpath = xpath[:index] if index >= 0 else xpath

    return xpath


def get_xpath_branch(xroot, xpath):
    """ :return: the relative part of an XPATH: that which extends past the root provided """

    if xroot and xpath and xpath.startswith(xroot):
        xpath = xpath[len(xroot):]
        xpath = xpath.lstrip(XPATH_DELIM)

    return xpath


def get_xpath_tuple(xpath):
    """ :return: a tuple with the base of an XPATH followed by any format key or attribute reference """

    xroot = get_xpath_root(xpath)
    xattr = None

    if xroot != xpath:
        # Anything beyond the root is either "@attr" or a format key
        xattr = get_xpath_branch(xroot, xpath).strip('@')

    return (xroot, xattr)


def get_default_for(prop, value):
    """
    Ensures complex property types have the correct default values
    :param prop: a property name, possibly with leading underscores marking an alternate location
    :param value: the raw value to be filtered and defaulted
    :return: a list for complex-list props, a dict for complex structs, otherwise a string/value
    """

    prop = prop.strip('_')     # Handle alternate props (leading underscores)
    val = reduce_value(value)  # Filtering of value happens here

    if prop in _COMPLEX_LISTS:
        return wrap_value(val)
    elif prop in _COMPLEX_STRUCTS:
        return val or {}
    else:
        return '' if val is None else val


def get_default_for_complex(prop, value, xpath=''):
    """ :return: value converted to defaulted complex struct(s): a list of dicts, or a single dict """

    # Ensure sub-props of complex structs and complex lists that take multiple values are wrapped as lists
    val = [
        {k: get_default_for_complex_sub(prop, k, v, xpath) for k, v in val.items()}
        for val in wrap_value(value)
    ]

    # Complex lists keep every struct; single complex structs reduce to one dict (or {})
    return val if prop in _COMPLEX_LISTS else reduce_value(val, {})


def get_default_for_complex_sub(prop, subprop, value, xpath):
    """ :return: the default for one sub-property of a complex struct: a wrapped list, or a joined string """

    # Handle alternate props (leading underscores)
    prop = prop.strip('_')
    subprop = subprop.strip('_')

    value = wrap_value(value)
    if subprop in _COMPLEX_WITH_MULTI.get(prop, ''):
        return value  # Leave sub properties allowing lists wrapped

    # Join on comma for element attribute values; newline for element text values
    return ','.join(value) if '@' in xpath else _COMPLEX_DELIM.join(value)


def has_property(elem_to_parse, xpath):
    """
    Parse xpath for any attribute reference "path/@attr" and check for root and presence of attribute.
    :return: True if xpath is present in the element along with any attribute referenced, otherwise False
    """

    xroot, attr = get_xpath_tuple(xpath)

    if not xroot and not attr:
        return False
    elif not attr:
        return bool(get_elements_text(elem_to_parse, xroot))
    else:
        return bool(get_elements_attributes(elem_to_parse, xroot, attr))


def parse_complex(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Creates and returns a Dictionary data structure parsed from the metadata.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of the structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes the structure
    """

    complex_struct = {}

    for prop in COMPLEX_DEFINITIONS.get(complex_key, xpath_map):
        # Normalize complex values: treat values with newlines like values from separate elements
        parsed = parse_property(tree_to_parse, xpath_root, xpath_map, prop)
        parsed = reduce_value(flatten_items(v.split(_COMPLEX_DELIM) for v in wrap_value(parsed)))

        complex_struct[prop] = get_default_for_complex_sub(complex_key, prop, parsed, xpath_map[prop])

    # A struct with no truthy values is treated as absent
    return complex_struct if any(complex_struct.values()) else {}


def parse_complex_list(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Creates and returns a list of Dictionary data structures parsed from the metadata.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of each structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes each structure
    """

    complex_list = []

    for element in get_elements(tree_to_parse, xpath_root):
        complex_struct = parse_complex(element, xpath_root, xpath_map, complex_key)
        if complex_struct:  # Skip structs that parsed to nothing
            complex_list.append(complex_struct)

    return complex_list
def parse_dates(tree_to_parse, xpath_map):
    """
    Creates and returns a Dates Dictionary data structure given the parameters provided
    :param tree_to_parse: the XML tree from which to construct the Dates data structure
    :param xpath_map: a map containing the following type-specific XPATHs:
        multiple, range, range_begin, range_end, and single
    """

    def classify(values):
        """ Classify parsed values by count: one value is a single date, more are multiple dates """

        if len(values) == 1:
            return {DATE_TYPE: DATE_TYPE_SINGLE, DATE_VALUES: values}
        elif len(values) > 1:
            return {DATE_TYPE: DATE_TYPE_MULTIPLE, DATE_VALUES: values}
        return None

    # Determine dates to query based on metadata elements: single first, then multiple
    # (the classification logic was duplicated verbatim for both locations before)

    for date_prop in (DATE_TYPE_SINGLE, DATE_TYPE_MULTIPLE):
        dates = classify(wrap_value(parse_property(tree_to_parse, None, xpath_map, date_prop)))
        if dates is not None:
            return dates

    # Fall back to range dates: exactly two values constitute a begin/end range

    values = flatten_items(
        d for x in (DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END)
        for d in wrap_value(parse_property(tree_to_parse, None, xpath_map, x))
    )
    if len(values) == 1:
        return {DATE_TYPE: DATE_TYPE_SINGLE, DATE_VALUES: values}
    elif len(values) == 2:
        return {DATE_TYPE: DATE_TYPE_RANGE, DATE_VALUES: values}
    elif len(values) > 2:
        return {DATE_TYPE: DATE_TYPE_MULTIPLE, DATE_VALUES: values}

    return {}


def parse_property(tree_to_parse, xpath_root, xpath_map, prop):
    """
    Defines the default parsing behavior for metadata values.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: used to determine the relative XPATH location within the parent element
    :param xpath_map: a dict of XPATHs that may contain alternate locations for a property
    :param prop: the property to parse: corresponds to a key in xpath_map
    """

    xpath = xpath_map[prop]

    if isinstance(xpath, ParserProperty):
        if xpath.xpath is None:
            return xpath.get_prop(prop)  # A custom getter handles parsing entirely

        xpath = xpath.xpath

    if xpath_root:
        xpath = get_xpath_branch(xpath_root, xpath)

    parsed = None

    if not has_property(tree_to_parse, xpath):
        # Element has no text: try next alternate location

        alternate = '_' + prop
        if alternate in xpath_map:
            return parse_property(tree_to_parse, xpath_root, xpath_map, alternate)

    elif '@' not in xpath:
        parsed = get_elements_text(tree_to_parse, xpath)
    else:
        xroot, xattr = get_xpath_tuple(xpath)
        parsed = get_elements_attributes(tree_to_parse, xroot, xattr)

    return get_default_for(prop, parsed)


def update_property(tree_to_update, xpath_root, xpaths, prop, values, supported=None):
    """
    Either update the tree the default way, or call the custom updater

    Default Way: Existing values in the tree are overwritten. If xpaths contains a single path,
    then each value is written to the tree at that path. If xpaths contains a list of xpaths,
    then the values corresponding to each xpath index are written to their respective locations.
    In either case, empty values are ignored.

    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location shared by all the xpaths passed in
    :param xpaths: a string or a list of strings representing the XPATH location(s) to which to write values
    :param prop: the name of the property of the parser containing the value(s) with which to update the tree
    :param values: a single value, or a list of values to write to the specified XPATHs
    :param supported: optional collection of supported property names; alternate ("_"-prefixed)
        variants of supported props are cleared rather than written

    :see: ParserProperty for more on custom updaters

    :return: a list of all elements updated by this operation
    """

    if supported and prop.startswith('_') and prop.strip('_') in supported:
        values = ''  # Remove alternate elements: write values only to primary location
    else:
        values = get_default_for(prop, values)  # Enforce defaults as required per property

    if not xpaths:
        return []
    elif not isinstance(xpaths, ParserProperty):
        return _update_property(tree_to_update, xpath_root, xpaths, values)
    else:
        # Call ParserProperty.set_prop without xpath_root (managed internally)
        return xpaths.set_prop(tree_to_update=tree_to_update, prop=prop, values=values)
def _update_property(tree_to_update, xpath_root, xpaths, values):
    """
    Default update operation for a single parser property. If xpaths contains one xpath,
    then one element per value will be inserted at that location in the tree_to_update;
    otherwise, the number of values must match the number of xpaths.
    """

    def update_at(element, position, root, path, prop_values):
        """ Internal helper function to encapsulate single item update """

        # The path nests under a non-empty root: one root element is inserted per value
        nested = bool(root and len(path) > len(root) and path.startswith(root))
        path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

        # First clear out what the tree already holds at this location

        if attr:
            removed = [get_element(element, path)]
            remove_element_attributes(removed[0], attr)
        elif not nested:
            removed = wrap_value(remove_element(element, path))
        else:
            path = get_xpath_branch(root, path)
            removed = [] if position != 0 else [remove_element(e, path, True) for e in get_elements(element, root)]

        if not prop_values:
            return removed

        # Then insert one element (or attribute assignment) per value

        inserted = []

        for offset, value in enumerate(wrap_value(prop_values)):
            target = element

            if nested:
                target = insert_element(element, (offset + position), root)

            value = value if isinstance(value, str) else value.decode('utf-8')
            if not attr:
                inserted.append(insert_element(target, offset, path, value))
            elif path:
                inserted.append(insert_element(target, offset, path, **{attr: value}))
            else:
                set_element_attributes(target, **{attr: value})
                inserted.append(target)

        return inserted

    # Update each of the XPATHs with each of the values

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    if isinstance(xpaths, str):
        return update_at(tree_to_update, 0, xpath_root, xpaths, values)

    updated = []

    for index, xpath in enumerate(xpaths):
        value = values[index] if values else None
        updated.extend(update_at(tree_to_update, index, xpath_root, xpath, value))

    return updated


def update_complex(tree_to_update, xpath_root, xpath_map, prop, values):
    """
    Updates and returns the updated complex Element parsed from tree_to_update.
    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location of the root of the complex Element
    :param xpath_map: a Dictionary of XPATHs corresponding to the complex structure definition
    :param prop: the property identifying the complex structure to be serialized
    :param values: a Dictionary representing the complex structure to be updated
    """

    # Clear out the existing structure before writing anything
    remove_element(tree_to_update, xpath_root, True)

    values = reduce_value(values, {})

    if not values:
        # Returns the elements corresponding to property removed from the tree
        return update_property(tree_to_update, xpath_root, xpath_root, prop, values)

    for subprop, value in values.items():
        subxpath = xpath_map[subprop]
        value = get_default_for_complex_sub(prop, subprop, value, subxpath)
        update_property(tree_to_update, None, subxpath, subprop, value)

    return get_element(tree_to_update, xpath_root)
def update_complex_list(tree_to_update, xpath_root, xpath_map, prop, values):
    """
    Updates and returns the list of updated complex Elements parsed from tree_to_update.
    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location of each complex Element
    :param xpath_map: a Dictionary of XPATHs corresponding to the complex structure definition
    :param prop: the property identifying the complex structure to be serialized
    :param values: a List containing the updated complex structures as Dictionaries
    """

    updated = []

    # Clear out all existing structures before writing anything
    remove_element(tree_to_update, xpath_root, True)

    if not values:
        # Returns the elements corresponding to property removed from the tree
        updated.append(update_property(tree_to_update, xpath_root, xpath_root, prop, values))
        return updated

    for position, complex_struct in enumerate(wrap_value(values)):

        # Insert a new complex element root for each dict in the list
        complex_element = insert_element(tree_to_update, position, xpath_root)

        for subprop, value in complex_struct.items():
            subxpath = get_xpath_branch(xpath_root, xpath_map[subprop])
            value = get_default_for_complex_sub(prop, subprop, value, subxpath)
            updated.append(update_property(complex_element, None, subxpath, subprop, value))

    return updated


def validate_any(prop, value, xpath_map=None):
    """ Validates any metadata property, complex or simple (string or array) """

    if value is None:
        return  # Nothing to validate: missing values are defaulted elsewhere

    if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS):
        validate_complex_list(prop, value, xpath_map)

    elif prop in (BOUNDING_BOX, LARGER_WORKS, RASTER_INFO):
        validate_complex(prop, value, xpath_map)

    elif prop == DATES:
        validate_dates(prop, value, xpath_map)

    elif prop == PROCESS_STEPS:
        validate_process_steps(prop, value)

    elif prop not in SUPPORTED_PROPS and xpath_map is not None:
        # Validate custom data structures as complex lists by default
        validate_complex_list(prop, value, xpath_map)

    else:
        for val in wrap_value(value, include_empty=True):
            validate_type(prop, val, (str, list))
def validate_complex(prop, value, xpath_map=None):
    """ Default validation for single complex data structure """

    if value is None:
        return  # Missing values are defaulted elsewhere

    validate_type(prop, value, dict)

    if prop in COMPLEX_DEFINITIONS:
        complex_keys = COMPLEX_DEFINITIONS[prop]
    else:
        complex_keys = {} if xpath_map is None else xpath_map

    for sub_prop, sub_val in value.items():
        sub_key = '.'.join((prop, sub_prop))

        if sub_prop not in complex_keys:
            _validation_error(prop, None, value, ('keys: {0}'.format(','.join(complex_keys))))

        validate_type(sub_key, sub_val, (str, list))


def validate_complex_list(prop, value, xpath_map=None):
    """ Default validation for Attribute Details data structure """

    if value is None:
        return  # Missing values are defaulted elsewhere

    validate_type(prop, value, (dict, list))

    if prop in COMPLEX_DEFINITIONS:
        complex_keys = COMPLEX_DEFINITIONS[prop]
    else:
        complex_keys = {} if xpath_map is None else xpath_map

    for idx, complex_struct in enumerate(wrap_value(value)):
        cs_idx = '{0}[{1}]'.format(prop, idx)
        validate_type(cs_idx, complex_struct, dict)

        for cs_prop, cs_val in complex_struct.items():
            cs_key = '.'.join((cs_idx, cs_prop))

            if cs_prop not in complex_keys:
                _validation_error(prop, None, value, ('keys: {0}'.format(','.join(complex_keys))))

            if isinstance(cs_val, list):
                # Validate each member of a list-valued sub-property individually
                for list_idx, list_val in enumerate(cs_val):
                    validate_type('{0}[{1}]'.format(cs_key, list_idx), list_val, str)
            else:
                validate_type(cs_key, cs_val, (str, list))


def validate_dates(prop, value, xpath_map=None):
    """ Default validation for Date Types data structure """

    if value is None:
        return  # Missing values are defaulted elsewhere

    validate_type(prop, value, dict)

    if not value:
        return  # An empty dict means no dates were parsed

    if DATE_TYPE not in value or DATE_VALUES not in value:
        if prop in COMPLEX_DEFINITIONS:
            complex_keys = COMPLEX_DEFINITIONS[prop]
        else:
            complex_keys = COMPLEX_DEFINITIONS[DATES] if xpath_map is None else xpath_map

        _validation_error(prop, None, value, ('keys: {0}'.format(','.join(complex_keys))))

    date_type = value[DATE_TYPE]

    if date_type not in DATE_TYPES:
        _validation_error('dates.type', None, date_type, DATE_TYPES)

    date_vals = value[DATE_VALUES]

    validate_type('dates.values', date_vals, list)

    dates_len = len(date_vals)

    # Each date type constrains how many values are allowed

    if date_type == DATE_TYPE_MISSING and dates_len != 0:
        _validation_error('len(dates.values)', None, dates_len, 0)

    if date_type == DATE_TYPE_SINGLE and dates_len != 1:
        _validation_error('len(dates.values)', None, dates_len, 1)

    if date_type == DATE_TYPE_RANGE and dates_len != 2:
        _validation_error('len(dates.values)', None, dates_len, 2)

    if date_type == DATE_TYPE_MULTIPLE and dates_len < 2:
        _validation_error('len(dates.values)', None, dates_len, 'at least two')

    for idx, date in enumerate(date_vals):
        validate_type('dates.value[{0}]'.format(idx), date, str)


def validate_process_steps(prop, value):
    """ Default validation for Process Steps data structure """

    if value is None:
        return  # Missing values are defaulted elsewhere

    validate_type(prop, value, (dict, list))

    procstep_keys = COMPLEX_DEFINITIONS[prop]

    for idx, procstep in enumerate(wrap_value(value)):
        ps_idx = '{0}[{1}]'.format(prop, idx)
        validate_type(ps_idx, procstep, dict)

        for ps_prop, ps_val in procstep.items():
            ps_key = '.'.join((ps_idx, ps_prop))

            if ps_prop not in procstep_keys:
                _validation_error(prop, None, value, ('keys: {0}'.format(','.join(procstep_keys))))

            if ps_prop != 'sources':
                validate_type(ps_key, ps_val, str)
            else:
                validate_type(ps_key, ps_val, (str, list))

                for src_idx, src_val in enumerate(wrap_value(ps_val)):
                    validate_type('{0}[{1}]'.format(ps_key, src_idx), src_val, str)
def validate_properties(props, required):
    """
    Ensures the key set contains the base supported properties for a Parser
    :param props: a set of property names to validate against those supported
    :param required: the set of required property names; falsy defaults to SUPPORTED_PROPS
    :raises ValidationError: if any required property name is missing from props
    """

    props = set(props)
    required = set(required or SUPPORTED_PROPS)

    # issubset replaces the original intersection-length comparison: same truth, clearer intent
    if not required.issubset(props):
        missing = required - props
        raise ValidationError(
            'Missing property names: {props}', props=','.join(missing), missing=missing
        )


def validate_type(prop, value, expected):
    """
    Default validation for all types
    :param prop: the property name used in any error raised
    :param value: the value whose type is checked; None is always accepted
    :param expected: a type or tuple of types acceptable for value
    :raises ValidationError: if value is neither None nor an instance of expected
    """

    # Validate on expected type(s), but ignore None: defaults handled elsewhere
    if value is not None and not isinstance(value, expected):
        _validation_error(prop, type(value).__name__, None, expected)


def _validation_error(prop, prop_type, prop_value, expected):
    """
    Default validation for updated properties
    :param prop: the property name the error describes
    :param prop_type: the offending type name, or None when the value itself is invalid
    :param prop_value: the offending value (reported when prop_type is None)
    :param expected: a description of what was expected
    :raises ValidationError: always
    """

    if prop_type is None:
        attrib = 'value'
        assigned = prop_value
    else:
        attrib = 'type'
        assigned = prop_type

    raise ValidationError(
        'Invalid property {attrib} for {prop}:\n\t{attrib}: {assigned}\n\texpected: {expected}',
        attrib=attrib, prop=prop, assigned=assigned, expected=expected,
        invalid={prop: prop_value} if attrib == 'value' else {}
    )


class ParserProperty(object):
    """
    A class to manage Parser dynamic getter & setters.
    Usually an XPATH is sufficient to define reads and writes,
    but for complex data structures more processing is necessary.
    """

    def __init__(self, prop_parser, prop_updater, xpath=None):
        """
        Initialize with callables for getting and setting
        :param prop_parser: a callable used to read the property; may be None if xpath is given
        :param prop_updater: a callable used to write the property (always required)
        :param xpath: an optional XPATH used for reads when no parser is given, and injected into writes
        :raises ConfigurationError: if prop_parser is not callable with no xpath, or prop_updater is not callable
        """

        # callable() is the idiomatic spelling of the original hasattr(obj, '__call__') checks

        if callable(prop_parser):
            self._parser = prop_parser
        elif xpath is not None:
            self._parser = None  # Reads will be driven by the XPATH instead of a callable
        else:
            raise ConfigurationError(
                'Invalid property getter:\n\tpassed in: {param}\n\texpected: {expected}',
                param=type(prop_parser), expected=' or provide XPATH'
            )

        if callable(prop_updater):
            self._updater = prop_updater
        else:
            raise ConfigurationError(
                'Invalid property setter:\n\tpassed in: {param}\n\texpected: {expected}',
                param=type(prop_updater), expected=''
            )

        self.xpath = xpath

    def get_prop(self, prop):
        """
        Calls the getter with no arguments and returns its value
        :param prop: the property name, passed through to the parser when truthy
        :raises ConfigurationError: if this property was configured with only an XPATH
        """

        if self._parser is None:
            raise ConfigurationError('Cannot call ParserProperty."get_prop" with no parser configured')

        return self._parser(prop) if prop else self._parser()

    def set_prop(self, **setter_args):
        """
        Calls the setter with the specified keyword arguments for flexibility.
        :param setter_args: must contain tree_to_update, prop, values
        :return: None, or the value updated for complex values
        """

        if self.xpath:
            setter_args['xpaths'] = self.xpath  # A configured XPATH always overrides any passed in

        return self._updater(**setter_args)
module" 57 | category = "main" 58 | optional = false 59 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 60 | 61 | [package.dependencies] 62 | six = ">=1.5" 63 | 64 | [[package]] 65 | name = "six" 66 | version = "1.16.0" 67 | description = "Python 2 and 3 compatibility utilities" 68 | category = "main" 69 | optional = false 70 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 71 | 72 | [metadata] 73 | lock-version = "1.1" 74 | python-versions = "^3.6" 75 | content-hash = "1f23dcbcdbe374f021649194134cc936d575bd924d115b583a8e0ad1c7e4efb6" 76 | 77 | [metadata.files] 78 | defusedxml = [ 79 | {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, 80 | {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, 81 | ] 82 | frozendict = [ 83 | {file = "frozendict-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3cbb8bd5ddbdd1db1caa670586b50f9e665e60b1c095add5aa04d9e2bedf5f00"}, 84 | {file = "frozendict-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:513a3f3a8ff2767492570c78c158845a1cc7b4c954fc9f78ed313b2463727cae"}, 85 | {file = "frozendict-2.1.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3b4ac7aafd613959a055818c3e9844822196c874b9c8bf118b6c1419ff1fbbdc"}, 86 | {file = "frozendict-2.1.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb1ebca998c68c3bf28ce70f14a392431bbe01bfa610e3e9e635176d9ecbba71"}, 87 | {file = "frozendict-2.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c0e09b7159fee7c6dafd727dec2ea7a135f47458dd3996705f9a64e3fca3bf73"}, 88 | {file = "frozendict-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79666516d594ced95a294869988dba2daff89d9d274061f7138e1c49e49428ce"}, 89 | {file = "frozendict-2.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:3cdc8602e398d28887484226a493ae6e96ff3532b4368d4d49ab96c5ee7eb61d"}, 90 | {file = "frozendict-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cfe6d89d2af7b97726a5f789d73b5298eb067a9348a8cc8f0834fdc5349b125"}, 91 | {file = "frozendict-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:afaaa4fc0343af15f1153d9e09660f6c388ab1d6d01147bbd7f7979e723258df"}, 92 | {file = "frozendict-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f45f7a724e93653f0d446e0cc99e09bda4835f16de08eb1de3b007537e18daa1"}, 93 | {file = "frozendict-2.1.2-py3-none-any.whl", hash = "sha256:a0650a673ce6e320e8b25a38f4620f42382c081b2088051c81073a67f14bac32"}, 94 | {file = "frozendict-2.1.2.tar.gz", hash = "sha256:2eb92fbe8dde37075ed0e5dd3dac88a850a04ccfc646d20f3412b72220ddbaf2"}, 95 | ] 96 | mock = [ 97 | {file = "mock-4.0.3-py3-none-any.whl", hash = "sha256:122fcb64ee37cfad5b3f48d7a7d51875d7031aaf3d8be7c42e2bee25044eee62"}, 98 | {file = "mock-4.0.3.tar.gz", hash = "sha256:7d3fbbde18228f4ff2f1f119a45cdffa458b4c0dee32eb4d2bb2f82554bac7bc"}, 99 | ] 100 | parserutils = [ 101 | {file = "parserutils-2.0.1-py3-none-any.whl", hash = "sha256:19fd3086fd360b3b53322400a236baebcd8109f0483f7c6396f4855e5d3515b1"}, 102 | {file = "parserutils-2.0.1.tar.gz", hash = "sha256:f927e69779d81db508db98e6e8ec331f90ff31ff2868161d1cce30fffe92bec3"}, 103 | ] 104 | pipdeptree = [ 105 | {file = "pipdeptree-2.2.0-py2-none-any.whl", hash = "sha256:e31bcb4db905fe3df15e7c41bc015a3587ef4bafdd5119b011aae32948c4a371"}, 106 | {file = "pipdeptree-2.2.0-py3-none-any.whl", hash = "sha256:95fb603e46343651342583c337a0ee68d3ccade7a81f5cf6b2fbd8151b79ed80"}, 107 | {file = "pipdeptree-2.2.0.tar.gz", hash = "sha256:21a89e77d6eae635685e8af5ecd56561f092f8216bb290e7ae5362885d611f60"}, 108 | ] 109 | python-dateutil = [ 110 | {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, 111 | {file = 
"python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, 112 | ] 113 | six = [ 114 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 115 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 116 | ] 117 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "gis-metadata-parser" 3 | version = "2.0.1" 4 | description = "Parser for GIS metadata standards including ArcGIS, FGDC and ISO-19115" 5 | authors = ["dharvey-consbio "] 6 | keywords = ["arcgis", "fgdc", "iso", "ISO-19115", "ISO-19139", "gis", "metadata", "parser", "xml", "gis_metadata", "gis_metadata_parser"] 7 | readme = "README.md" 8 | homepage = "https://github.com/consbio/gis-metadata-parser/" 9 | repository = "https://github.com/consbio/gis-metadata-parser/" 10 | license = "BSD" 11 | packages = [ 12 | { include = "gis_metadata" }, 13 | ] 14 | 15 | [tool.poetry.dependencies] 16 | python = "^3.6" 17 | frozendict = "^2.0" 18 | parserutils = "^2.0.1" 19 | 20 | [tool.poetry.dev-dependencies] 21 | mock = "*" 22 | pipdeptree = "*" 23 | 24 | [build-system] 25 | requires = ["poetry-core>=1.0.0"] 26 | build-backend = "poetry.core.masonry.api" 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | from setuptools import Command, setup 5 | 6 | 7 | class RunTests(Command): 8 | user_options = [] 9 | 10 | def initialize_options(self): 11 | pass 12 | 13 | def finalize_options(self): 14 | pass 15 | 16 | def run(self): 17 | errno = subprocess.call([sys.executable, '-m', 'unittest', 
'gis_metadata.tests.tests']) 18 | raise SystemExit(errno) 19 | 20 | 21 | with open('README.md') as readme: 22 | long_description = readme.read() 23 | 24 | 25 | setup( 26 | name='gis-metadata-parser', 27 | description='Parser for GIS metadata standards including ArcGIS FGDC and ISO-19115', 28 | long_description=long_description, 29 | long_description_content_type='text/markdown', 30 | keywords='arcgis,fgdc,iso,ISO-19115,ISO-19139,gis,metadata,parser,xml,gis_metadata,gis_metadata_parser', 31 | version='2.0.1', 32 | packages=['gis_metadata'], 33 | install_requires=[ 34 | 'frozendict>=2.0', 'parserutils>=2.0.1' 35 | ], 36 | tests_require=['mock'], 37 | url='https://github.com/consbio/gis-metadata-parser', 38 | license='BSD', 39 | cmdclass={'test': RunTests} 40 | ) 41 | --------------------------------------------------------------------------------