├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ └── python-app.yml ├── .gitignore ├── CHANGELOG.md ├── CHECKS.md ├── CITATION.cff ├── LICENSE ├── README.md ├── images └── architecture.png ├── pyQuARC ├── __init__.py ├── code │ ├── __init__.py │ ├── base_validator.py │ ├── checker.py │ ├── constants.py │ ├── custom_checker.py │ ├── custom_validator.py │ ├── datetime_validator.py │ ├── downloader.py │ ├── gcmd_validator.py │ ├── scheduler.py │ ├── schema_validator.py │ ├── string_validator.py │ ├── tracker.py │ ├── url_validator.py │ └── utils.py ├── main.py ├── schemas │ ├── MetadataCommon.xsd │ ├── MimeType.csv │ ├── UmmCommon_1.2.xsd │ ├── catalog.xml │ ├── check_messages.json │ ├── check_messages_override.json │ ├── checks.json │ ├── checks_override.json │ ├── chronounits.csv │ ├── dif10_schema.xsd │ ├── echo-c_json.json │ ├── echo-c_schema.xsd │ ├── echo-g_schema.xsd │ ├── granuledataformat.csv │ ├── horizontalresolutionrange.csv │ ├── idnnode.csv │ ├── instruments.csv │ ├── locations.csv │ ├── platforms.csv │ ├── projects.csv │ ├── providers.csv │ ├── rucontenttype.csv │ ├── rule_mapping.json │ ├── rules_override.json │ ├── ruleset.json │ ├── sciencekeywords.csv │ ├── temporalresolutionrange.csv │ ├── umm-c-json-schema.json │ ├── umm-cmn-json-schema.json │ ├── umm-g-json-schema.json │ ├── version.txt │ └── verticalresolutionrange.csv └── version.txt ├── pyproject.toml ├── requirements.txt ├── setup.py └── tests ├── .DS_Store ├── __init__.py ├── common.py ├── fixtures ├── __init__.py ├── bad_syntax_metadata.echo-c ├── checker.py ├── checks_dif10_master_test_file.json ├── checks_echo-c_master_test_file.json ├── checks_echo-g_master_test_file.json ├── checks_umm-c_master_test_file.json ├── checks_umm-g_master_test_file.json ├── common.py ├── custom_checker.py ├── downloader.py ├── no_error_metadata.echo-c ├── test_check_files.py ├── test_cmr_metadata.dif10 ├── test_cmr_metadata.echo-c ├── test_cmr_metadata.echo-g ├── test_cmr_metadata.umm-c ├── test_cmr_metadata.umm-g ├── test_cmr_metadata_echo-c.json └── validator.py ├── test_checker.py ├── test_custom_checker.py ├── test_datetime_validator.py ├── test_downloader.py ├── test_schema_validator.py └── test_string_validator.py /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: slesaad, xhagrg 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Outputs** 24 | If applicable, add outputs to help explain your problem. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is this a new check? Please describe the check and the field.** 11 | : check you need. 
12 | 13 | **Describe the checks in steps** 14 | 15 | 16 | **Is there a similar check already implemented? Please list the name of the check** 17 | 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Enable version updates for pip 4 | - package-ecosystem: "pip" # See documentation for possible values 5 | directory: "/" # Location of package manifests 6 | schedule: 7 | interval: "weekly" 8 | -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: [pull_request] 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.8 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.8 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install flake8 pytest 23 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 24 | - name: Lint with flake8 25 | run: | 26 | # stop the build if there are Python syntax errors or undefined names 27 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 28 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 29 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
30 |     - name: Test with pytest
31 |       run: |
32 |         pytest
33 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .vscode/
3 | .env/*
4 | env/*
5 | *.ipynb
6 | build/*
7 | dist/*
8 | pyQuARC.egg-info/*
9 | .venv/*
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 | 
3 | ## v1.2.6
4 | - Created citation file
5 | - Updated README
6 | 
7 | ## v1.2.5
8 | - Updated README
9 | - Updated umm-g schema file
10 | 
11 | ## v1.2.4
12 | - Updated UMM-C schema file
13 | - Added science_keywords_presence_check
14 | - Added DOI authority presence check for echo-c and umm-c
15 | - Adjusted output message for url_check
16 | - Added orbit fields to rule_mapping for spatial_extent_fulfillment_check
17 | - Resolved ISO standard typo in check_messages
18 | 
19 | ## v1.2.3
20 | - Updated schema files
21 | - Added Free And Open Data check
22 | - Added Horizontal Resolution Presence check
23 | - Added Data Format Presence check
24 | - Added Standard Product check
25 | - Added License URL Description check
26 | - Added Granule Campaign Name Presence check
27 | - Revised GCMD long name presence checks
28 | - Revised validate_beginning_datetime_against_granules check
29 | - Removed redundant checks
30 | - Fixed auth issue when downloading metadata files
31 | 
32 | ## v1.2.2
33 | 
34 | - Bugfixes:
35 |   - Stray newlines in GCMD keywords
36 |   - Reading XML metadata for fields with attributes didn't produce the expected output
37 |   - Missing field in rule_mapping
38 | 
39 | 
40 | ## v1.2.1
41 | 
42 | - Added support for automated regression testing
43 | - Revised output messages
44 | 
45 | ## v1.2.0
46 | 
47 | - Added support for ECHO10 Granule, UMM-G (UMM-JSON Granule) and UMM-C (UMM-JSON Collection) metadata
48 | - Added support for custom CMR host
49 | - Added support for some UMM fields that look like the following:
50 | 
51 | ```json
52 | "ContactMechanisms": [
53 |     {
54 |         "Type": "Telephone",
55 |         "Value": "605-594-6116"
56 |     },
57 |     {
58 |         "Type": "U.S. toll free",
59 |         "Value": "866-573-3222"
60 |     },
61 |     {
62 |         "Type": "Email",
63 |         "Value": "lpdaac@usgs.gov"
64 |     }
65 | ]
66 | ```
67 | 
68 | To specify the "Email" field, in the `rule_mapping`, a user would put in `ContactMechanisms/Value?Type=Email` as the field.
69 | - Any field specified in a datetime check that involves comparison should have a corresponding `datetime_format_check` entry; otherwise, the check won't run
70 | - Added support for `data` specific to format type. This will take precedence over the generic `data`.
Example:
71 | 
72 | ```json
73 | "get_data_url_check": {
74 |     "rule_name": "GET DATA URL check",
75 |     "fields_to_apply": {
76 |         "dif10": [
77 |             {
78 |                 "fields": [
79 |                     "DIF/Related_URL"
80 |                 ],
81 |                 "data": [
82 |                     ["URL_Content_Type", "Type"]
83 |                 ]
84 |             }
85 |         ],
86 |         "umm-json": [
87 |             {
88 |                 "fields": [
89 |                     "RelatedUrls"
90 |                 ]
91 |             }
92 |         ]
93 |     },
94 |     "data": [
95 |         ["Type"]
96 |     ],
97 |     "severity": "error",
98 |     "check_id": "get_data_url_check"
99 | },
100 | ```
101 | 
102 | - Prioritized field dependencies over data dependencies (dependencies from fields take precedence over dependencies from data)
103 | - Added collection `version` to collection datetime validation with granules for accuracy
104 | - Allowed DIF10 datetime fields to support ISO Date (not just ISO Datetime)
105 | - Generalized and renamed `datetime_compare` check to `date_compare`
106 | - Updated auto GCMD keywords downloader to use the new GCMD URL
107 | - Added `pyquarc_errors` to the response, which will contain any errors that were thrown as exceptions during validation
108 | - Added checks that validate granule fields against the corresponding collection fields
109 | 
110 | 
111 | ### List of added and updated checks
112 | 
113 | - GET DATA URL Check
114 | - Data Center Long Name Check
115 | - URL Description Uniqueness Check
116 | - Periodic Duration Unit Check
117 | - Characteristic Name Uniqueness Check UMM
118 | - Range Date Time Logic Check
119 | - Range Date Time Logic Check
120 | - Project Date Time Logic Check
121 | - Project Date Time Logic Check
122 | - Periodic Date Time Logic Check
123 | - Datetime ISO Format Check
124 | - URL Health and Status Check
125 | - Delete Time Check
126 | - DOI Missing Reason Enumeration Check
127 | - Processing Level Description Length Check
128 | - UMM Controlled Collection State List
129 | - Ends at present flag logic check
130 | - Ends at present flag presence check
131 | - Data Contact Role Enumeration Check
132 | - Controlled Contact Role Check
133 | - Characteristic Description Length Check
134 | - Organization Longname GCMD Check
135 | - Instrument Short/Longname Consistency Check
136 | - Instrument Shortname GCMD Check
137 | - Instrument Long Name Check
138 | - Platform Shortname GCMD Check
139 | - Data Format GCMD Check
140 | - Platform Longname GCMD Check
141 | - Platform Type GCMD Check
142 | - Campaign Short/Long name consistency Check
143 | - Campaign Short Name GCMD Check
144 | - Campaign Long Name GCMD Check
145 | - Collection Data Type Enumeration Check
146 | - Bounding Coordinates Logic Check
147 | - Vertical Spatial Domain Type Check
148 | - Spatial Coverage Type Check
149 | - Campaign Name Presence Check
150 | - Spatial Extent Requirement Fulfillment Check
151 | - Collection Progress Related Fields Consistency Check
152 | - Online Resource Type GCMD Check
153 | - Characteristic Name Uniqueness Check
154 | - Ending Datetime validation against granules
155 | - Beginning Datetime validation against granules
156 | - ISO Topic Category Vocabulary Check
157 | - Temporal Extent Requirement Check
158 | - FTP Protocol Check
159 | - Citation Version Check
160 | - Default Date Check
161 | - Online Description Presence Check
162 | - IDN Node Shortname GCMD Check
163 | - Chrono Unit GCMD Check
164 | - Platform Type Presence Check
165 | - Horizontal Data Resolution Unit Controlled Vocabulary Check
166 | - Sensor number check
167 | - Data Center Shortname GCMD Check
168 | - Characteristics Data Type Presence Check
169 | - Platform Type Presence Check
170 | - Platform 
Longname Presence Check 171 | - Granule Platform Short Name Check 172 | - Horizontal Data Resolution Unit Controlled Vocabulary Check 173 | - Periodic Duration Unit Check 174 | - URL Description Uniqueness Check 175 | - Online Resource Description Uniqueness Check 176 | - Online Access Description Uniqueness Check 177 | - Metadata Update Time Logic Check 178 | - Granule Single Date Time Check 179 | - Granule Project Short Name Check 180 | - Granule Sensor Short Name Check 181 | - Validate Granule Data Format Against Collection Check 182 | - Granule Data Format Presence Check 183 | 184 | 185 | ## v1.1.5 186 | 187 | - Added reader for specific columns from GCMD csvs 188 | - Fixed bug to handle cases when there are multiple entries for same shortname but the first entry has missing long name 189 | 190 | ## v1.1.4 191 | 192 | - Added error handling for errored checks 193 | - Fixed minor bugs 194 | 195 | ## v1.1.3 196 | 197 | - Fixed null pointer exception in the check `collection_progress_consistency_check` 198 | 199 | ## v1.1.2 200 | 201 | - Removed stdout when importing pyQuARC package 202 | 203 | ## v1.1.1 204 | 205 | - Included addition of `version.txt` in the package build 206 | 207 | ## v1.1.0 208 | 209 | - Support for [DIF10](https://earthdata.nasa.gov/esdis/eso/standards-and-references/directory-interchange-format-dif-standard) collection level metadata 210 | - Added new checks and rules listed in the following section 211 | - Restructured the schema files for ease of new rule addition 212 | - Users will now be able to deal with just the `rule_mapping.json` file without having to mess with `checks.json` 213 | - Added documentation for all available checks, available at [CHECKS.md](./CHECKS.md) 214 | 215 | ### List of added checks 216 | 217 | - `opendap_url_location_check` 218 | - `user_services_check` 219 | - `doi_missing_reason_explanation` 220 | - `boolean_check` 221 | - `collection_progress_consistency_check` 222 | - `online_resource_type_gcmd_check` 223 | - `characteristic_name_uniqueness_check` 224 | - `validate_ending_datetime_against_granules` 225 | - `validate_beginning_datetime_against_granules` 226 | - `get_data_url_check` 227 | 228 | ### List of added rules 229 | 230 | - `altitude_unit_check` 231 | - `campaign_name_presence_check` 232 | - `spatial_coverage_type_presence_check` 233 | - `horizontal_datum_name_check` 234 | - `online_access_url_presence_check` 235 | - `online_resource_url_presence_check` 236 | - `online_access_url_description_check` 237 | - `online_resource_url_description_check` 238 | - `opendap_url_location_check` 239 | - `location_keyword_presence_check` 240 | - `spatial_extent_requirement_fulfillment_check` 241 | - `license_information_check` 242 | - `collection_citation_presence_check` 243 | - `user_services_check` 244 | - `doi_missing_reason_explanation` 245 | - `boolean_check` 246 | - `collection_progress_consistency_check` 247 | - `online_resource_type_gcmd_check` 248 | - `online_resource_type_presence_check` 249 | - `characteristic_name_uniqueness_check` 250 | - `validate_ending_datetime_against_granules` 251 | - `validate_beginning_datetime_against_granules` 252 | - `future_date_check` 253 | - `iso_topic_category_check` 254 | - `dif10_date_not_provided_check` 255 | - `temporal_extent_requirement_check` 256 | - `ftp_protocol_check` 257 | - `citation_version_check` 258 | - `default_date_check` 259 | - `url_desc_presence_check` 260 | - `get_data_url_check` 261 | 262 | ## v1.0.0 263 | 264 | - Support for **ECHO10** collection level metadata 265 | - Feature 
to use as a package
266 | - Description and Architecture Diagram in the README document
267 | 
--------------------------------------------------------------------------------
/CHECKS.md:
--------------------------------------------------------------------------------
1 | # Adding a new check
2 | 
3 | **Files that you need to work with:**
4 | 
5 | 1. `rules_override.json`
6 | 2. `check_messages_override.json`
7 | 
8 | **Steps:**
9 | 
10 | 1. Create a unique rule_id for your rule. Make it meaningful. Something like `doi_missing_reason_explanation` (no spaces).
11 | 2. Make an entry in each of the json files using the same rule_id.
12 | 3. In `rules_override.json`, add in the `fields_to_apply`, `data` and `check_id`. Add in the `echo10` and `dif10` fields corresponding to the rule. `check_id` should be one of the available `check_id`s (listed below). The format of `data` corresponding to each of these `check_id`s is given in the specifications below.
13 | 
14 | ## Available checks
15 | 
16 | ### Generic checks
17 | 
18 | #### `date_compare`
19 | 
20 | Compares two dates based on the specified relationship.
21 | Relationships supported: `lt`, `lte`, `gt`, `gte`, `eq` and `neq`.
22 | 
23 | ##### Case I: Compare two different datetime field values
24 | 
25 | Specify the two fields in `fields` and the corresponding `relation`.
26 | 
27 | ##### Case II: Compare a field with a specific date
28 | 
29 | Specify the field in `fields`. Add the date and relation in `data` in the format `[{time}, {relation}]`. eg: `["now", "gte"]`
30 | Currently, only `now` is supported as a date.
31 | 
32 | #### `datetime_format_check`
33 | 
34 | Checks whether the datetime is in the ISO format.
35 | 
36 | #### `url_check`
37 | 
38 | Checks that the URL is well-formed, resolves to a webpage, and returns a 200 OK status.
39 | 
40 | #### `string_compare`
41 | 
42 | Compares two strings based on the specified relationship.
43 | Relationships supported: `lt`, `lte`, `gt`, `gte`, `eq` and `neq`.
44 | 
45 | ##### Case I: Compare two different string field values
46 | 
47 | Specify the two fields in `fields` and the corresponding `relation`.
48 | 
49 | ##### Case II: Compare a field with a specific string
50 | 
51 | Specify the field in `fields`. Add the string and relation in `data` in the format `[{string}, {relation}]`. eg: `["FTP", "eq"]`
52 | 
53 | #### `length_check`
54 | 
55 | Checks if the length of the field value adheres to the relationship specified.
56 | `data format`: `[{length}, {relation}]`. eg: `[100, "gte"]`.
57 | 
58 | #### `availability_check`
59 | 
60 | Checks the rule: if the first field is provided, the second field has to be provided.
61 | `fields: (ordered) [ {first_field}, {second_field} ]`
62 | 
63 | #### `boolean_check`
64 | 
65 | Checks if the field value is boolean: either `true` or `false` in any case combination.
66 | 
67 | #### `controlled_keywords_check`
68 | 
69 | Checks if the field value is one of the controlled keywords; provide the controlled keywords as `data` in the format:
70 | `[["keyword1", "keyword2",...]]`
71 | 
72 | #### `doi_validity_check`
73 | 
74 | Checks if the DOI provided resolves to a valid document.
75 | 
76 | #### `one_item_presence_check`
77 | 
78 | Checks if at least one of the given fields is populated.
79 | 
80 | #### `uniqueness_check`
81 | 
82 | Checks if the field values are unique.
83 | 
84 | #### `count_check`
85 | 
86 | Checks if a field value that represents a count of other fields matches the actual number of those fields.
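
Putting the steps above together, a hypothetical `rules_override.json` entry that wires the generic `date_compare` check (Case II) to an ECHO10 field might look like the following. The rule id, field path, and severity here are illustrative only; mirror the structure of existing entries in `rule_mapping.json` for your own rules.

```json
"beginning_date_time_past_check": {
    "rule_name": "Beginning date time past check",
    "fields_to_apply": {
        "echo10": [
            {
                "fields": [
                    "Collection/Temporal/RangeDateTime/BeginningDateTime"
                ]
            }
        ]
    },
    "data": ["now", "lte"],
    "severity": "warning",
    "check_id": "date_compare"
}
```

A message entry keyed by the same rule id goes in `check_messages_override.json`; a sketch of that entry is given at the end of this document.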
87 | 
88 | ### Miscellaneous Checks
89 | 
90 | #### `bounding_coordinate_logic_check`
91 | 
92 | Check that the North bounding coordinate is always larger than the South bounding coordinate and the East bounding coordinate is always larger than the West bounding coordinate.
93 | `fields`: the parent field
94 | (eg. `Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle`)
95 | 
96 | #### `characteristic_name_uniqueness_check`
97 | 
98 | If multiple Characteristics are provided, checks whether each of their names is unique.
99 | 
100 | #### `collection_progress_consistency_check`
101 | 
102 | There are a few fields pertaining to the status of the collection as either active or complete whose values should align with one another. This check looks across these related fields to make sure the values are logically consistent.
103 | 
104 | For `ACTIVE` collections:
105 | `CollectionProgress = ACTIVE`
106 | No `EndingDateTime` should be provided
107 | `EndsAtPresentFlag` = `true`
108 | 
109 | For `COMPLETE` collections:
110 | `CollectionProgress = COMPLETE`
111 | An `EndingDateTime` should be provided
112 | `EndsAtPresentFlag` = `false` OR no `EndsAtPresentFlag` provided
113 | 
114 | #### `doi_link_update`
115 | 
116 | If `http://dx.doi.org` is provided, recommend updating it to `https://doi.org`.
117 | 
118 | #### `doi_missing_reason_explanation`
119 | 
120 | If no DOI is provided, and the `DOI/MissingReason` field is populated, recommend adding an explanation for why there is no DOI.
121 | 
122 | #### `ends_at_present_flag_logic_check`
123 | 
124 | Checks the logic as follows:
125 | If `EndsAtPresentFlag` is populated:
126 | `true` -> check `EndingDateTime`; if there is no ending date time, the check passes; if there is an `EndingDateTime`, display a warning
127 | `true` -> check `CollectionState/CollectionProgress`; if `CollectionState/CollectionProgress = ACTIVE`, the check passes; if `CollectionState/CollectionProgress = COMPLETE`, display a warning
128 | `false` -> check `EndingDateTime`; if there is an ending date time, the check passes; if there is not an `EndingDateTime`, display a warning
129 | `false` -> check `CollectionState/CollectionProgress`; if `CollectionState/CollectionProgress = COMPLETE`, the check passes; if `CollectionState/CollectionProgress = ACTIVE`, display a warning
130 | 
131 | #### `ends_at_present_flag_presence_check`
132 | 
133 | If `EndsAtPresentFlag` is not populated:
134 | If no `EndingDateTime` is provided, print a warning that it might be necessary to add an `EndsAtPresentFlag` of `true`
135 | If an `EndingDateTime` is provided, the check passes (no message needed)
136 | If `CollectionState` = `ACTIVE`, print a warning that it might be necessary to add an `EndsAtPresentFlag` of `true`
137 | If `CollectionState` = `COMPLETE`, the check passes (no message needed)
138 | 
139 | #### `get_data_url_check`
140 | 
141 | Checks for `"GET DATA"` links (at least 1 should be provided). If no GET DATA links are provided, this check will throw an error.
142 | 
143 | #### `mime_type_check`
144 | 
145 | Checks whether a `Mime Type` is provided for `'USE SERVICE API'` URLs (i.e. when the `URL Type` = `'USE SERVICE API'`).
146 | 
147 | #### `opendap_url_location_check`
148 | 
149 | Check to make sure that an OPeNDAP access URL is not provided in the `Online Access URL` field.
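
As a concrete illustration of the `get_data_url_check` above, a UMM-style `RelatedUrls` entry like the following would satisfy the check. The URL and description are illustrative; the key layout mirrors the UMM example shown in the `custom_validator.py` docstring later in this repository.

```json
"RelatedUrls": [
    {
        "Description": "Download the data via Earthdata Search.",
        "URLContentType": "DistributionURL",
        "Type": "GET DATA",
        "URL": "https://search.earthdata.nasa.gov/search"
    }
]
```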
150 | 
151 | #### `user_services_check`
152 | 
153 | Check to make sure the fields aren't populated like this:
154 | 
155 | ```plaintext
156 | Collection/Contacts/Contact/ContactPersons/ContactPerson/FirstName: "User"
157 | Collection/Contacts/Contact/ContactPersons/ContactPerson/MiddleName: "null"
158 | Collection/Contacts/Contact/ContactPersons/ContactPerson/LastName: "Services"
159 | ```
160 | 
161 | #### `validate_beginning_datetime_against_granules`
162 | 
163 | Checks whether the beginning date time matches the beginning date time of the first granule in the collection (if granules exist).
164 | 
165 | #### `validate_ending_datetime_against_granules`
166 | 
167 | Checks whether the ending date time matches the ending date time of the last granule in the collection (if granules exist).
168 | 
169 | ### GCMD Checks
170 | 
171 | #### `science_keywords_gcmd_check`
172 | 
173 | Check to determine if the provided science keyword matches a value from the GCMD controlled vocabulary list.
174 | 
175 | #### `spatial_keyword_gcmd_check`
176 | 
177 | Check to determine if the provided spatial keyword matches a value from the GCMD controlled vocabulary list.
178 | 
179 | #### `platform_long_name_gcmd_check`
180 | 
181 | Check to determine if the provided long name matches a value from the platform GCMD controlled vocabulary list.
182 | 
183 | #### `platform_short_name_gcmd_check`
184 | 
185 | Check to determine if the provided short name matches a value from the platform GCMD controlled vocabulary list.
186 | 
187 | #### `platform_type_gcmd_check`
188 | 
189 | Check to determine if the provided platform type matches a value from the GCMD controlled vocabulary list.
190 | 
191 | #### `instrument_long_name_gcmd_check`
192 | 
193 | Check to determine if the provided long name matches a value from the GCMD controlled vocabulary list.
194 | 
195 | #### `instrument_short_name_gcmd_check`
196 | 
197 | Check to determine if the provided short name matches a value from the GCMD controlled vocabulary list.
198 | 
199 | #### `instrument_short_long_name_consistency_check`
200 | 
201 | Checks if the provided instrument short name and long name are consistent across the GCMD keywords list.
202 | 
203 | #### `campaign_long_name_gcmd_check`
204 | 
205 | Checks whether the value adheres to GCMD, specifically the project list long name column.
206 | 
207 | #### `campaign_short_name_gcmd_check`
208 | 
209 | Checks whether the value adheres to GCMD, specifically the project list short name column.
210 | 
211 | #### `campaign_short_long_name_consistency_check`
212 | 
213 | Checks whether the campaign (project) short name and long name GCMD keywords are consistent, i.e. that they belong to the same row of the GCMD projects list.
214 | 
215 | #### `organization_short_name_gcmd_check`
216 | 
217 | Checks whether the value adheres to GCMD, specifically the provider list short name column.
218 | 
219 | #### `data_format_gcmd_check`
220 | 
221 | Checks whether the value adheres to GCMD, specifically the data format short name/long name column.
222 | 
223 | #### `online_resource_type_gcmd_check`
224 | 
225 | Check that the `Online Resource Type` is included in the `rucontenttype` GCMD list under the `Type` or `Subtype` column.
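
Finally, every rule id added to `rules_override.json` needs a message entry under the same id in `check_messages_override.json`. The exact message schema should be copied from an existing entry in `check_messages.json`; assuming the common `failure`/`help`/`remediation` layout, a hypothetical entry for the rule sketched earlier might look like:

```json
"beginning_date_time_past_check": {
    "failure": "The BeginningDateTime is in the future.",
    "help": {
        "message": "",
        "url": ""
    },
    "remediation": "Provide a beginning date time that is not in the future."
}
```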
226 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: "pyQuARC: Open Source Library for Earth Observation Metadata Quality Assessment" 3 | message: "If you use this software, please cite it as below" 4 | type: software 5 | authors: 6 | - given-names: Slesa 7 | family-names: Adhikari 8 | email: slesa.adhikari@uah.edu 9 | - given-names: Iksha 10 | family-names: Gurung 11 | email: iksha.gurung@uah.edu 12 | - given-names: Jenny 13 | family-names: Wood 14 | email: jenny.wood@uah.edu 15 | - given-names: Jeanné 16 | family-names: le Roux 17 | email: jeanne.leroux@uah.edu 18 | identifiers: 19 | - type: doi 20 | value: 10.5281/zenodo.10724717 21 | repository-code: 'https://github.com/NASA-IMPACT/pyQuARC/tree/v1.2.5' 22 | abstract: >- 23 | pyQuARC is designed to read and evaluate Earth observation metadata records hosted within the Common Metadata Repository (CMR), which is a centralized metadata repository for all of NASA's Earth observation data products. The CMR serves as the backend for NASA's Earthdata Search meaning that high-quality metadata helps connect users to the existing data in Earthdata Search. pyQuARC implements the Analysis and Review of CMR (ARC) team's metadata quality assessment framework to provide prioritized recommendations for metadata improvement and optimized search results. pyQuARC makes basic validation checks, pinpoints inconsistencies between dataset-level (i.e. collection) and file-level (i.e. granule) metadata, and identifies opportunities for more descriptive and robust information. It currently supports DIF10 (collection), ECHO10 (collection and granule), UMM-C, and UMM-G metadata standards. As open source software, pyQuARC can be adapted to add customized checks, implement future metadata standards, or support other metadata types. 24 | keywords: 25 | - Metadata 26 | - Python 27 | - Data Curation 28 | - Earth Observation 29 | - DAAC 30 | - Collection 31 | - Granule 32 | - GCMD 33 | - Quality Assessment 34 | - DIF10 35 | - ECHO10 36 | - UMM-C 37 | license: Apache-2.0 38 | version: 1.2.5 39 | date-released: '2021-08-19' 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2022 NASA-IMPACT 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/pyQuARC/85ccd62b8b3714f2721fcc67d9a29199c9fbcbb4/images/architecture.png -------------------------------------------------------------------------------- /pyQuARC/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from pyQuARC.main import ARC 4 | 5 | from pyQuARC.code.constants import SUPPORTED_FORMATS as FORMATS 6 | 7 | from pyQuARC.code.checker import Checker 8 | from pyQuARC.code.downloader import Downloader 9 | 10 | from pyQuARC.code.base_validator import BaseValidator 11 | from pyQuARC.code.datetime_validator import DatetimeValidator 12 | from pyQuARC.code.schema_validator import SchemaValidator 13 | from pyQuARC.code.string_validator import StringValidator 14 | from pyQuARC.code.url_validator import UrlValidator 15 | 16 | ABS_PATH = os.path.abspath(os.path.dirname(__file__)) 17 | with open(f"{ABS_PATH}/version.txt") as version_file: 18 | __version__ = version_file.read().strip() 19 | 20 | 21 | def version(): 22 | """Returns the current version of pyQuARC.""" 23 | return __version__ 24 | -------------------------------------------------------------------------------- /pyQuARC/code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/pyQuARC/85ccd62b8b3714f2721fcc67d9a29199c9fbcbb4/pyQuARC/code/__init__.py -------------------------------------------------------------------------------- /pyQuARC/code/base_validator.py: -------------------------------------------------------------------------------- 1 | class BaseValidator: 2 | """ 3 | Base class for all the validators 4 | """ 5 | 6 | def __init__(self): 7 | pass 8 | 9 | @staticmethod 10 | def eq(first, second): 11 | return first == second 12 | 13 | @staticmethod 14 | def neq(first, second): 15 | return first != second 16 | 17 | @staticmethod 18 | def lt(first, second): 19 | return first < second 20 | 21 | @staticmethod 22 | def lte(first, second): 23 | return first <= second 24 | 25 | @staticmethod 26 | def gt(first, second): 27 | return first > second 28 | 29 | @staticmethod 30 | def gte(first, second): 31 | return first >= second 32 | 33 | @staticmethod 34 | def is_in(value, list_of_values): 35 | return value in list_of_values 36 | 37 | @staticmethod 38 | def contains(list_of_values, value): 39 | return value in list_of_values 40 | 41 | @staticmethod 42 | def compare(first, second, relation): 43 | if relation.startswith("not_"): 44 | return not (BaseValidator.compare(first, second, relation[4:])) 45 | func = getattr(BaseValidator, relation) 46 | return func(first, second) 47 | -------------------------------------------------------------------------------- /pyQuARC/code/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | from colorama import Fore, Style 3 | 4 | DIF = "dif10" 5 | ECHO10_C = "echo-c" 6 | UMM_C = "umm-c" 7 | UMM_G = "umm-g" 8 | ECHO10_G = "echo-g" 9 | 10 | SUPPORTED_FORMATS = [DIF, ECHO10_C, UMM_C, UMM_G, ECHO10_G] 11 | 12 | # Changed to os instead of pathlib 13 | # https://github.com/aio-libs/aiohttp/issues/3977 14 | 15 | ROOT_DIR = ( 16 | # go up one directory 17 | os.path.abspath(os.path.join(__file__, "../..")) 18 | ) 19 | 20 | SCHEMAS_BASE_PATH = f"{ROOT_DIR}/schemas" 21 
| 22 | GCMD_KEYWORDS = [ 23 | "chronounits", 24 | "granuledataformat", 25 | "horizontalresolutionrange", 26 | "idnnode", 27 | "instruments", 28 | "locations", 29 | "MimeType", 30 | "platforms", 31 | "projects", 32 | "providers", 33 | "rucontenttype", 34 | "sciencekeywords", 35 | "temporalresolutionrange", 36 | "verticalresolutionrange", 37 | ] 38 | 39 | SCHEMAS = { 40 | "json": [ 41 | "checks", 42 | "check_messages", 43 | "check_messages_override", 44 | "checks_override", 45 | "rule_mapping", 46 | "rules_override", 47 | f"{UMM_C}-json-schema", 48 | "umm-cmn-json-schema", 49 | f"{UMM_G}-json-schema", 50 | ], 51 | "csv": GCMD_KEYWORDS, 52 | "xsd": [f"{DIF}_schema", f"{ECHO10_C}_schema", f"{ECHO10_G}_schema"], 53 | "xml": ["catalog"], 54 | } 55 | 56 | SCHEMA_PATHS = { 57 | schema: f"{SCHEMAS_BASE_PATH}/{schema}.{filetype}" 58 | for filetype, schemas in SCHEMAS.items() 59 | for schema in schemas 60 | } 61 | 62 | VERSION_FILE = f"{SCHEMAS_BASE_PATH}/version.txt" 63 | 64 | COLOR = { 65 | "title": Fore.GREEN, 66 | "info": Fore.BLUE, 67 | "error": Fore.RED, 68 | "warning": Fore.YELLOW, 69 | "reset": Style.RESET_ALL, 70 | "bright": Style.BRIGHT, 71 | } 72 | 73 | GCMD_BASIC_URL = "https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/" 74 | 75 | GCMD_LINKS = { 76 | keyword: f"{GCMD_BASIC_URL}{keyword}?format=csv" for keyword in GCMD_KEYWORDS 77 | } 78 | 79 | CMR_URL = "https://cmr.earthdata.nasa.gov" 80 | 81 | DATE_FORMATS = [ 82 | "%Y-%m-%dT%H:%M:%S.%f", # Year to microsecond 83 | "%Y-%m-%dT%H:%M:%S", # Year to second 84 | "%Y-%m-%dT%H:%M", # Year to minute 85 | "%Y-%m-%dT%H", # Year to hour 86 | "%Y-%m-%d", # Year to day 87 | "%Y-%m", # Year to month 88 | "%Y", # Year 89 | ] 90 | -------------------------------------------------------------------------------- /pyQuARC/code/custom_checker.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import urlparse 2 | from concurrent.futures import ThreadPoolExecutor, as_completed 3 | 4 | 5 | class CustomChecker: 6 | """ 7 | Class to implement custom checks 8 | """ 9 | 10 | def __init__(self): 11 | pass 12 | 13 | @staticmethod 14 | def _get_path_value_recursively( 15 | subset_of_metadata_content, path_list, container, query_params=None 16 | ): 17 | """ 18 | Gets the path values recursively while handling list or dictionary in `subset_of_metadata_content` 19 | Adds the values to `container` 20 | 21 | Args: 22 | subset_of_metadata_content (dict or list or str): 23 | The value of the field at a certain point; 24 | changes during each level of recursion 25 | path_list (list): The path of the field as a list 26 | Example: 'Collection/RangeDateTime/StartDate' -> 27 | ['Collection', 'RangeDateTime', 'StartDate'] 28 | container (set): The container that holds all the path values 29 | query_params (list): The [key, value] pair to distinguish a field in umm-c 30 | eg: ["Type", "DELETE"] 31 | """ 32 | try: 33 | root_content = subset_of_metadata_content[path_list[0]] 34 | except KeyError: 35 | # this is needed because GCMD keywords check needs the placement 36 | # of the values in the returned list 37 | container.append(None) 38 | return 39 | except IndexError: 40 | container.append(subset_of_metadata_content) 41 | return 42 | new_path = path_list[1:] 43 | if ( 44 | isinstance(root_content, str) 45 | or isinstance(root_content, int) 46 | or isinstance(root_content, float) 47 | ): 48 | container.append(root_content) 49 | return 50 | elif isinstance(root_content, list): 51 | if not new_path: 52 | 
container.append(root_content)
53 |                 return
54 |             if len(new_path) == 1 and query_params:
55 |                 try:
56 |                     root_content = next(
57 |                         (
58 |                             x
59 |                             for x in root_content
60 |                             if x[query_params[0]] == query_params[1]
61 |                         )
62 |                     )
63 |                     root_content = root_content[new_path[0]]
64 |                     container.append(root_content)
65 |                 except:  # the query key may be absent, or no element may match
66 |                     container.append(None)
67 |                 return
68 |             for each in root_content:
69 |                 try:
70 |                     CustomChecker._get_path_value_recursively(
71 |                         each, new_path, container, query_params
72 |                     )
73 |                 except KeyError:
74 |                     container.append(None)
75 |                     continue
76 |         elif isinstance(root_content, dict):
77 |             CustomChecker._get_path_value_recursively(
78 |                 root_content, new_path, container, query_params
79 |             )
80 | 
81 |     @staticmethod
82 |     def _get_path_value(content_to_validate, path_string):
83 |         """
84 |         Gets the value of the field from the metadata (`content_to_validate`)
85 | 
86 |         Args:
87 |             content_to_validate (dict): The metadata content
88 |             path_string (str): The path of the field. Example: 'Collection/RangeDateTime/StartDate'
89 | 
90 |         Returns:
91 |             (list) The values found at the path; entries are `None` where the path could not be resolved
92 |         """
93 | 
94 |         container = list()
95 |         query_params = None
96 | 
97 |         parsed = urlparse(path_string)
98 |         path = parsed.path.split("/")
99 |         if key_value := parsed.query:
100 |             query_params = key_value.split("=")
101 | 
102 |         CustomChecker._get_path_value_recursively(
103 |             content_to_validate, path, container, query_params
104 |         )
105 |         return container
106 | 
107 |     @staticmethod
108 |     def _process_argument(arg, func, relation, external_data, external_relation):
109 |         """
110 |         Process the argument by calling the provided function with the given arguments.
111 | 
112 |         Args:
113 |             arg: The argument to be processed.
114 |             func: The function to be called.
115 |             relation: The relation argument.
116 |             external_data: The external data argument.
117 |             external_relation: The external relation argument.
118 | 
119 |         Returns:
120 |             The dict returned by `func`, of the form {"valid": bool or None, "value": the instance value}.
121 |         """
122 | 
123 |         function_args = [*arg]
124 |         function_args.extend(
125 |             [
126 |                 extra_arg
127 |                 for extra_arg in [relation, *external_data, external_relation]
128 |                 if extra_arg
129 |             ]
130 |         )
131 |         func_return = func(*function_args)
132 |         return func_return
133 | 
134 |     def run(
135 |         self, func, content_to_validate, field_dict, external_data, external_relation
136 |     ):
137 |         """
138 |         Runs the custom check `func` against the `content_to_validate` at the paths given by `field_dict`
139 | 
140 |         Args:
141 |             content_to_validate (dict): The metadata content
142 |             field_dict (dict): The field dictionary of the form:
143 |                 {
144 |                     "fields": relevant fields,
145 |                     "relation": relation between the fields
146 |                 }
147 |             func (function): The function reference to the check
148 |             external_data (list): External data required by the check if any
149 | 
150 |         Returns:
151 |             (dict): The result of the check in the form:
152 |                 {
153 |                     "valid": "Validity status (bool)",
154 |                     "value": "The instance value/s"
155 |                 }
156 |         """
157 |         fields = field_dict["fields"]
158 |         field_values = []
159 |         relation = field_dict.get("relation")
160 |         result = {"valid": None}
161 |         for _field in fields:
162 |             value = CustomChecker._get_path_value(content_to_validate, _field)
163 |             field_values.append(value)
164 |         args = zip(*field_values)
165 | 
166 |         invalid_values = []
167 |         validity = None
168 | 
169 |         # Process arguments using multithreading
170 |         with ThreadPoolExecutor() as executor:
171 |             future_results = []
172 |             for arg in args:
173 |                 future = executor.submit(
174 |                     self._process_argument,
175 |                     arg,
176 |                     func,
177 |                     relation,
178 |                     external_data,
179 |                     external_relation,
180 |                 )
181 |                 future_results.append(future)
182 | 
183 |             # Retrieve results from futures
184 |             for future in as_completed(future_results):
185 |                 try:
186 |                     func_return = future.result()
187 |                     valid = func_return["valid"]  # can be True, False or None
188 |                     if valid is not None:
189 |                         if valid:
190 |                             validity = validity or (validity is None)
191 |                         else:
192 |                             if "value" in func_return:
193 |                                 invalid_values.append(func_return["value"])
194 |                             validity = False
195 |                 except Exception as e:
196 |                     raise e
197 |         result["valid"] = validity
198 |         result["value"] = invalid_values
199 |         return result
200 | 
--------------------------------------------------------------------------------
/pyQuARC/code/custom_validator.py:
--------------------------------------------------------------------------------
1 | from .base_validator import BaseValidator
2 | from .string_validator import StringValidator
3 | 
4 | from .utils import cmr_request, if_arg, set_cmr_prms
5 | 
6 | 
7 | class CustomValidator(BaseValidator):
8 |     def __init__(self):
9 |         super().__init__()
10 | 
11 |     @staticmethod
12 |     def ends_at_present_flag_logic_check(
13 |         ends_at_present_flag, ending_date_time, collection_state
14 |     ):
15 |         collection_state = collection_state.upper()
16 |         valid = (
17 |             (bool(ends_at_present_flag)
18 |             and ends_at_present_flag not in ("False", "false"))
19 |             and not (ending_date_time) and collection_state in ("ACTIVE", "IN WORK")
20 |         ) or (
21 |             (not ends_at_present_flag
22 |             or ends_at_present_flag in ("False", "false"))
23 |             and bool(ending_date_time) and collection_state == "COMPLETE"
24 |         )
25 | 
26 |         return {"valid": valid, "value": ends_at_present_flag}
27 | 
28 |     @staticmethod
29 |     def ends_at_present_flag_presence_check(
30 |         ends_at_present_flag, ending_date_time, collection_state
31 |     ):
32 |         valid = True
33 |         if ends_at_present_flag is None:
34 |             valid = 
bool(ending_date_time) and collection_state == "COMPLETE" 35 | 36 | return {"valid": valid, "value": ends_at_present_flag} 37 | 38 | @staticmethod 39 | def mime_type_check(mime_type, url_type, controlled_list): 40 | """ 41 | Checks that if the value for url_type is "USE SERVICE API", 42 | the mime_type should be one of the values from a controlled list 43 | For all other cases, the check should be valid 44 | """ 45 | result = {"valid": True, "value": mime_type} 46 | if url_type: 47 | if "USE SERVICE API" in url_type: 48 | if mime_type: 49 | result = StringValidator.controlled_keywords_check( 50 | mime_type, controlled_list 51 | ) 52 | else: 53 | result["valid"] = False 54 | return result 55 | 56 | @staticmethod 57 | def availability_check(field_value, parent_value): 58 | # If the parent is available, the child should be available too, else it is invalid 59 | return { 60 | "valid": bool(field_value) if parent_value else True, 61 | "value": parent_value, 62 | } 63 | 64 | @staticmethod 65 | @if_arg 66 | def bounding_coordinate_logic_check(west, north, east, south): 67 | # Checks if the logic for coordinate values make sense 68 | result = {"valid": False, "value": [west, north, east, south]} 69 | west = float(west) 70 | east = float(east) 71 | south = float(south) 72 | north = float(north) 73 | 74 | result["valid"] = ( 75 | (south >= -90 and south <= 90) 76 | and (north >= -90 and north <= 90) 77 | and (east >= -180 and east <= 180) 78 | and (west >= -180 and west <= 180) 79 | and (north > south) 80 | and (east > west) 81 | ) 82 | return result 83 | 84 | @staticmethod 85 | def one_item_presence_check(*field_values): 86 | """ 87 | Checks if one of the specified fields is populated 88 | At least one of the `field_values` should not be null 89 | It is basically a OneOf check 90 | """ 91 | validity = False 92 | value = None 93 | 94 | for field_value in field_values: 95 | if field_value is not None: 96 | value = field_value 97 | validity = True 98 | break 99 | 100 | return {"valid": validity, "value": value} 101 | 102 | @staticmethod 103 | def dif_standard_product_check(*field_values): 104 | """ 105 | Checks if the Extended_Metadata field in the DIF schema is being 106 | utilized to specify whether or not the collection is a Standard Product. 107 | This check is needed because DIF schema does not have a dedicated field 108 | for Standard Product, and the Extended_Metadata field is also utilized 109 | for other things. 110 | """ 111 | validity = False 112 | value = None 113 | 114 | for field_value in field_values: 115 | if field_value: 116 | if 'StandardProduct' in field_value: 117 | value = field_value 118 | validity = True 119 | break 120 | return {"valid": validity, "value": value} 121 | 122 | @staticmethod 123 | def license_url_description_check(description_field, url_field, license_text): 124 | """ 125 | Determines if a description has been provided for the License URL if a 126 | License URL has been provided in the metadata. 
127 | 128 | Args: 129 | url_field (string): license URL string 130 | description_field (string): string describing the URL 131 | """ 132 | validity = True 133 | value = description_field 134 | 135 | if not license_text and not url_field: 136 | validity = False 137 | return {"valid": validity, "value": value} 138 | elif license_text and not url_field: 139 | return {"valid": validity, "value": value} 140 | else: 141 | if not description_field: 142 | validity = False 143 | return {"valid": validity, "value": value} 144 | 145 | @staticmethod 146 | def granule_sensor_presence_check( 147 | sensor_values, collection_shortname=None, version=None, dataset_id=None 148 | ): 149 | """ 150 | Checks if sensor is provided at the granule level if provided at 151 | collection level 152 | """ 153 | if dataset_id: 154 | params = {"DatasetId": dataset_id} 155 | else: 156 | params = { 157 | "collection_shortname": collection_shortname, 158 | "version": version, 159 | } 160 | prms = set_cmr_prms(params, format="umm_json") 161 | collections = cmr_request(prms) 162 | if collections := collections.get("items"): 163 | collection = collections[0] 164 | for platform in collection["umm"].get("Platforms", []): 165 | instruments = platform.get("Instruments", []) 166 | for instrument in instruments: 167 | if "ComposedOf" in instrument.keys(): 168 | return CustomValidator.presence_check(sensor_values) 169 | 170 | return { 171 | "valid": True, 172 | "value": sensor_values, 173 | } 174 | 175 | @staticmethod 176 | @if_arg 177 | def user_services_check(first_name, middle_name, last_name): 178 | return { 179 | "valid": ( 180 | first_name.lower() != "user" 181 | or last_name.lower() != "services" 182 | or (middle_name and (middle_name.lower() != "null")) 183 | ), 184 | "value": f"{first_name} {middle_name} {last_name}", 185 | } 186 | 187 | @staticmethod 188 | def doi_missing_reason_explanation(explanation, missing_reason, doi): 189 | validity = bool(doi or ((not doi) and missing_reason and explanation)) 190 | return {"valid": validity, "value": explanation} 191 | 192 | @staticmethod 193 | @if_arg 194 | def boolean_check(field_value): 195 | # Checks if the value is a boolean, basically 'true' or 'false' or their case variants 196 | return {"valid": field_value.lower() in ["true", "false"], "value": field_value} 197 | 198 | @staticmethod 199 | @if_arg 200 | def collection_progress_consistency_check( 201 | collection_state, ends_at_present_flag, ending_date_time 202 | ): 203 | # Logic: https://github.com/NASA-IMPACT/pyQuARC/issues/61 204 | validity = False 205 | collection_state = collection_state.upper() 206 | ends_at_present_flag = ( 207 | str(ends_at_present_flag).lower() if ends_at_present_flag else None 208 | ) 209 | 210 | if collection_state in ["ACTIVE", "IN WORK"]: 211 | validity = (not ending_date_time) and (ends_at_present_flag == "true") 212 | elif collection_state == "COMPLETE": 213 | validity = ending_date_time and ( 214 | not ends_at_present_flag or (ends_at_present_flag == "false") 215 | ) 216 | 217 | return {"valid": validity, "value": collection_state} 218 | 219 | @staticmethod 220 | @if_arg 221 | def uniqueness_check(list_of_objects, key): 222 | seen, duplicates = set(), set() 223 | if isinstance(list_of_objects, list): 224 | for url_obj in list_of_objects: 225 | if (description := url_obj.get(key)) in seen: 226 | duplicates.add(description) 227 | else: 228 | seen.add(description) 229 | return {"valid": not bool(duplicates), "value": ", ".join(duplicates)} 230 | 231 | @staticmethod 232 | def 
get_data_url_check(related_urls, key): 233 | """Checks if the related_urls contains a "GET DATA" url 234 | 235 | Args: 236 | related_urls (list): The related_urls field of the object 237 | Example: [ 238 | { 239 | "Description": "The LP DAAC product page provides information on Science Data Set layers and links for user guides, ATBDs, data access, tools, customer support, etc.", 240 | "URLContentType": "CollectionURL", 241 | "Type": "DATA SET LANDING PAGE", 242 | "URL": "https://doi.org/10.5067/MODIS/MOD13Q1.061" 243 | }, ... 244 | ] or 245 | [ 246 | { 247 | "Description": "The LP DAAC product page provides information on Science Data Set layers and links for user guides, ATBDs, data access, tools, customer support, etc.", 248 | "URL_Content_Type": { 249 | "Type": "GET DATA", 250 | "Subtype": "LAADS" 251 | }, 252 | "URL": "https://doi.org/10.5067/MODIS/MOD13Q1.061", 253 | ... 254 | }, ... 255 | ] 256 | key (list): The hierarchical list of keys 257 | Example: ["Type"] 258 | or 259 | ["URL_Content_Type", "Type"] 260 | """ 261 | return_obj = {"valid": False, "value": "N/A"} 262 | for url_obj in related_urls: 263 | type = url_obj.get(key[0]) 264 | if len(key) == 2: 265 | type = (type or {}).get(key[1]) 266 | if (validity := type == "GET DATA") and (url := url_obj.get("URL")): 267 | return_obj["valid"] = validity 268 | return_obj["value"] = url 269 | break 270 | return return_obj 271 | 272 | @staticmethod 273 | @if_arg 274 | def count_check(count, values, key): 275 | items = values.get(key, []) 276 | if not isinstance(items, list): 277 | items = [items] 278 | num_items = len(items) 279 | return {"valid": int(count) == num_items, "value": (count, num_items)} 280 | -------------------------------------------------------------------------------- /pyQuARC/code/datetime_validator.py: -------------------------------------------------------------------------------- 1 | import pytz 2 | import re 3 | 4 | from datetime import datetime 5 | 6 | from .base_validator import BaseValidator 7 | from .utils import cmr_request, if_arg, set_cmr_prms, get_date_time 8 | 9 | 10 | class DatetimeValidator(BaseValidator): 11 | """ 12 | Validator class for datetime datatype 13 | """ 14 | 15 | def __init__(self): 16 | super().__init__() 17 | 18 | @staticmethod 19 | def _iso_datetime(datetime_string): 20 | """ 21 | Converts the input datetime string to an ISO datetime object 22 | 23 | Args: 24 | datetime_string (str): the datetime string 25 | 26 | Returns: 27 | (datetime.datetime) The datetime object if the string is a valid ISO string, False otherwise 28 | """ 29 | REGEX = r"^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?$" 30 | match_iso8601 = re.compile(REGEX).match 31 | try: 32 | if match_iso8601(datetime_string): 33 | if datetime_string.endswith("Z"): 34 | datetime_string = datetime_string.replace("Z", "+00:00") 35 | value = datetime.fromisoformat(datetime_string) 36 | return value 37 | except: 38 | pass 39 | return False 40 | 41 | @staticmethod 42 | def _iso_date(date_string): 43 | """ 44 | Converts the input date string to an ISO datetime object 45 | 46 | Args: 47 | date_string (str): the date string 48 | 49 | Returns: 50 | (datetime.datetime) The datetime object if the string is a valid ISO date string, False otherwise 51 | """ 52 | 53 | try: 54 | value = datetime.strptime(date_string, "%Y-%m-%d") 55 | return value 56 | except: 57 | return False 58 | 59 | @staticmethod 60 | @if_arg 61 | def iso_format_check(datetime_string): 62 | """ 63 | Performs the
Date/DateTime ISO Format Check - checks if the datetime 64 | is a valid ISO-formatted datetime string 65 | 66 | Args: 67 | datetime_string (str): The datetime string 68 | 69 | Returns: 70 | (dict) An object with the validity of the check and the instance 71 | """ 72 | return { 73 | "valid": bool(DatetimeValidator._iso_datetime(datetime_string)), 74 | "value": datetime_string, 75 | } 76 | 77 | @staticmethod 78 | @if_arg 79 | def date_or_datetime_format_check(datetime_string): 80 | """ 81 | Performs the Date/DateTime Format Check 82 | Checks if the datetime_string is a valid ISO date or ISO datetime string 83 | 84 | Args: 85 | datetime_string (str): The date or datetime string 86 | 87 | Returns: 88 | (dict) An object with the validity of the check and the instance 89 | """ 90 | return { 91 | "valid": bool(DatetimeValidator._iso_datetime(datetime_string)) 92 | or bool(DatetimeValidator._iso_date(datetime_string)), 93 | "value": datetime_string, 94 | } 95 | 96 | @staticmethod 97 | @if_arg 98 | def compare(first, second, relation): 99 | """ 100 | Compares two datetime values based on the argument relation 101 | Returns: 102 | (dict) An object with the validity of the check and the instance 103 | """ 104 | first = ( 105 | DatetimeValidator._iso_datetime(first) or DatetimeValidator._iso_date(first) 106 | ).replace(tzinfo=pytz.utc) 107 | second = DatetimeValidator._iso_datetime(second) or DatetimeValidator._iso_date( 108 | second 109 | ) 110 | if not (second): 111 | second = datetime.now() 112 | second = second.replace( 113 | tzinfo=pytz.UTC 114 | ) # Making it UTC for comparison with other UTC times 115 | result = BaseValidator.compare(first, second, relation) 116 | return {"valid": result, "value": (str(first), str(second))} 117 | 118 | @staticmethod 119 | def validate_datetime_against_granules( 120 | datetime_string, collection_shortname, version, sort_key, time_key 121 | ): 122 | """ 123 | Validates the collection datetime against the datetime of the first or last granule in the collection (depending on `sort_key`) 124 | 125 | Args: 126 | datetime_string (str): datetime string 127 | collection_shortname (str): ShortName of the parent collection 128 | sort_key (str): either "start_date" or "-end_date" 129 | time_key (str): either "time_start" or "time_end" 130 | Returns: 131 | (dict) An object with the validity of the check and the instance 132 | """ 133 | cmr_prms = set_cmr_prms( 134 | { 135 | "short_name": collection_shortname, 136 | "version": version, 137 | "sort_key[]": sort_key, 138 | }, 139 | "json", 140 | "granules", 141 | ) 142 | granules = cmr_request(cmr_prms) 143 | 144 | validity = True 145 | last_granule_datetime = None 146 | date_time = None 147 | 148 | # Compare the precision of the two datetime strings 149 | if len(granules["feed"]["entry"]) > 0: 150 | last_granule = granules["feed"]["entry"][0] 151 | last_granule_datetime = last_granule.get(time_key) 152 | date_time = get_date_time(datetime_string) 153 | last_granule_datetime = get_date_time(last_granule_datetime) 154 | validity = date_time == last_granule_datetime 155 | 156 | return {"valid": validity, "value": (date_time, last_granule_datetime)} 157 | 158 | @staticmethod 159 | @if_arg 160 | def validate_ending_datetime_against_granules( 161 | ending_datetime, collection_shortname, version 162 | ): 163 | """ 164 | Validates the collection EndingDatetime against the datetime of the last granule in the collection 165 | 166 | Args: 167 | ending_datetime (str): EndingDatetime string 168 | collection_shortname (str): ShortName of the parent collection 169 | 170 |
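version (str): Version of the parent collection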
Returns: 171 | (dict) An object with the validity of the check and the instance 172 | """ 173 | return DatetimeValidator.validate_datetime_against_granules( 174 | ending_datetime, collection_shortname, version, "-end_date", "time_end" 175 | ) 176 | 177 | @staticmethod 178 | @if_arg 179 | def validate_beginning_datetime_against_granules( 180 | beginning_datetime, collection_shortname, version 181 | ): 182 | """ 183 | Validates the collection BeginningDateTime against the datetime of the first granule in the collection 184 | 185 | Args: 186 | beginning_datetime (str): BeginningDateTime string 187 | collection_shortname (str): ShortName of the parent collection 188 | 189 | Returns: 190 | (dict) An object with the validity of the check and the instance 191 | """ 192 | return DatetimeValidator.validate_datetime_against_granules( 193 | beginning_datetime, 194 | collection_shortname, 195 | version, 196 | "start_date", 197 | "time_start", 198 | ) 199 | -------------------------------------------------------------------------------- /pyQuARC/code/downloader.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import requests 4 | 5 | from urllib.parse import urlparse 6 | 7 | from .utils import get_cmr_url, get_headers 8 | 9 | 10 | class Downloader: 11 | """ 12 | Downloads data given a concept ID 13 | """ 14 | 15 | BASE_URL = "{cmr_host}/search/concepts/" 16 | 17 | COLLECTION = "collection" 18 | GRANULE = "granule" 19 | INVALID = "invalid" 20 | 21 | FORMAT_MAP = { 22 | "echo-c": "echo10", 23 | "echo-g": "echo10", 24 | "umm-c": "umm-json", 25 | "umm-g": "umm-json", 26 | "dif10": "dif10", 27 | } 28 | 29 | def __init__( 30 | self, concept_id, metadata_format, version=None, cmr_host=get_cmr_url() 31 | ): 32 | """ 33 | Args: 34 | concept_id (str): The concept id of the metadata to download 35 | metadata_format (str): The file format of the metadata to download 36 | version (str): The version of the metadata to download 37 | """ 38 | self.concept_id = concept_id 39 | self.version = version 40 | self.metadata_format = metadata_format 41 | self.errors = [] 42 | 43 | # the downloaded metadata (an XML or JSON string) is stored here 44 | self.downloaded_content = None 45 | 46 | parsed_url = urlparse(cmr_host) 47 | self.cmr_host = f"{parsed_url.scheme}://{parsed_url.netloc}" 48 | 49 | def _valid_concept_id(self): 50 | """ 51 | Check whether the passed concept id is valid 52 | 53 | Returns: 54 | (bool) True if the concept id is valid, False otherwise 55 | """ 56 | 57 | return Downloader._concept_id_type(self.concept_id) != Downloader.INVALID 58 | 59 | def _construct_url(self): 60 | """ 61 | Constructs the CMR API URL based on the concept ID. 62 | It assumes that the concept_id is already valid.
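For example (illustrative), a concept id "C123456-PROV" with metadata format "umm-c" and version "1.17" yields "<cmr_host>/search/concepts/C123456-PROV/1.17.umm-json".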
63 | 64 | Returns: 65 | (str) The URL constructed based on the concept ID 66 | """ 67 | extension = Downloader.FORMAT_MAP.get(self.metadata_format, "echo10") 68 | 69 | base_url = Downloader.BASE_URL.format(cmr_host=self.cmr_host) 70 | version = f"/{self.version}" if self.version else "" 71 | constructed_url = f"{base_url}{self.concept_id}{version}.{extension}" 72 | return constructed_url 73 | 74 | def log_error(self, error_message_code, kwargs): 75 | """ 76 | Logs errors in self.errors 77 | 78 | Args: 79 | error_message_code (str): The key to the ERROR_MESSAGES dict 80 | kwargs (dict): Any keyword arguments required for the error string 81 | """ 82 | 83 | self.errors.append({"type": error_message_code, "details": kwargs}) 84 | 85 | def download(self): 86 | """ 87 | Downloads metadata by calling the CMR API 88 | 89 | Returns: 90 | (str) The downloaded metadata string if the download is successful, None otherwise 91 | """ 92 | 93 | # is the concept id valid? if not, log error 94 | if not self._valid_concept_id(): 95 | self.log_error("invalid_concept_id", {"concept_id": self.concept_id}) 96 | return 97 | 98 | # constructs url based on concept id 99 | url = self._construct_url() 100 | headers = get_headers() 101 | response = requests.get(url, headers=headers) 102 | 103 | # if the authorization token is invalid, even public metadata that doesn't require the token is inaccessible 104 | # this works around that 105 | if response.status_code == 401: # if token invalid, try without token 106 | response = requests.get(url) 107 | 108 | # gets the response, makes sure it's 200, puts it in an object variable 109 | if response.status_code != 200: 110 | message = "Something went wrong while downloading the requested metadata. Make sure all the inputs are correct." 111 | try: 112 | details = json.loads(response.text).get("errors") 113 | except (json.decoder.JSONDecodeError, KeyError): 114 | details = "N/A" 115 | self.log_error( 116 | "request_failed", 117 | { 118 | "concept_id": self.concept_id, 119 | "url": url, 120 | "status_code": response.status_code, 121 | "message": message, 122 | "details": details, 123 | }, 124 | ) 125 | return 126 | 127 | # stores the data in the downloaded_content variable 128 | self.downloaded_content = response.text 129 | return self.downloaded_content 130 | 131 | @staticmethod 132 | def _concept_id_type(concept_id: str) -> str: 133 | """ 134 | A concept ID can be for a collection or a granule. This function determines which one it is.
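For example (illustrative), "C1234567890-PROVIDER" matches the collection pattern and "G1234567890-PROVIDER" matches the granule pattern.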
135 | 136 | Returns: 137 | (str) "collection" when the concept_id is a collection 138 | "granule" when the concept_id is a granule 139 | "invalid" when the concept_id is neither a collection nor a granule id, or is otherwise invalid 140 | """ 141 | 142 | concept_id_pattern: str = r"C\d+-(([a-zA-Z]+(_[a-zA-Z]+)?$))+" 143 | granule_id_pattern: str = r"G\d+-(([a-zA-Z]+(_[a-zA-Z]+)?$))+" 144 | 145 | concept_id_type: str = Downloader.INVALID 146 | 147 | if re.match(concept_id_pattern, concept_id): 148 | concept_id_type = Downloader.COLLECTION 149 | elif re.match(granule_id_pattern, concept_id): 150 | concept_id_type = Downloader.GRANULE 151 | 152 | return concept_id_type 153 | -------------------------------------------------------------------------------- /pyQuARC/code/scheduler.py: -------------------------------------------------------------------------------- 1 | class Scheduler: 2 | """ 3 | Schedules the rules based on the applicable ordering 4 | """ 5 | 6 | def __init__( 7 | self, rule_mapping, rules_override, checks, checks_override, metadata_format 8 | ): 9 | self.check_list = {**checks, **checks_override} 10 | self.rule_mapping = {**rule_mapping, **rules_override} 11 | self.metadata_format = metadata_format 12 | 13 | @staticmethod 14 | def append_if_not_exist(value, list_of_values): 15 | """ 16 | Appends `value` if it doesn't exist in `list_of_values` 17 | `list_of_values` is an ordered list 18 | """ 19 | if value not in list_of_values: 20 | list_of_values.append(value) 21 | 22 | def get_all_dependencies(self, rule, check, field_dict=None): 23 | """ 24 | Gets all the dependencies for a rule 25 | 26 | If field_dict is provided, get the dependencies only for that field 27 | """ 28 | dependencies = [] 29 | dependencies_from_fields = [] 30 | 31 | if field_dict: 32 | if dependencies_from_fields := field_dict.get("dependencies"): 33 | return dependencies_from_fields 34 | else: 35 | return check.get("dependencies", []) 36 | 37 | if field_objects := rule.get("fields_to_apply").get(self.metadata_format): 38 | for field_object in field_objects: 39 | if field_dependencies := field_object.get("dependencies"): 40 | dependencies_from_fields.extend(field_dependencies) 41 | 42 | dependencies.extend(dependencies_from_fields) 43 | 44 | dependencies_from_checks = check.get("dependencies", []) 45 | dependencies.extend(dependencies_from_checks) 46 | return dependencies 47 | 48 | def dependencies_ordering(self, dependencies, ordered_list): 49 | """ 50 | Creates a dependency ordering; independent checks are added first 51 | """ 52 | for dependency in dependencies: 53 | dependency_check = self.check_list.get(dependency[0]) 54 | if dependency_check.get("dependencies"): 55 | self.dependencies_ordering(dependency_check.get("dependencies"), ordered_list) 56 | Scheduler.append_if_not_exist(dependency[0], ordered_list) 57 | 58 | def _find_rule_ids_based_on_check_id(self, check_id): 59 | """ 60 | Returns all the rule_ids that are based on a check_id 61 | 62 | Args: 63 | check_id (str): The check id to find the rules based on 64 | 65 | Returns: 66 | list: list of all the rule_ids that are based on the check_id 67 | """ 68 | return [ 69 | rule_id 70 | for rule_id, rule in self.rule_mapping.items() 71 | if (rule.get("check_id") == check_id) or (rule_id == check_id) 72 | ] 73 | 74 | def order_rules(self): 75 | """ 76 | Creates a rule ordering based on the dependencies 77 | 78 | Returns: 79 | (list): ordered list of rules 80 | """ 81 | ordered_rules = [] 82 | ordered_check_list = [] 83 | 84 | for rule_id, rule in self.rule_mapping.items(): 85
| check_id = rule.get("check_id") or rule_id 86 | if check := self.check_list.get(check_id): 87 | dependencies = self.get_all_dependencies(rule, check) 88 | # First add dependencies and their dependencies and so on 89 | self.dependencies_ordering(dependencies, ordered_check_list) 90 | # Then add self 91 | Scheduler.append_if_not_exist(check_id, ordered_check_list) 92 | else: 93 | print(f"Missing entry for {check_id} in `checks.json`") 94 | 95 | for dependency in ordered_check_list: 96 | ordered_rules.extend(self._find_rule_ids_based_on_check_id(dependency)) 97 | 98 | return ordered_rules 99 | -------------------------------------------------------------------------------- /pyQuARC/code/schema_validator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | 5 | from io import BytesIO 6 | from jsonschema import Draft7Validator, draft7_format_checker, RefResolver 7 | from lxml import etree 8 | from urllib.request import pathname2url 9 | 10 | from .constants import ECHO10_C, SCHEMA_PATHS, UMM_C 11 | 12 | 13 | class SchemaValidator: 14 | """ 15 | Validates downloaded metadata for its schema and returns the result. 16 | """ 17 | 18 | PATH_SEPARATOR = "/" 19 | 20 | def __init__( 21 | self, 22 | check_messages, 23 | metadata_format=ECHO10_C, 24 | ): 25 | """ 26 | Args: 27 | metadata_format (str): The format of the metadata that needs 28 | to be validated. Can be any of { DIF, ECHO10_C, UMM_C, UMM_G }. 29 | check_messages (dict): Check messages keyed by check name, used to 30 | annotate schema "oneOf" failures with a failure message and 31 | remediation (see check_messages.json). 32 | """ 33 | self.metadata_format = metadata_format 34 | if metadata_format.startswith("umm-"): 35 | self.validator_func = self.run_json_validator 36 | else: 37 | self.validator_func = self.run_xml_validator 38 | self.check_messages = check_messages 39 | 40 | def read_xml_schema(self): 41 | """ 42 | Reads the xml schema file 43 | """ 44 | # The XML schema file (echo10_xml.xsd) imports another schema file (MetadataCommon.xsd) 45 | # Python cannot resolve the import if the files are in a different location than the calling script 46 | # Thus we need to set an environment variable to let it know where the files are located 47 | # Path to catalog must be a url 48 | catalog_path = f"file:{pathname2url(str(SCHEMA_PATHS['catalog']))}" 49 | # Temporarily set the environment variable 50 | os.environ["XML_CATALOG_FILES"] = os.environ.get( 51 | "XML_CATALOG_FILES", catalog_path 52 | ) 53 | 54 | with open(SCHEMA_PATHS[f"{self.metadata_format}_schema"]) as schema_file: 55 | file_content = schema_file.read().encode() 56 | xmlschema_doc = etree.parse(BytesIO(file_content)) 57 | schema = etree.XMLSchema(xmlschema_doc) 58 | return schema 59 | 60 | def read_json_schema(self): 61 | """ 62 | Reads the json schema file 63 | """ 64 | with open(SCHEMA_PATHS[f"{self.metadata_format}-json-schema"]) as schema_file: 65 | schema = json.load(schema_file) 66 | return schema 67 | 68 | def run_json_validator(self, content_to_validate): 69 | """ 70 | Validate passed content based on the schema and return any errors 71 | Args: 72 | content_to_validate (str): The metadata content as a json string 73 | Returns: 74 | (dict) A dictionary that gives the validity of the schema and errors if they exist 75 | """ 76 | schema = self.read_json_schema() 77 | schema_store = {} 78 | 79 | if self.metadata_format == UMM_C: 80 | with open(SCHEMA_PATHS["umm-cmn-json-schema"]) as schema_file: 81 | schema_base =
json.load(schema_file) 82 | 83 | # workaround to read local referenced schema file (only supports uri) 84 | schema_store = { 85 | schema_base.get("$id", "/umm-cmn-json-schema.json"): schema_base, 86 | schema_base.get("$id", "umm-cmn-json-schema.json"): schema_base, 87 | } 88 | 89 | errors = {} 90 | 91 | resolver = RefResolver.from_schema(schema, store=schema_store) 92 | 93 | validator = Draft7Validator( 94 | schema, format_checker=draft7_format_checker, resolver=resolver 95 | ) 96 | 97 | for error in sorted( 98 | validator.iter_errors(json.loads(content_to_validate)), key=str 99 | ): 100 | field = SchemaValidator.PATH_SEPARATOR.join( 101 | [str(x) for x in list(error.path)] 102 | ) 103 | message = error.message 104 | remediation = None 105 | if error.validator == "oneOf" and ( 106 | check_message := self.check_messages.get(error.validator) 107 | ): 108 | fields = [ 109 | f'{field}/{obj["required"][0]}' for obj in error.validator_value 110 | ] 111 | message = check_message["failure"].format(fields) 112 | remediation = check_message["remediation"] 113 | errors.setdefault(field, {})["schema"] = { 114 | "message": [f"Error: {message}"], 115 | "remediation": remediation, 116 | "valid": False, 117 | } 118 | return errors 119 | 120 | @staticmethod 121 | def _build_errors(error_log, paths): 122 | """ 123 | Cleans up the error log given by the XML Validator and builds an error object in 124 | the format accepted by our program 125 | 126 | Args: 127 | error_log (str): The error log as output by the xml validator 128 | paths (list): All available paths in the document file 129 | 130 | Returns: 131 | (dict): The formatted error dictionary 132 | """ 133 | errors = {} 134 | lines = error_log.splitlines() 135 | for line in lines: 136 | # For DIF, because the namespace is specified in the metadata file, the lxml library 137 | # provides the field name concatenated with the namespace; 138 | # the following lines of code remove the namespace 139 | namespaces = re.findall(r"(\{http[^}]*\})", line) 140 | for namespace in namespaces: 141 | line = line.replace(namespace, "") 142 | field_name = re.search(r"Element\s'(.*)':", line)[1] 143 | field_paths = [abs_path for abs_path in paths if field_name in abs_path] 144 | field_name = field_paths[0] if len(field_paths) == 1 else field_name 145 | message = re.search(r"Element\s'.+':\s(\[.*\])?(.*)", line)[2].strip() 146 | errors.setdefault(field_name, {})["schema"] = { 147 | "message": [f"Error: {message}"], 148 | "valid": False, 149 | } 150 | return errors 151 | 152 | def run_xml_validator(self, content_to_validate): 153 | """ 154 | Validate passed content based on the schema and return any errors 155 | 156 | Args: 157 | content_to_validate (bytes): The metadata content as an XML string 158 | 159 | Returns: 160 | (dict) A dictionary that gives the validity of the schema and errors if they exist 161 | 162 | """ 163 | schema = self.read_xml_schema() 164 | 165 | xml_content = content_to_validate 166 | doc = etree.parse(BytesIO(xml_content)) 167 | 168 | # Getting a list of available paths in the document 169 | # The validator only gives the field name, not the full path 170 | # Getting this to map it to the full path later 171 | paths = [] 172 | for node in doc.xpath("//*"): 173 | if not node.getchildren() and node.text: 174 | paths.append(doc.getpath(node)[1:]) 175 | 176 | errors = {} 177 | 178 | try: 179 | schema.assertValid(doc) 180 | except etree.DocumentInvalid as err: 181 | errors = SchemaValidator._build_errors(str(err.error_log), paths) 182 | return errors 183 | 184 | def
run(self, metadata): 185 | """ 186 | Runs schema validation on the metadata 187 | 188 | Args: 189 | metadata (str): The original metadata (either xml or json string) 190 | 191 | Returns: 192 | (dict): Result of the validation from xml and json schema validators 193 | """ 194 | return self.validator_func(metadata) 195 | -------------------------------------------------------------------------------- /pyQuARC/code/tracker.py: -------------------------------------------------------------------------------- 1 | class Tracker: 2 | """ 3 | Tracks the status of each check 4 | """ 5 | 6 | def __init__(self, rule_mapping, rules_override, metadata_format): 7 | """ 8 | Args: 9 | rule_mapping (dict): The mapping from rule to fields 10 | rules_override (dict): The override of mapping from rule to fields 11 | metadata_format (str): The format of the metadata file (eg. echo10, dif10) 12 | """ 13 | self.data = Tracker.create_initial_track( 14 | rule_mapping, rules_override, metadata_format 15 | ) 16 | 17 | @staticmethod 18 | def create_initial_track(rule_mapping, rules_override, metadata_format): 19 | """ 20 | Creates the initial tracking data where, for each rule, the applied 21 | status is False and the validity is None 22 | 23 | Args: 24 | rule_mapping (dict): The mapping from rule to fields 25 | rules_override (dict): The override of mapping from rule to fields 26 | metadata_format (str): The format of the metadata file (eg. echo10, dif10) 27 | 28 | Returns: 29 | (dict): A dictionary in the form: 30 | { 31 | "rule_id": [ 32 | { 33 | "field": "field_name", 34 | "valid": "validity_status", 35 | "applied": "applied_status" 36 | }, 37 | ... 38 | ], 39 | ... 40 | } 41 | """ 42 | data = {} 43 | keys = list(rule_mapping.keys()) 44 | keys += list(rules_override.keys()) 45 | for rule_id in set(keys): 46 | data[rule_id] = [] 47 | rule = rules_override.get(rule_id) or rule_mapping.get(rule_id) 48 | for field in rule["fields_to_apply"].get(metadata_format, {}): 49 | data[rule_id].append( 50 | {"field": field["fields"][0], "applied": False, "valid": None} 51 | ) 52 | return data 53 | 54 | def update_data(self, rule_id, field, validity): 55 | """ 56 | Updates the tracking value for `rule_id` and `field` with the `validity` status 57 | 58 | Args: 59 | rule_id (str): The id of the rule 60 | field (str): The field that the rule is applied to 61 | validity (bool): The validity status of the rule for the field 62 | """ 63 | for idx, row in enumerate(self.data[rule_id]): 64 | if row["field"] == field: 65 | self.data[rule_id][idx]["valid"] = validity 66 | self.data[rule_id][idx]["applied"] = True 67 | 68 | def read_data(self, rule_id, field): 69 | """ 70 | Reads the tracking data for `rule_id` and `field` 71 | 72 | Args: 73 | rule_id (str): The id of the rule 74 | field (str): The field path 75 | 76 | Returns: 77 | (dict): A dict of form: { 78 | "field": "field_value", 79 | "valid": "validity_status", 80 | "applied": "applied_status" 81 | } for the `rule_id` and `field` 82 | """ 83 | for row in self.data[rule_id]: 84 | if row["field"] == field: 85 | return row 86 | return {} 87 | -------------------------------------------------------------------------------- /pyQuARC/code/url_validator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | 4 | from urlextract import URLExtract 5 | 6 | from .string_validator import StringValidator 7 | from .utils import get_headers, if_arg 8 | 9 | 10 | class UrlValidator(StringValidator): 11 | """ 12 | Validator class for URLs 13 |
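Example (illustrative): health_and_status_check("See https://example.com for details") extracts the URL and reports its health/status.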
""" 14 | 15 | def __init__(self): 16 | super().__init__() 17 | 18 | @staticmethod 19 | def _extract_http_texts(text_with_urls): 20 | """ 21 | Extracts anything that starts with 'http' from `text_with_urls`. 22 | This is required for catching "wrong" urls that aren't extracted by `URLExtract.find_urls()` because they are not urls at all 23 | An example: https://randomurl 24 | Args: 25 | text_with_urls (str, required): The text that contains the URLs where the check needs to be performed 26 | 27 | Returns: 28 | (list) List of texts that start with 'http' from `text_with_urls` 29 | """ 30 | texts = text_with_urls.split(" ") 31 | starts_with_http = set() 32 | for text in texts: 33 | if text.startswith("http"): 34 | starts_with_http.add(text) 35 | return starts_with_http 36 | 37 | @staticmethod 38 | @if_arg 39 | def health_and_status_check(text_with_urls): 40 | """ 41 | Checks the health and status of the URLs included in `text_with_urls` 42 | Args: 43 | text_with_urls (str, required): The text that contains the URLs where the check needs to be performed 44 | Returns: 45 | (dict) An object with the validity of the check and the instance/results 46 | """ 47 | 48 | def status_code_from_request(url): 49 | headers = get_headers() 50 | # timeout = 10 seconds, to allow for slow but not invalid connections 51 | return requests.get(url, headers=headers, timeout=10).status_code 52 | 53 | results = [] 54 | 55 | validity = True 56 | 57 | # extract URLs from text 58 | extractor = URLExtract(cache_dir=os.environ.get("CACHE_DIR")) 59 | urls = extractor.find_urls(text_with_urls) 60 | urls.extend(UrlValidator._extract_http_texts(text_with_urls)) 61 | 62 | # remove dots at the end (The URLExtract library catches URLs, but sometimes appends a '.' at the end) 63 | # remove duplicated urls 64 | urls = set(url[:-1] if url.endswith(".") else url for url in urls) 65 | value = ", ".join(urls) 66 | 67 | # check that URL returns a valid response 68 | for url in urls: 69 | if not url.startswith("http"): 70 | url = f"http://{url}" 71 | try: 72 | response_code = status_code_from_request(url) 73 | if response_code == 200: 74 | if url.startswith("http://"): 75 | secure_url = url.replace("http://", "https://") 76 | if status_code_from_request(secure_url) == 200: 77 | result = { 78 | "url": url, 79 | "error": "The URL is secure. Please use 'https' instead of 'http'.", 80 | } 81 | else: 82 | continue 83 | else: 84 | result = {"url": url, "error": f"Status code {response_code}"} 85 | except requests.ConnectionError: 86 | result = {"url": url, "error": "The URL does not exist on Internet."} 87 | except: 88 | result = {"url": url, "error": "Some unknown error occurred."} 89 | results.append(result) 90 | 91 | if results: 92 | validity = False 93 | value = results 94 | 95 | return {"valid": validity, "value": value} 96 | 97 | @staticmethod 98 | @if_arg 99 | def doi_check(doi): 100 | """ 101 | Checks if the doi link given in the text is a valid doi link 102 | 103 | Returns: 104 | (dict) An object with the validity of the check and the instance/results 105 | """ 106 | valid = False 107 | if doi.strip().startswith("10."): # doi always starts with "10." 
108 | url = f"https://www.doi.org/{doi}" 109 | valid = UrlValidator.health_and_status_check(url).get("valid") 110 | return {"valid": valid, "value": doi} 111 | 112 | @staticmethod 113 | @if_arg 114 | def doi_link_update(value, bad_urls): # the value is invalid if it appears in the provided list of known bad URLs 115 | validity = True 116 | if value in bad_urls: 117 | validity = False 118 | 119 | return {"valid": validity, "value": value} 120 | -------------------------------------------------------------------------------- /pyQuARC/code/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import urllib.parse 4 | from datetime import datetime 5 | 6 | from functools import wraps 7 | 8 | from .constants import CMR_URL, DATE_FORMATS 9 | 10 | 11 | def if_arg(func): 12 | @wraps(func) 13 | def run_function_only_if_arg(*args): 14 | if args[0]: 15 | return func(*args) 16 | else: 17 | return {"valid": None, "value": None} 18 | 19 | return run_function_only_if_arg 20 | 21 | 22 | def get_headers(): 23 | token = os.environ.get("AUTH_TOKEN") 24 | headers = None 25 | if token: 26 | headers = {"Authorization": f"Bearer {token}"} 27 | return headers 28 | 29 | 30 | def _add_protocol(url): 31 | if not url.startswith("http"): 32 | url = f"https://{url}" 33 | return url 34 | 35 | 36 | def is_valid_cmr_url(url): 37 | url = _add_protocol(url) 38 | valid = False 39 | headers = get_headers() 40 | try: # some invalid urls throw an exception 41 | response = requests.get( 42 | url, headers=headers, timeout=5 43 | ) # some invalid urls freeze 44 | valid = response.status_code == 200 and response.headers.get("CMR-Request-Id") 45 | except: 46 | valid = False 47 | return valid 48 | 49 | 50 | def get_cmr_url(): 51 | cmr_url = os.environ.get("CMR_URL", CMR_URL) 52 | return _add_protocol(cmr_url) 53 | 54 | 55 | def set_cmr_prms(params, format="json", type="collections"): 56 | base_url = f"{type}.{format}?" 57 | params = {key: value for key, value in params.items() if value} 58 | return f"{base_url}{urllib.parse.urlencode(params)}" 59 | 60 | 61 | def cmr_request(cmr_prms): 62 | headers = get_headers() 63 | return requests.get(f"{get_cmr_url()}/search/{cmr_prms}", headers=headers).json() 64 | 65 | 66 | def collection_in_cmr(cmr_prms): 67 | return cmr_request(cmr_prms).get("hits", 0) > 0 68 | 69 | 70 | def get_date_time(dt_str): 71 | """ 72 | Convert a date and time string to a datetime object using predefined formats. 73 | This function attempts to parse a date and time string (`dt_str`) into a `datetime` object. 74 | It iterates over a list of possible date and time formats (`DATE_FORMATS`). The first successful 75 | parse using one of these formats will result in returning the corresponding `datetime` object. 76 | If none of the formats match, the function returns `None`.
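Example (illustrative, assuming "%Y-%m-%d" is among DATE_FORMATS): get_date_time("2020-01-01") returns datetime(2020, 1, 1, 0, 0).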
77 | """ 78 | for fmt in DATE_FORMATS: 79 | try: 80 | date_time = datetime.strptime(dt_str, fmt) 81 | return date_time 82 | except ValueError: 83 | continue 84 | return None 85 | -------------------------------------------------------------------------------- /pyQuARC/schemas/MimeType.csv: -------------------------------------------------------------------------------- 1 | "Hits: 37","page_num: 1","page_size: 2000","Keyword Version: 14.3","Revision: 2022-08-26 10:35:56","Timestamp: 2022-09-15 12:28:30","Terms Of Use: https://cdn.earthdata.nasa.gov/conduit/upload/5182/KeywordsCommunityGuide_Baseline_v1_SIGNED_FINAL.pdf","The most up to date XML representations can be found here: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/MimeType/?format=xml","Case native" 2 | MimeType,UUID 3 | "application/gml+xml","40bdf6e5-780c-43e2-ab8e-e5dfae4bd779" 4 | "application/gzip","a8ee535a-8bc8-46fd-8b97-917bd7ea7666" 5 | "application/json","8542dd4a-a11b-475d-8d46-cad785a7f510" 6 | "application/msword","c79a0e11-2774-4cf3-a194-45b9e58a93fd" 7 | "application/octet-stream","b77e64ef-ce80-4dab-b552-c6062990a6e0" 8 | "application/opensearchdescription+xml","07bcc60e-1551-44d9-b87e-7c260d230ecb" 9 | "application/pdf","627269ae-ba93-492e-8c31-cc4de1d69810" 10 | "application/tar+gzip","43ca8ee0-04a5-4020-b0ec-998ec0e0f30e" 11 | "application/tar+zip","17e82b7c-498d-4d69-993c-fd691aa25ce8" 12 | "application/tar","84ef762f-e348-42a6-981c-563822a47806" 13 | "application/vnd.google-earth.kml+xml","80045dcb-18ee-463a-8baf-ffcabed510ea" 14 | "application/vnd.google-earth.kmz","f7328bf5-8ef2-4f95-a4e0-6fb16d122237" 15 | "application/vnd.ms-excel","7c99ff72-5239-424d-a0bf-9712c33ea76d" 16 | "application/vnd.opendap.dap4.dmrpp+xml","b26761fa-8d8e-4bd8-a8ba-db6575554ad7" 17 | "application/x-bufr","e384b8a8-8cec-4230-9ebe-4db76bbef706" 18 | "application/x-hdf5","4e80047b-c50b-4805-ac68-789dbc38803f" 19 | "application/x-hdfeos","b1eac265-2b00-4c39-a429-797c13a2c640" 20 | "application/x-hdf","b0a3e733-4d1b-486f-b56c-c405a5e4367b" 21 | "application/x-netcdf","2b192915-32a8-4b68-a720-8ca8a84f04ca" 22 | "application/x-tar-gz","5e70beda-396e-4cc8-bdd5-70dfc8a1142e" 23 | "application/x-vnd.iso.19139-2+xml","c1a8dbb7-312d-4481-998e-58d126b32080" 24 | "application/xml","dd6c5cea-4100-4973-9ba9-659fdd7fd608" 25 | "application/zip","4e5db77b-bc1d-4f7c-9f13-e3e54e0b2e3b" 26 | "image/bmp","b7687b8f-7a24-4150-bd9d-28e0d53f7554" 27 | "image/gif","ad61b259-8131-4e0e-aac8-a800a0a51ca6" 28 | "image/jpeg","3f697f52-6a1c-4e2c-bd4b-13aaaf45f2e6" 29 | "image/png","edb9e800-ec31-4d5c-848d-c548fd151db2" 30 | "image/tiff","3e048f9e-8f93-4f0c-9f0b-20bafb909c68" 31 | "image/vnd.collada+xml","d3ef6fe7-b6cd-45b4-9a27-d42fa3289116" 32 | "text/css","3195dfce-51db-4b40-aadb-808b43573743" 33 | "text/csv","2065aabb-9beb-4c84-8ad7-0e16cfed17cf" 34 | "text/html","415a10b5-7286-4195-a88e-00c7b995b7d0" 35 | "text/javascript","40cb0bdd-67f4-43c8-aa57-2bdc260e6950" 36 | "text/markdown","b403039f-a107-4a84-88a1-29e4d1b30b0b" 37 | "text/plain","fea4e0a7-d794-481d-9915-52f1be226714" 38 | "text/xml","091b6afc-ab75-4790-8e71-3b32ae8bd0a4" 39 | -------------------------------------------------------------------------------- /pyQuARC/schemas/catalog.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /pyQuARC/schemas/check_messages_override.json: 
-------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /pyQuARC/schemas/checks.json: -------------------------------------------------------------------------------- 1 | { 2 | "datetime_compare": { 3 | "data_type": "datetime", 4 | "check_function": "compare", 5 | "dependencies": [ 6 | [ 7 | "datetime_format_check" 8 | ] 9 | ], 10 | "available": true 11 | }, 12 | "datetime_format_check": { 13 | "data_type": "datetime", 14 | "check_function": "iso_format_check", 15 | "available": true 16 | }, 17 | "date_or_datetime_format_check": { 18 | "data_type": "datetime", 19 | "check_function": "date_or_datetime_format_check", 20 | "available": true 21 | }, 22 | "url_check": { 23 | "data_type": "url", 24 | "check_function": "health_and_status_check", 25 | "available": true 26 | }, 27 | "string_compare": { 28 | "data_type": "string", 29 | "check_function": "compare", 30 | "available": true 31 | }, 32 | "length_check": { 33 | "data_type": "string", 34 | "check_function": "length_check", 35 | "available": true 36 | }, 37 | "doi_validity_check": { 38 | "data_type": "url", 39 | "check_function": "doi_check", 40 | "available": true 41 | }, 42 | "controlled_keywords_check": { 43 | "data_type": "string", 44 | "check_function": "controlled_keywords_check", 45 | "available": true 46 | }, 47 | "science_keywords_gcmd_check": { 48 | "data_type": "string", 49 | "check_function": "science_keywords_gcmd_check", 50 | "available": true 51 | }, 52 | "ends_at_present_flag_logic_check": { 53 | "data_type": "custom", 54 | "check_function": "ends_at_present_flag_logic_check", 55 | "available": true 56 | }, 57 | "ends_at_present_flag_presence_check": { 58 | "data_type": "custom", 59 | "check_function": "ends_at_present_flag_presence_check", 60 | "available": true 61 | }, 62 | "availability_check": { 63 | "data_type": "custom", 64 | "check_function": "availability_check", 65 | "available": true 66 | }, 67 | "mime_type_check": { 68 | "data_type": "custom", 69 | "check_function": "mime_type_check", 70 | "available": true 71 | }, 72 | "organization_short_name_gcmd_check": { 73 | "data_type": "string", 74 | "check_function": "organization_short_name_gcmd_check", 75 | "available": true 76 | }, 77 | "organization_long_name_gcmd_check": { 78 | "data_type": "string", 79 | "check_function": "organization_long_name_gcmd_check", 80 | "available": true 81 | }, 82 | "organization_short_long_name_consistency_check": { 83 | "data_type": "string", 84 | "check_function": "organization_short_long_name_consistency_check", 85 | "available": true 86 | }, 87 | "instrument_short_long_name_consistency_check": { 88 | "data_type": "string", 89 | "check_function": "instrument_short_long_name_consistency_check", 90 | "available": true 91 | }, 92 | "instrument_short_name_gcmd_check": { 93 | "data_type": "string", 94 | "check_function": "instrument_short_name_gcmd_check", 95 | "available": true 96 | }, 97 | "instrument_long_name_gcmd_check": { 98 | "data_type": "string", 99 | "check_function": "instrument_long_name_gcmd_check", 100 | "available": true 101 | }, 102 | "instrument_long_name_presence_check": { 103 | "data_type": "string", 104 | "check_function": "instrument_long_name_presence_check", 105 | "available": true 106 | }, 107 | "validate_granule_instrument_against_collection": { 108 | "data_type": "string", 109 | "check_function": "validate_granule_instrument_against_collection", 110 | "available": true 111 | }, 112 | 
"platform_short_name_gcmd_check": { 113 | "data_type": "string", 114 | "check_function": "platform_short_name_gcmd_check", 115 | "available": true 116 | }, 117 | "platform_long_name_gcmd_check": { 118 | "data_type": "string", 119 | "check_function": "platform_long_name_gcmd_check", 120 | "available": true 121 | }, 122 | "platform_type_gcmd_check": { 123 | "data_type": "string", 124 | "check_function": "platform_type_gcmd_check", 125 | "available": true 126 | }, 127 | "platform_long_name_presence_check": { 128 | "data_type": "string", 129 | "check_function": "platform_long_name_presence_check", 130 | "available": true 131 | }, 132 | "platform_short_long_name_consistency_check": { 133 | "data_type": "string", 134 | "check_function": "platform_short_long_name_consistency_check", 135 | "available": true 136 | }, 137 | "validate_granule_platform_against_collection": { 138 | "data_type": "string", 139 | "check_function": "validate_granule_platform_against_collection", 140 | "available": true 141 | }, 142 | "spatial_keyword_gcmd_check": { 143 | "data_type": "string", 144 | "check_function": "spatial_keyword_gcmd_check", 145 | "available": true 146 | }, 147 | "location_gcmd_check": { 148 | "data_type": "string", 149 | "check_function": "location_gcmd_check", 150 | "available": true 151 | }, 152 | "campaign_short_long_name_consistency_check": { 153 | "data_type": "string", 154 | "check_function": "campaign_short_long_name_consistency_check", 155 | "available": true 156 | }, 157 | "campaign_short_name_gcmd_check": { 158 | "data_type": "string", 159 | "check_function": "campaign_short_name_gcmd_check", 160 | "available": true 161 | }, 162 | "campaign_long_name_gcmd_check": { 163 | "data_type": "string", 164 | "check_function": "campaign_long_name_gcmd_check", 165 | "available": true 166 | }, 167 | "campaign_long_name_presence_check": { 168 | "data_type": "string", 169 | "check_function": "campaign_long_name_presence_check", 170 | "available": true 171 | }, 172 | "data_format_gcmd_check": { 173 | "data_type": "string", 174 | "check_function": "data_format_gcmd_check", 175 | "available": true 176 | }, 177 | "mime_type_gcmd_check": { 178 | "data_type": "string", 179 | "check_function": "mime_type_gcmd_check", 180 | "available": true 181 | }, 182 | "idnnode_shortname_gcmd_check": { 183 | "data_type": "string", 184 | "check_function": "idnnode_shortname_gcmd_check", 185 | "available": true 186 | }, 187 | "temporal_range_res_gcmd_check": { 188 | "data_type": "string", 189 | "check_function": "temporal_range_res_gcmd_check", 190 | "available": true 191 | }, 192 | "vertical_range_res_gcmd_check": { 193 | "data_type": "string", 194 | "check_function": "vertical_range_res_gcmd_check", 195 | "available": true 196 | }, 197 | "horizontal_range_res_gcmd_check": { 198 | "data_type": "string", 199 | "check_function": "horizontal_range_res_gcmd_check", 200 | "available": true 201 | }, 202 | "chrono_unit_gcmd_check": { 203 | "data_type": "string", 204 | "check_function": "chrono_gcmd_check", 205 | "available": true 206 | }, 207 | "one_item_presence_check": { 208 | "data_type": "custom", 209 | "check_function": "one_item_presence_check", 210 | "available": true 211 | }, 212 | "dif_standard_product_check": { 213 | "data_type": "custom", 214 | "check_function": "dif_standard_product_check", 215 | "available": true 216 | }, 217 | "doi_link_update": { 218 | "data_type": "url", 219 | "check_function": "doi_link_update", 220 | "available": true 221 | }, 222 | "bounding_coordinate_logic_check": { 223 | "data_type": "custom", 
224 | "check_function": "bounding_coordinate_logic_check", 225 | "available": true 226 | }, 227 | "user_services_check": { 228 | "data_type": "custom", 229 | "check_function": "user_services_check", 230 | "available": true 231 | }, 232 | "doi_missing_reason_explanation": { 233 | "data_type": "custom", 234 | "check_function": "doi_missing_reason_explanation", 235 | "available": true 236 | }, 237 | "boolean_check": { 238 | "data_type": "custom", 239 | "check_function": "boolean_check", 240 | "available": true 241 | }, 242 | "collection_progress_consistency_check": { 243 | "data_type": "custom", 244 | "check_function": "collection_progress_consistency_check", 245 | "available": true 246 | }, 247 | "online_resource_type_gcmd_check": { 248 | "data_type": "string", 249 | "check_function": "online_resource_type_gcmd_check", 250 | "available": true 251 | }, 252 | "license_url_description_check": { 253 | "data_type": "custom", 254 | "check_function": "license_url_description_check", 255 | "available": true 256 | }, 257 | "uniqueness_check": { 258 | "data_type": "custom", 259 | "check_function": "uniqueness_check", 260 | "available": true 261 | }, 262 | "validate_ending_datetime_against_granules": { 263 | "data_type": "datetime", 264 | "check_function": "validate_ending_datetime_against_granules", 265 | "available": true 266 | }, 267 | "validate_beginning_datetime_against_granules": { 268 | "data_type": "datetime", 269 | "check_function": "validate_beginning_datetime_against_granules", 270 | "available": true 271 | }, 272 | "validate_granule_data_format_against_collection_check": { 273 | "data_type": "string", 274 | "check_function": "validate_granule_data_format_against_collection", 275 | "available": true 276 | }, 277 | "get_data_url_check": { 278 | "data_type": "custom", 279 | "check_function": "get_data_url_check", 280 | "available": true 281 | }, 282 | "granule_project_short_name_check": { 283 | "data_type": "string", 284 | "check_function": "granule_project_short_name_check", 285 | "available": true 286 | }, 287 | "granule_sensor_short_name_check": { 288 | "data_type": "string", 289 | "check_function": "granule_sensor_short_name_check", 290 | "available": true 291 | }, 292 | "granule_data_format_presence_check": { 293 | "data_type": "custom", 294 | "check_function": "one_item_presence_check", 295 | "available": true 296 | }, 297 | "count_check": { 298 | "data_type": "custom", 299 | "check_function": "count_check", 300 | "available": true 301 | } 302 | } 303 | -------------------------------------------------------------------------------- /pyQuARC/schemas/checks_override.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /pyQuARC/schemas/echo-c_json.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "definitions": { 4 | "RangeDateTime": { 5 | "type": "object", 6 | "properties": { 7 | "BeginningDateTime": { 8 | "type": "string", 9 | "format": "date-time" 10 | }, 11 | "EndingDateTime": { 12 | "type": "string", 13 | "format": "date-time" 14 | } 15 | } 16 | } 17 | }, 18 | "type": "object", 19 | "properties": { 20 | "Collection": { 21 | "type": "object", 22 | "properties": { 23 | "DataSetId": { 24 | "type": "string", 25 | "minLength": 1, 26 | "maxLength": 1030, 27 | "description": "Specifies a unique name for the collection. 
This is considered the primary identifier for a collection." 28 | }, 29 | "ProcessingLevelId": { 30 | "type": "string", 31 | "minLength": 1, 32 | "maxLength": 80, 33 | "description": "The processing level class contains the level identifier and level description of the collection." 34 | }, 35 | "Temporal": { 36 | "type": "object", 37 | "oneOf": [ 38 | { 39 | "required": ["RangeDateTime"] 40 | }, 41 | { 42 | "required": ["SingleDateTime"] 43 | }, 44 | { 45 | "required": ["PeriodicDateTime"] 46 | } 47 | ], 48 | "properties": { 49 | "RangeDateTime": { 50 | "$ref": "#/definitions/RangeDateTime" 51 | }, 52 | "SingleDateTime": { 53 | "type": "string", 54 | "format": "date-time" 55 | }, 56 | "PeriodicDateTime": { 57 | "type": "string", 58 | "format": "date-time" 59 | } 60 | } 61 | }, 62 | "DOI": { 63 | "type": "object", 64 | "oneOf": [ 65 | { 66 | "required": [ 67 | "DOI" 68 | ] 69 | }, 70 | { 71 | "required": [ 72 | "MissingReason" 73 | ] 74 | } 75 | ], 76 | "properties": { 77 | "DOI": { 78 | "type": "string" 79 | }, 80 | "Authority": { 81 | "type": "string" 82 | }, 83 | "MissingReason": { 84 | "type": "string" 85 | }, 86 | "Explanation": { 87 | "type": "string" 88 | } 89 | } 90 | } 91 | } 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /pyQuARC/schemas/granuledataformat.csv: -------------------------------------------------------------------------------- 1 | "Hits: 125","page_num: 1","page_size: 2000","Keyword Version: 14.3","Revision: 2022-08-26 10:36:02","Timestamp: 2022-09-15 16:21:38","Terms Of Use: https://cdn.earthdata.nasa.gov/conduit/upload/5182/KeywordsCommunityGuide_Baseline_v1_SIGNED_FINAL.pdf","The most up to date XML representations can be found here: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/DataFormat/?format=xml","Case native" 2 | Short_Name,Long_Name,UUID 3 | "ACCDB","Microsoft Access ACCDB File Format Family","378d394c-8230-49f1-8ba0-8ee7e7abe316" 4 | "ADF","ArcInfo Binary Grid Format","81782805-cc0d-4325-8f35-aa0c8e439b16" 5 | "AGF","Atlas Geo File","067315ec-ed74-401f-b828-d568351211d1" 6 | "AMES","NASA Ames Format for Data Exchange","08208d5b-1311-4c14-b8c1-5f64f75cb392" 7 | "AREA","McIDAS Image File Format","ebe6f3c4-a78f-4152-977c-296d42e4e9e8" 8 | "ASCII Grid","Esri American Standard Code for Information Interchange Grid","d7d5b771-8c3e-44d5-ba14-8a3757cecbaf" 9 | "ASCII Raster","Esri American Standard Code for Information Interchange Raster","1aee02b9-bf37-4197-bb7f-bd6f040d26f6" 10 | "ASCII","American Standard Code for Information Interchange","8e128326-b9cb-44c7-9e6b-4bd950a08753" 11 | "AVI","Audio Video Interleaved","2a6763f9-0ef9-4879-9b7d-6ce023c75871" 12 | "ArcInfo Coverage","Esri ArcInfo Coverage","6616fd27-7c2a-4d3f-b361-20ad423e31a2" 13 | "ArcInfo Interchange","Esri ArcInfo Interchange File","5a94a0c5-093d-4c9e-bd48-f2ddf1c4daa6" 14 | "BIL","Band Interleaved by Line","4dc86020-d8e0-49d2-b217-a48c18111b51" 15 | "BIP","Band Interleaved By Pixel","58fdfa0e-ca66-4534-948e-0cc0dbc219dd" 16 | "BMP","Bitmap Image File","7aafe1ad-b785-4805-ac76-2aa102cdb7e4" 17 | "BNA","Boundary File Format","b5849782-087e-4580-8d39-19777a96d736" 18 | "BSQ","Band Sequential Image","bd2ced35-e9f5-4ab6-a85d-0f45fac62c00" 19 | "BUFR","Binary Universal Form for the Representation of Meteorological Data","d40b49ce-c201-431c-9fdf-4db38f9b97b2" 20 | "BigTIFF","Big Tagged Image File Format","832bfe62-e5c3-4c89-bee9-4c1b6e80dc9f" 21 | "Binary","","3a3d2a90-5cf6-4ddd-a3c4-c88fa0c6941d" 22 | "CCSDS","Consultative 
Committee for Space Data Systems","2da9aa88-c3d4-4307-a86f-e048b6297899" 23 | "CEOS","Committee on Earth Observation Satellites","c0158436-501c-47e5-9a92-6416ef81d0b9" 24 | "COG","Cloud Optimized GeoTIFF","23a0f833-fc2b-4922-8998-371a4a18bd17" 25 | "CR2","Canon Raw Version 2","87ae4923-1a96-4fa2-a560-ddf27de4fcba" 26 | "CRD","Consolidated Laser Ranging Data Format","49028622-39d1-46b1-b89f-0fc2b4923882" 27 | "CSV","Comma-Separated Values File","465809cc-e76c-4630-8594-bb8bd7a1a380" 28 | "DBF","dBASE Database File","c087e039-7d05-4067-a0e3-150e0ff19aa3" 29 | "DEM","Shuttle Topography Mission (SRTM) Data File","191a9f1d-cfd3-469d-86b8-ce2d2355034a" 30 | "DLG","Digital Line Graph","78fe04cb-d68f-4b4f-a6b5-3b59660d3f95" 31 | "DTA","State Data Format","61225045-76c9-439f-a44b-b5c1a26635f1" 32 | "DXF","Drawing Interchange Format","e1b78739-ebcf-43c9-8181-3d4e79003f72" 33 | "ENVI","Environment for Visualizing Images File Format","5782afb9-caa8-44c4-8dec-e793d990b75d" 34 | "EPS","Encapsulated Postscript","22996c6c-4a7c-4ff3-9782-1fc3c680a88c" 35 | "Excel","Microsoft Excel","e807acba-457e-4f7e-be44-da5f93f4118b" 36 | "FITS","Flexible Image Transport System","b46bb83e-736e-4189-b1d8-6f0570fdfa6c" 37 | "GIF","Graphics Interchange Format","3be6e181-085f-4a53-b7ed-851f1980dc71" 38 | "GMT","Generic Mapping Tools","f1736125-bd43-47d3-9125-262baa182f99" 39 | "GRIB1","General Regularly-Distributed Information in Binary Form-1","f4930ca9-6b68-4bb8-adb0-81a571e20a53" 40 | "GRIB2","General Regularly-Distributed Information in Binary Form-2","94961648-292b-46d9-bc9d-502bc19f0d55" 41 | "GRIDFloat","Esri GridFloat Output File","600ef75c-9c03-41ba-a57d-4c913acd4cb5" 42 | "GTE","GTE Data Archive Format","59928592-b670-47dc-b025-3ac040ae679e" 43 | "GeoJSON","JavaScript Object Notation for Geographical Features","2648d5e0-a8fd-4cd0-8060-0c63e15d3a67" 44 | "GeoPackage","","ac33b396-4ab5-4873-89c6-e248621961d4" 45 | "GeoTIFF","Georeferenced Tagged Image File Format","668db73b-2a1c-4e92-8e0e-fda3131b4aac" 46 | "Geodatabase","Esri Geodatabase","d0b3505e-77bb-45a0-83fe-787bc3812b67" 47 | "Grid","","bb6184eb-1ced-44fb-9668-d57cf1baa2e3" 48 | "HDF-EOS2","Hierarchical Data Format - Earth Observing System Version 2","db86a588-b3e3-46fe-98b0-9f0699e918a5" 49 | "HDF-EOS4","Hierarchical Data Format - Earth Observing System Version 4","2611e09d-5eb2-4159-a917-7e373727a825" 50 | "HDF-EOS5","Hierarchical Data Format - Earth Observing System Version 5","0e1b63cf-966f-4f42-9575-e6b362de9aaa" 51 | "HDF4","Hierarchical Data Format Version 4","e5c126f8-0435-4cef-880f-72a1d2d792f2" 52 | "HDF5","Hierarchical Data Format Version 5","1c406abc-104d-4517-96b8-dbbcf515f00f" 53 | "HGT","Shuttle Radar Topography Mission (SRTM) Data file","2c37a52f-c159-4d16-a5c1-36ca833863e1" 54 | "HTML","HyperText Markup Language","d896a8cc-4fce-4a8d-86bc-185b324fab2b" 55 | "ICARTT","International Consortium for Atmospheric Research on Transport and Transformation","23ccdce5-cd51-424e-b82a-67e076a86994" 56 | "ICI","","0679d78d-0931-4948-94ec-46ab130785a6" 57 | "IFC","Industry Foundation Classes","5513aef2-5676-4a19-9652-edda4d753c2e" 58 | "IIQ","Intelligent Image Quality","28acdf05-9aed-44e8-a58c-e47291bbc28f" 59 | "IONEX","IONEX Format Data","133c2aee-50f9-4260-b792-6d6694399da3" 60 | "ISI","Indian Standards Institute","43e7a701-2ce2-41e8-8893-6b75963fcfe0" 61 | "IWRF","Integrated Weather Radar Facility Time Series Format","e1c88c22-0bf2-4798-89fd-fa7d875e4b6c" 62 | "JPEG2000","Joint Photographic Experts Group Format 
2000","e1544d27-a3b8-4b14-95aa-e25b21bcce1f" 63 | "JPEG","Joint Photographic Experts Group Format","7443bb2d-1dbb-44d1-bd29-0241d30fbc57" 64 | "JSON-LD","JavaScript Object Notation for Linked Data","adc9dbd1-13ec-4f36-aa3a-378af76ba34b" 65 | "JSON","JavaScript Object Notation","6a602f95-1d4d-483f-90e6-674dec7bc01b" 66 | "KML","Keyhole Markup Language","809da52c-3147-403c-8d4e-e06119ef89f9" 67 | "KMZ","Keyhole Markup Language Zipped","e6ab1f01-1c2c-46a5-97fa-5b8bb874fc31" 68 | "LAS","LAS (LASer) Format","181d354f-af90-4aaf-9167-ff3db9f6cb13" 69 | "LAZ","Compressed Light Detection and Ranging (Lidar) Data Format","31aab472-99df-43a4-a4bb-09cd087af860" 70 | "Little-Endian","","5cd15e09-8082-4049-98ae-f402e54929a4" 71 | "MAT","MATLAB","10de1987-5896-42d6-be7c-506fd7ba1f21" 72 | "MDB","Microsoft Access MDB File Format Family","1b5f24f0-a524-4d6b-a20a-6bc685d46c8e" 73 | "MOV","QuickTime Movie","8f05ce85-385b-491a-abe5-a39f426e4832" 74 | "MP4","MPEG-4","a593731a-ff49-47d8-bed0-7f3f9d3a9e33" 75 | "MSR","MetaSensing Rawdata","a1edd41d-50e1-4d1f-9062-7d2649605e38" 76 | "McIDAS","Man computer Interactive Data Access System","77f3bc44-4f05-432d-9ee3-2e6859ca2896" 77 | "NBJ","U.S. Military Campgrounds Directory Software","5c71f6b1-6978-416d-a213-f42a7d2728e3" 78 | "NIDS","National Indicator Data Set","a82ccef5-a7bf-416c-8ef6-704e62564901" 79 | "NITF21NCDRD","NITF (NCDRD) Raster Type","20c82be1-789f-4441-9728-d51cf2704523" 80 | "Not Provided","","c7c7819e-4805-4b0a-819e-82b453e8fdd0" 81 | "ODB","OpenDocument Database Front End Document Format","7f6d6202-7fb2-4ad5-bbfe-114803316b63" 82 | "ODS","OpenDocument Spreadsheet Document Format","a2904083-1e32-4086-b028-a2afc2ffd443" 83 | "Open XML Spreadsheet","Microsoft Excel Open XML Spreadsheet","71467c5c-c54c-48b6-8420-0d19e272d1c9" 84 | "PDF","Portable Document Format","ac392872-1571-4bfd-94dd-81f93d9f1fd0" 85 | "PNG","Portable Network Graphics","131c1f06-d827-4b0f-b2cf-d0585f221be1" 86 | "PSD","Photoshop Format","5e17145e-b1c9-473b-9e5e-7680c286c64a" 87 | "Parquet","Column-oriented Data Storage Format","cbcdfa99-4403-4370-a8d1-03b2327a51aa" 88 | "PowerPoint","Microsoft PowerPoint","ceb2056a-ab97-441b-9491-62df68970205" 89 | "RB5","Qualcomm Flight RB5 5G","20f86520-af22-4e57-9d83-06a7e2ca5280" 90 | "RData","R Data Format Family","53ce2fee-84fe-40c4-bd26-b2dcce86021a" 91 | "RINEX","Receiver Independent Exchange Format","48571017-0cc3-4ac7-b9ab-d0df8ed99a6c" 92 | "SAFE","Standard Archive Format for Europe","87eddebb-2ef4-4597-bfac-1b97e9f54440" 93 | "SAS","SAS Transport Data Set Files","6231402a-7e4c-42d9-802d-7184eb812f46" 94 | "SDTS","Spatial Data Transfer Standard","cfe55cb7-b9db-4e56-801e-0c7c605c887c" 95 | "SEG-Y","Society of Exploration Geophysicists File Format","ee05a43b-abfa-4955-847b-86cb590fba53" 96 | "SIARD","Software Independent Archiving of Relational Databases","3bcc2483-169c-4dc3-a29c-c4c2dbab6926" 97 | "SIGMET IRIS","Vaisala Sigmet Interactive Radar Information System IRIS","60d4013b-f58c-42f6-8680-d41d8e6ee90f" 98 | "SLPK","Scene Layer Package","789d230c-8557-493b-8bdc-4e810f4f7968" 99 | "SPC","Thermo Scientific SPC File","c142d352-cfc0-493a-a0f1-167ad22530c4" 100 | "SPSS","Statistical Package for the Social Sciences (SPSS) Data File","e3b8b3e3-3dd0-41c2-a784-9697ba934d2d" 101 | "SQLite","Standard Query Language Lite","37adee23-d239-4e1d-8ac8-1c7e26f36dc6" 102 | "SYLK","Symbolic Link","dd61ff8e-5257-4018-8d99-3e8bdec58837" 103 | "Sea-Bird CTD","Sea-Bird for Conductivity, Temperature, and Depth","ddcd5941-9ba8-4c39-a498-1e214d1bfa6e" 104 | 
"SeaBASS","SeaWiFS Bio-Optical Archive and Storage System Data Format","be0d9b66-445d-4109-8784-63f1ea80e729" 105 | "Shapefile","Esri Shapefile","a1d1fbdd-98f4-41e9-9aeb-a369a3b466a4" 106 | "SonTek Castaway CTD","Castaway Conductivity, Temperature, and Depth","8b7ff128-106e-4a40-9de9-5bf3b6404fc2" 107 | "TAR","Tape Archive","19139079-41cd-47d5-b581-93bed04ade29" 108 | "TIFF","Tagged Image File Format","0225ee3e-c3b1-4a5d-bd22-b36462330b00" 109 | "Text File","","7de32ba8-eb3a-4e02-add3-3d828e46bd57" 110 | "TimeseriesML","Timeseries Profile of Observations and Measurements","80323a44-3233-4472-b0d9-158cb9371719" 111 | "UF","Universal Format","f8c1fd07-20b6-47fe-a420-f01679c523d1" 112 | "VPF","Vector Product Format","75ce5e67-f1c3-4a53-bad3-34ba6f104a8c" 113 | "Valeport CTD","Valeport Conductivity, Temperature, and Depth","1528fc97-9aa0-4417-887b-c746fb0c6d23" 114 | "WKI","Lotus 1-2-3 Spreadsheet","5d95e94b-95f7-4db3-a555-a1363fc23df0" 115 | "WKT","Well-known text","5aedf9cd-c6cb-4c62-b54f-050549e4a270" 116 | "WaterML","Water Markup Language","8b3ff8b7-92b4-48ba-97dd-29e925069745" 117 | "Word","Microsoft Word","f49d1197-2ed0-4f88-98ce-fbb86bbb03e1" 118 | "XML","Extensible Markup Language","9efbc54a-b45c-47af-bac7-20e504483dc4" 119 | "XTDR","External Standard Data Representation","83698059-cf52-4267-9b2c-599f671d1bd6" 120 | "YAML","YAML Ain't Markup Language","de132e9b-fc00-4c79-8a65-57f774d9a673" 121 | "Zarr","Chunked Compressed N-dimensional Arrays","ed9804d8-e1ad-4209-8fea-30fba3d47ed7" 122 | "miniSEED","mini Standard for the Exchange of Earthquake Data","3b7e3e2e-8f14-4e07-a84c-00dc9750cc10" 123 | "netCDF-2","Network Common Data Format Version 2","b62836b1-87f6-49fe-92e6-cd481c6d8456" 124 | "netCDF-3","Network Common Data Format Version 3","868fc5dc-21fa-4356-a3de-f4c3c9559a21" 125 | "netCDF-4 classic","Network Common Data Format Version 4 Classic","fa52494f-c855-4d6c-a4dc-46b3090cc6e3" 126 | "netCDF-4","Network Common Data Format Version 4","30ea4e9a-4741-42c9-ad8f-f10930b35294" 127 | -------------------------------------------------------------------------------- /pyQuARC/schemas/horizontalresolutionrange.csv: -------------------------------------------------------------------------------- 1 | "Hits: 15","page_num: 1","page_size: 2000","Keyword Version: 14.3","Revision: 2022-08-26 10:35:31","Timestamp: 2022-09-15 16:21:36","Terms Of Use: https://cdn.earthdata.nasa.gov/conduit/upload/5182/KeywordsCommunityGuide_Baseline_v1_SIGNED_FINAL.pdf","The most up to date XML representations can be found here: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/horizontalresolutionrange/?format=xml","Case native" 2 | Horizontal_Resolution_Range,UUID 3 | "1 km - < 10 km or approximately .01 degree - < .09 degree","6dd8224f-944e-4798-ac48-44c23a567eeb" 4 | "1 meter - < 30 meters","abf43d91-a65d-4b3b-a6dd-593e211b2c7b" 5 | "10 km - < 50 km or approximately .09 degree - < .5 degree","3b36beea-9637-4213-bdbe-42e878ca14df" 6 | "100 km - < 250 km or approximately 1 degree - < 2.5 degrees","2207b375-113b-4499-a5a6-0ae6edc2aae8" 7 | "100 meters - < 250 meters","8d197170-3639-4850-b012-0cae4a288e2b" 8 | "250 km - < 500 km or approximately 2.5 degrees - < 5.0 degrees","8d520e8b-e1c6-4c18-bf8a-7a41b006f66f" 9 | "250 meters - < 500 meters","e5c4876e-47b7-4d53-90a2-081a6b150140" 10 | "30 meters - < 100 meters","437daa1f-f584-4afc-9104-b245f3a3d26d" 11 | "50 km - < 100 km or approximately .5 degree - < 1 degree","35a7a6f2-69fe-4ba7-a2b5-91f83f52afb3" 12 | "500 km - < 1000 km or approximately 5 degrees - < 
10 degrees","e2d588c7-76a5-4655-bfaf-bf66874b61c4" 13 | "500 meters - < 1 km","9e5ebee1-3ba2-4522-8d90-7ddf47987581" 14 | "< 1 meter","08e4b31c-0be3-49cd-9374-caac345e7402" 15 | "> 1000 km or > 10 degrees","c19001e4-dfbf-491b-b6d5-c4d0cee8f2fe" 16 | "Point Resolution","75c9d806-9e29-40f5-b479-4c63c90f77a9" 17 | -------------------------------------------------------------------------------- /pyQuARC/schemas/idnnode.csv: -------------------------------------------------------------------------------- 1 | "Hits: 127","page_num: 1","page_size: 2000","Keyword Version: 14.3","Revision: 2022-08-26 10:35:28","Timestamp: 2022-09-15 14:58:39","Terms Of Use: https://cdn.earthdata.nasa.gov/conduit/upload/5182/KeywordsCommunityGuide_Baseline_v1_SIGNED_FINAL.pdf","The most up to date XML representations can be found here: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/idnnode/?format=xml","Case native" 2 | Short_Name,Long_Name,UUID 3 | "ACADIS","","913d42e2-1641-4f5e-8273-379ddd3812d5" 4 | "ACE/CRC","","13381733-968f-4fd6-b70c-aac6a77b2657" 5 | "AMD/AR","","81f394bf-aa57-4742-98d7-3b9c5fc0b27e" 6 | "AMD/AU","","d7c402cb-af86-46a3-99ec-e507a1a04d2c" 7 | "AMD/BE","","ef83cb9a-bcc3-49bc-bb29-7e95b3e8f55d" 8 | "AMD/CA","","b23bda57-63ec-4f9e-be6c-c49dacb05190" 9 | "AMD/CH","","0d3fac3b-eaab-46bf-aa67-985cae48032f" 10 | "AMD/CL","","d9e5f883-a434-4ad9-b827-ae0709409397" 11 | "AMD/CN","","101a20ca-88bd-45a3-9882-c2e8a1c4dd29" 12 | "AMD/DE","","8875bcf1-a8e6-445c-b5bf-dabbe248265c" 13 | "AMD/EE","","f518808a-38c7-410c-a987-0bbe967c9763" 14 | "AMD/ES","","98d77582-2190-40cf-90b7-d5614920c149" 15 | "AMD/FI","","2132cc3c-ab36-48d3-bc21-7b1d47e9fe53" 16 | "AMD/FR","","61fb16eb-c889-4852-976c-9b638de7bfab" 17 | "AMD/IND","","2736d700-1760-491e-9fe3-dea1907c10a9" 18 | "AMD/INT","","b3c6dc17-6f4a-4ec0-afc0-0007fc20855e" 19 | "AMD/IT","","f81093d9-6bc9-4211-8cfb-17064c7d0edf" 20 | "AMD/JP","","21474392-1374-433e-b4b0-46c19c1561b2" 21 | "AMD/KR","","4d2f1eed-4919-44b7-ac9e-9d0a372b99f3" 22 | "AMD/MY","","76970a29-898b-4052-ac89-a6622241492c" 23 | "AMD/NL","","170ec227-c44b-4787-9889-a50134bcc8da" 24 | "AMD/NO","","0529a07c-08e1-48c0-8619-d9f6880e6d44" 25 | "AMD/NZ","","c59cc5b3-f0ca-4519-af5b-b6a6a8330878" 26 | "AMD/SCAR-MARBIN","","695d70f0-4ea0-4576-84bc-341785afc174" 27 | "AMD/SE","","df312b2e-c2ff-45f7-851c-b5a92dccb848" 28 | "AMD/UA","","8a0868ba-a22a-47b1-9827-d54c274bce35" 29 | "AMD/UK","","6edf29ba-5e3f-4aae-9406-f63abc4cb314" 30 | "AMD/US","","e8e17a9b-c5aa-4924-b065-7a485d019299" 31 | "AMD/UY","","b9e13335-23b0-4396-a902-64ad91396ef2" 32 | "AMD/ZA","","fd2afb0b-ce8b-40f9-942c-754fd83e8579" 33 | "AMD","","47308f11-79b0-46c2-b0c9-06d0b15ae845" 34 | "ANTABIF","","4d1dbf8a-fad8-4b42-b20a-7d64fe40abbc" 35 | "ARCTIC/CN","","c09ed6fa-6b01-4901-9e56-7de45d330a0c" 36 | "ARCTIC/ES","","6345a51f-886d-4cbf-8535-63ef717c73d1" 37 | "ARCTIC/FR","","603e666b-2edc-4d81-b9f7-ca55fd91bb1d" 38 | "ARCTIC/JP","","3b79ca1c-593e-481e-a231-724a3a7f0994" 39 | "ARCTIC/NL","","d3306863-ef0a-4f8c-8eb2-3a403571f1e4" 40 | "ARCTIC/NO","","a67e9739-f8e7-4ea1-9805-3b42d7d0a02c" 41 | "ARCTIC","","d711d9ad-686b-4665-9213-322bf9956caa" 42 | "ARGENTINA/CONAE","","072df70d-da3d-4225-aced-b65337f35da2" 43 | "AUSTRALIA/AMMC","","1d9168a9-9f8e-4e9d-aea9-150b62d799f9" 44 | "AUSTRALIA/ANU","","38d7cefd-85c6-4e92-93aa-a239601ad9ae" 45 | "AUSTRALIA/CSIRO","","cb1ac463-39fe-4f03-b41c-5564411aef50" 46 | "BRAZIL/INPE","","c9f29a5a-fc32-4f94-afd3-e8427f7c579e" 47 | "CANADA/CGDI","","1e9c313a-ca4a-4fab-a80f-c7aeb68a5d0b" 48 | 
"CD","","7fcd5522-84b0-48d1-927d-4d7751064b9c" 49 | "CEOS","","edff0791-121c-40aa-adf2-33feb2bc30e5" 50 | "CHINA/NSMC","","3720a713-d1b8-4f65-8cba-250f9e231e1f" 51 | "CHINA/SMC","","a89ffa20-a111-4f43-b286-58ee76e9513f" 52 | "CLIVAR","","c394da5f-408e-4f69-9053-d303d024429f" 53 | "DOE/ORNL/NGEE/Arctic","","4381ace1-e19b-4a31-9938-010aecab5633" 54 | "DOKIPY","","1274e999-7b99-468d-90e6-935bc7654f7d" 55 | "ECHO","","2efb470f-61f8-4046-b2ee-ebed27e2f310" 56 | "ESA/ESRIN","","956c8ad7-5e1f-4689-a620-2fa2cfdf6b62" 57 | "EU/EMSO","","390e49dc-8ab5-4e69-b16d-f25d26d520b5" 58 | "EUROBIS","","e391283c-257e-4569-b43a-437b8fcb2772" 59 | "FRANCE/CNES","","d4221a3f-66ef-43b7-9d4b-b311675f0622" 60 | "GERMANY/DLR","","5bf0c40e-c279-4f8e-bf14-24458a33beab" 61 | "GERMANY/EUMETSAT","","3197a75d-b6cf-430d-891b-b7fea05a0a2b" 62 | "GERMANY/GFZ","","b131a9e2-e994-41e2-9f61-afcf215f6c39" 63 | "GISTDA","","54db9be0-841b-4fd8-88ad-743f99964e0b" 64 | "GOMC/ESIP","","03718300-c4bf-43cb-a709-8af9863f0869" 65 | "GOMMP","","02f99b1f-5902-42db-a168-9babe290ef08" 66 | "GOMODP/ARC","","d609be88-89f1-4a70-999d-9b7f8fbeab75" 67 | "GOMOOS","","1209a4b6-aae5-4041-ac59-326252a3aace" 68 | "GOSIC/GAW","","25c40da1-4161-4bea-a49b-5635dc6c7f0c" 69 | "GOSIC/GCOS","","11a7ad28-cd0c-437d-a419-a1f0844de3e5" 70 | "GOSIC/GOOS","","ac04669c-e280-4dfc-8d26-a111cf78971c" 71 | "GOSIC/GTOS","","0228bd6f-7f35-452d-ba96-19df1f552284" 72 | "GOSIC","","3fc6d574-ee3a-4a82-af08-6b88a3ae7ead" 73 | "IMBER","","23bfb318-a393-48ae-90e4-82b3d40d7069" 74 | "IPY/NL","","bc7ebca0-7a56-48ba-900f-689ed6f994bf" 75 | "IPY","","8c29c974-d599-4a7a-a05e-769ae04e1a59" 76 | "ISRIC/WDC-Soils","","6cadc09f-68aa-4726-8a11-5af8f347b9d6" 77 | "ISRO/MOSDAC","","87917f21-cfa1-43d1-bea6-3029e16ea878" 78 | "ISRO/NOEDA","","ce899816-4263-4d38-9ed7-6da83dacaeb7" 79 | "ISRO/UOPS","","7dd28f93-f9df-4ce1-b812-f973f3c687aa" 80 | "ISRO","","8875a321-f769-4c6c-9f39-fb597a0e1678" 81 | "JAPAN/JAXA","","d7efff40-2309-4ba3-9a91-ff838e53ad2a" 82 | "JAPAN/JST","","56858bc0-679b-4cb1-acff-03d90f23e1c7" 83 | "KOBIS","","93c41a40-e1c9-43b1-bbf6-c71cf1e55e47" 84 | "MOHCP","","ac2f8729-77aa-4910-ae57-efbc9381ecdd" 85 | "MOHC","","bd50357e-76d9-4568-9a32-288138070d47" 86 | "MOP","","70797bb2-9117-4981-891b-7e6a2f79a7e5" 87 | "NETHERLANDS/NLR-NEONET","","f3a42a8c-2671-4f58-ab2d-68cefe9bd4dd" 88 | "NEW ZEALAND/ICAIR","","2a82a286-2ed1-4038-a4ee-30de38898f44" 89 | "OBIS/AADC","","c6504217-f251-4375-817e-1fcf6edc322f" 90 | "OBIS/AR","","1b054f0b-9521-46e0-8cd4-e8673bd52509" 91 | "OBIS/AU","","48b9719b-25de-413d-8299-2058c7a24730" 92 | "OBIS/BR","","bdc61022-422e-49f8-b5f9-313b7e037fab" 93 | "OBIS/CA","","9abc9ff6-51d1-4f14-a500-fde5cb917a28" 94 | "OBIS/CL","","66041ba2-e4b6-497d-b753-a5a80c8c3345" 95 | "OBIS/CN","","f1151da1-30af-4fc7-9783-51bdd6354c8b" 96 | "OBIS/COML","","b071cdf8-5501-4095-8afe-fbfd3a268797" 97 | "OBIS/CO","","cffbdff0-700c-4964-b9be-ee928dd29b05" 98 | "OBIS/IN","","36ed7ad1-bd3c-489d-82cf-14827276e1b0" 99 | "OBIS/JP","","92783493-aa3f-4bbd-b83d-557962a8932f" 100 | "OBIS/NZ","","a3762c96-8b6b-43bc-9627-b51e895cbca8" 101 | "OBIS/SCAR-MARBIN","","bd6e7ba3-7d6f-4655-9a05-7587f2d87bfe" 102 | "OBIS/SEAMAP","","8f0abfc6-c41f-4269-ad32-be8d8ae1c634" 103 | "OBIS/UA","","a7261961-aa0f-41f1-8756-6f375ccc5140" 104 | "OBIS/US","","b80f1a5d-3beb-4b9e-b7eb-c2037bba255b" 105 | "OBIS/ZA","","3c6dbb11-d46d-4c66-872e-4538c3322c04" 106 | "OBIS","","4683ea2b-dd4d-4d55-b862-7a3e608a8c99" 107 | "PANGAEA","","7cba95ac-1984-4f71-89ca-aa3932f35f8c" 108 | 
"PI-GCOS","","f5a440da-3c08-4f80-960f-03a06f7580d5" 109 | "PacIOOS","","0f2fa2cb-7a5d-4762-b6bc-78a19f98abff" 110 | "RUSSIA/RAS","","a129a240-aa2a-4f58-b15c-dcfab7208663" 111 | "RUSSIA/ROSCOSMOS","","512020f4-adac-42a2-a478-2c2242aa7243" 112 | "SLOAN","","03d77986-98aa-4ad4-9070-2b7a41eadb31" 113 | "SOOS","","51f5487c-098e-4387-b0f0-09eb9d55b0d5" 114 | "TW/NSPO","","42e54d0a-fa33-472b-9c78-1b5783bc315a" 115 | "UKRAINE/WDCGSD","","6f2cd8c2-c011-4c5c-aefb-0c3393984f72" 116 | "UNEP/GRID/ICIMOD","","b6ee7e42-f9f0-4a1f-b598-34495f6415c7" 117 | "UNEP/GRID","","16158f17-bd57-4b30-abdc-f213d3996b4e" 118 | "UN","","bae3773a-b62b-4ce6-b6a8-9d26aba92734" 119 | "USA/CIESIN","","c81db2d7-e55c-4109-8312-96b5bcaab96d" 120 | "USA/DOD","","5870c4a9-1729-440f-b220-b862eeb0d7f8" 121 | "USA/NASA","","9ae20472-ba4c-4b01-8d97-857b73fa3c95" 122 | "USA/NCAR","","0653ebf8-bd12-4bb3-b080-d45bb0b388db" 123 | "USA/NOAA","","3d117d54-4c0f-4630-8b46-a6ddb1ffc9b3" 124 | "USA/NSF","","8a4ce0a0-5655-4baa-b6da-3a24299e6852" 125 | "USA/USDA","","877527cc-51bd-4ea0-872c-d79731abfa84" 126 | "USA/USGS","","1b25904e-7ccd-4ef3-b733-2ef741163d54" 127 | "WGISS27","","c6af1a42-5c10-4eec-b787-b78c15fb4014" 128 | "vERSO","vERSO Project: Ecosystem Responses to Global Change: A Multi-scale Approach in the Southern Ocean","092d6914-19ac-4db8-bcf7-6d33461022c8" 129 | -------------------------------------------------------------------------------- /pyQuARC/schemas/rucontenttype.csv: -------------------------------------------------------------------------------- 1 | "Hits: 102","page_num: 1","page_size: 2000","Keyword Version: 14.3","Revision: 2022-08-26 10:35:58","Timestamp: 2022-09-15 13:11:25","Terms Of Use: https://cdn.earthdata.nasa.gov/conduit/upload/5182/KeywordsCommunityGuide_Baseline_v1_SIGNED_FINAL.pdf","The most up to date XML representations can be found here: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/rucontenttype/?format=xml","Case native" 2 | URLContentType,Type,Subtype,UUID 3 | "CollectionURL","DATA SET LANDING PAGE","","8826912b-c89e-4810-b446-39b98b5d937c" 4 | "CollectionURL","EXTENDED METADATA","DMR++ MISSING DATA","4cc17021-b9cc-4b3f-a4f1-f05f7c1aeb2d" 5 | "CollectionURL","EXTENDED METADATA","DMR++","f02b0c6a-7fd9-473d-a1cb-a6482e8daa61" 6 | "CollectionURL","EXTENDED METADATA","","3c9d4493-22fd-48a8-9af5-bf0d16b7ede5" 7 | "CollectionURL","PROFESSIONAL HOME PAGE","","f00cf885-8fc5-42ca-a70e-1689530f00cf" 8 | "CollectionURL","PROJECT HOME PAGE","","6e72d128-7d28-4bd0-bac0-8c5ffd8b31f1" 9 | "CollectionURL","","","c7bbd6c7-8b0a-46ed-a428-a2f0453ed69e" 10 | "DataCenterURL","HOME PAGE","","05c685ab-8ce0-4b8a-8eba-b15fc6bbddfa" 11 | "DataCenterURL","","","b2df0d8e-d236-4fd2-a4f6-12951b3bb17a" 12 | "DataContactURL","HOME PAGE","","e5803df8-c802-4f3f-96f5-53e534835887" 13 | "DataContactURL","","","65373de8-3fb3-4882-a8ca-cfe23a4ff58e" 14 | "DistributionURL","DOWNLOAD SOFTWARE","MOBILE APP","5fedeefd-2609-488c-a897-fe168cae34dd" 15 | "DistributionURL","DOWNLOAD SOFTWARE","","ca8b62c9-5f31-40bd-92a9-8d30081309e2" 16 | "DistributionURL","GET CAPABILITIES","GIBS","ca5440d6-a9e4-416b-8e35-c4769f664b95" 17 | "DistributionURL","GET CAPABILITIES","OpenSearch","09c6e3ea-f8e0-4052-8b41-c3b1269799ed" 18 | "DistributionURL","GET CAPABILITIES","","2892b502-2c66-42d5-af3d-bcddb57d9195" 19 | "DistributionURL","GET DATA VIA DIRECT ACCESS","","172cd72d-30d3-4795-8660-dc38820faba0" 20 | "DistributionURL","GET DATA","APPEEARS","6b8f0bfc-d9a4-4af1-9d94-6dcfade03bda" 21 | "DistributionURL","GET DATA","CERES Ordering 
Tool","93bc7186-2634-49ae-a8da-312d893ef15e" 22 | "DistributionURL","GET DATA","DATA COLLECTION BUNDLE","444f03b4-e588-42da-aee6-73028f3c45be" 23 | "DistributionURL","GET DATA","DATA TREE","3c2a68a6-d8c2-4f14-8208-e57a4446ad71" 24 | "DistributionURL","GET DATA","DATACAST URL","2fc3797c-71b5-4d01-8ae1-d5634ec625ce" 25 | "DistributionURL","GET DATA","DIRECT DOWNLOAD","8e33a2dd-df13-4079-8636-391abb5344c6" 26 | "DistributionURL","GET DATA","EOSDIS DATA POOL","3779ec72-c1e0-4a0f-aff8-8e2a2a7af486" 27 | "DistributionURL","GET DATA","Earthdata Search","5b8013bb-0b15-4811-8aa3-bfc108c3a041" 28 | "DistributionURL","GET DATA","GIOVANNI","2e869d22-88fe-43dc-852d-9f50c911ad02" 29 | "DistributionURL","GET DATA","GoLIVE Portal","9bfb0f20-189e-411b-a678-768bf3fa256e" 30 | "DistributionURL","GET DATA","IceBridge Portal","3c60609c-d48d-47c4-b069-43951fa0aea3" 31 | "DistributionURL","GET DATA","LAADS","0b50c12d-a6ae-4d63-b42b-d99bf7aa2da0" 32 | "DistributionURL","GET DATA","LANCE","aa11ac15-3042-4634-b47e-acc368f608bd" 33 | "DistributionURL","GET DATA","MIRADOR","9b05d2a3-9a5a-425c-b6ed-59e0e56814fa" 34 | "DistributionURL","GET DATA","MLHub","7ff3e5a8-650a-4cd1-80db-cca0fd209a84" 35 | "DistributionURL","GET DATA","MODAPS","26431afd-cb37-4772-9e97-3a36f6dff32d" 36 | "DistributionURL","GET DATA","NOAA CLASS","a36f1716-a310-41f5-b4d8-6c6a5fc933d9" 37 | "DistributionURL","GET DATA","NOMADS","b434314f-949f-4c26-be57-2ea4c7f03643" 38 | "DistributionURL","GET DATA","Order","bd91340d-a8b3-4c01-b262-71e50fe69c83" 39 | "DistributionURL","GET DATA","PORTAL","49be0345-a6af-4608-98d8-9b2343e60077" 40 | "DistributionURL","GET DATA","Sub-Orbital Order Tool","459decfe-53ee-41ce-b608-d7578b04ef7b" 41 | "DistributionURL","GET DATA","Subscribe","38219044-ad26-4a32-98c0-dca8ad3cd29a" 42 | "DistributionURL","GET DATA","USGS EARTH EXPLORER","4485a5b6-d84c-4c98-980e-164863ca518f" 43 | "DistributionURL","GET DATA","VERTEX","5520d1de-f7f5-4798-9ebc-698885805489" 44 | "DistributionURL","GET DATA","VIRTUAL COLLECTION","78d28911-a87c-40a0-ada2-c14f7cfb0834" 45 | "DistributionURL","GET DATA","","750f6c61-0f15-4185-94d8-c029dec04bc5" 46 | "DistributionURL","GOTO WEB TOOL","HITIDE","a7225578-b398-4222-a7a0-8f5175338ddf" 47 | "DistributionURL","GOTO WEB TOOL","LIVE ACCESS SERVER (LAS)","20ab6d52-f5a7-439c-a044-6ef2452a2838" 48 | "DistributionURL","GOTO WEB TOOL","MAP VIEWER","c1c61697-b4bd-467c-9db4-5bd0115545a3" 49 | "DistributionURL","GOTO WEB TOOL","SIMPLE SUBSET WIZARD (SSW)","6ffc54ea-001a-4c03-afff-5086b2da8f59" 50 | "DistributionURL","GOTO WEB TOOL","SUBSETTER","bf37a20c-8e99-4187-b91b-3ea254f006f9" 51 | "DistributionURL","GOTO WEB TOOL","","ffccf1c0-f25d-4747-ac4a-f09444383031" 52 | "DistributionURL","USE SERVICE API","GRADS DATA SERVER (GDS)","5c0cd574-0255-4202-9b5b-3da8711b7ed7" 53 | "DistributionURL","USE SERVICE API","MAP SERVICE","0c3aa5c6-f1f9-4c16-aa96-30672028d26c" 54 | "DistributionURL","USE SERVICE API","OPENDAP DATA","eae7a041-b004-48df-8d4e-d758969e3185" 55 | "DistributionURL","USE SERVICE API","OpenSearch","89b80cbd-027f-4eab-823f-ae00c268f5bf" 56 | "DistributionURL","USE SERVICE API","SERVICE CHAINING","411d2781-822c-4c48-8d5b-4b51b100ce0a" 57 | "DistributionURL","USE SERVICE API","TABULAR DATA STREAM (TDS)","7b664934-70c4-4694-b8c1-416e7c91afb9" 58 | "DistributionURL","USE SERVICE API","THREDDS DATA","77cae7cb-4676-4c69-a88b-d78971496f97" 59 | "DistributionURL","USE SERVICE API","WEB COVERAGE SERVICE (WCS)","029540bb-7f5c-44ba-8578-61e2f858be60" 60 | "DistributionURL","USE SERVICE API","WEB FEATURE SERVICE 
(WFS)","c4d406e6-7a34-42aa-bd79-f7f9265cc7bd" 61 | "DistributionURL","USE SERVICE API","WEB MAP SERVICE (WMS)","b0e2089c-3c1d-4c12-b833-e07365a4038e" 62 | "DistributionURL","USE SERVICE API","WEB MAP TILE SERVICE (WMTS)","7aac9f91-20c4-4234-9153-e850c8ace8a9" 63 | "DistributionURL","USE SERVICE API","","d117cf5c-8d23-4662-be62-7b883cecb219" 64 | "DistributionURL","","","d25982b9-92e9-4ec0-ab44-48e79ecbe137" 65 | "PublicationURL","VIEW RELATED INFORMATION","ALGORITHM DOCUMENTATION","fcc9411c-a1c9-415d-a16c-75c42f2cec45" 66 | "PublicationURL","VIEW RELATED INFORMATION","ALGORITHM THEORETICAL BASIS DOCUMENT (ATBD)","fd01f7ec-fdf6-4440-b974-75f12fb4ec5f" 67 | "PublicationURL","VIEW RELATED INFORMATION","ANOMALIES","914cbb7e-5b20-4bcd-86e3-ffcfa26f0a73" 68 | "PublicationURL","VIEW RELATED INFORMATION","CASE STUDY","3112d474-b44f-4af1-8266-c3dd6d28220f" 69 | "PublicationURL","VIEW RELATED INFORMATION","DATA CITATION POLICY","40cf5001-15ec-4d9a-913c-bb323f2974fc" 70 | "PublicationURL","VIEW RELATED INFORMATION","DATA PRODUCT SPECIFICATION","415cfe86-4d71-4100-8f35-6404caec1c91" 71 | "PublicationURL","VIEW RELATED INFORMATION","DATA QUALITY","0eba3253-8eb7-4e43-9627-9cff48775e27" 72 | "PublicationURL","VIEW RELATED INFORMATION","DATA RECIPE","547600e9-b60a-44eb-b14b-5c6e1f2c094e" 73 | "PublicationURL","VIEW RELATED INFORMATION","DELIVERABLES CHECKLIST","be0460d8-ca8e-45c8-b637-8fb4ce5a5e97" 74 | "PublicationURL","VIEW RELATED INFORMATION","GENERAL DOCUMENTATION","aebf20eb-39c7-4f4f-aecf-a628f703867b" 75 | "PublicationURL","VIEW RELATED INFORMATION","HOW-TO","7ebd73e5-b0aa-4cf2-ace5-1d3890c2c3ce" 76 | "PublicationURL","VIEW RELATED INFORMATION","IMPORTANT NOTICE","2af2cfc4-9390-43da-8fa8-1f272e8ee0b0" 77 | "PublicationURL","VIEW RELATED INFORMATION","INSTRUMENT/SENSOR CALIBRATION DOCUMENTATION","fc3c1abb-92c1-49c2-90d4-161c70cff44a" 78 | "PublicationURL","VIEW RELATED INFORMATION","MICRO ARTICLE","4f3c0b04-1fe6-4e11-994a-9cc4afd09ce0" 79 | "PublicationURL","VIEW RELATED INFORMATION","PI DOCUMENTATION","367f8b8a-e57e-4c49-b971-0b5c6a484186" 80 | "PublicationURL","VIEW RELATED INFORMATION","PROCESSING HISTORY","7cfa5214-7f69-4355-b259-286be88f25d1" 81 | "PublicationURL","VIEW RELATED INFORMATION","PRODUCT HISTORY","b292f51f-d2b4-4e65-84a9-e50306238989" 82 | "PublicationURL","VIEW RELATED INFORMATION","PRODUCT QUALITY ASSESSMENT","b7ed88ce-3f04-40ea-863e-ac58bd048ff3" 83 | "PublicationURL","VIEW RELATED INFORMATION","PRODUCT USAGE","1132a0fc-888b-4332-ad0a-dc5c6e615afa" 84 | "PublicationURL","VIEW RELATED INFORMATION","PRODUCTION HISTORY","0b597285-eaac-4cbd-94cc-d87ae8046681" 85 | "PublicationURL","VIEW RELATED INFORMATION","PUBLICATIONS","13a4deec-bd22-4864-9804-77fac181f484" 86 | "PublicationURL","VIEW RELATED INFORMATION","READ-ME","aa3cea98-b20a-4de8-8f22-7a8b30784625" 87 | "PublicationURL","VIEW RELATED INFORMATION","REQUIREMENTS AND DESIGN","86b8b121-d710-4c5b-84b0-7b40717f6c76" 88 | "PublicationURL","VIEW RELATED INFORMATION","SCIENCE DATA PRODUCT SOFTWARE DOCUMENTATION","e8e6e972-832f-4501-a721-4108f33332d6" 89 | "PublicationURL","VIEW RELATED INFORMATION","SCIENCE DATA PRODUCT VALIDATION","15b0a4c4-b39d-48f5-92d2-905e45e6dc6a" 90 | "PublicationURL","VIEW RELATED INFORMATION","USER FEEDBACK PAGE","ab2fce71-e5f9-4ba6-bfb1-bc428a8b7dd8" 91 | "PublicationURL","VIEW RELATED INFORMATION","USER'S GUIDE","d1996d91-e824-4b24-b94e-3aae4543b63b" 92 | "PublicationURL","VIEW RELATED INFORMATION","","5ec1bb9d-0efc-4099-9b31-ec791bbd8145" 93 | "PublicationURL","","","894edd57-afb3-4bb3-878f-fc245d8b6e82" 
94 | "VisualizationURL","Color Map","GITC","87117fb4-888c-41b9-a795-d13d436d828b" 95 | "VisualizationURL","Color Map","Giovanni","197d7881-a01b-4892-822f-94ca72aea2f4" 96 | "VisualizationURL","Color Map","Harmony GDAL","503206c2-c5ae-4d65-8c18-be8d06370c0c" 97 | "VisualizationURL","Color Map","","58848eb9-9c2c-491e-847e-5a4f3d9f6889" 98 | "VisualizationURL","GET RELATED VISUALIZATION","GIOVANNI","690210ef-4cf8-4645-b68d-921466bba6a2" 99 | "VisualizationURL","GET RELATED VISUALIZATION","MAP","e6f9524a-e4bc-460a-bdf3-a5e8f0e921a9" 100 | "VisualizationURL","GET RELATED VISUALIZATION","SOTO","389ab1cf-fbf4-49ee-bf22-e40643fa00f6" 101 | "VisualizationURL","GET RELATED VISUALIZATION","WORLDVIEW","eeff646c-6faf-468e-a0ab-ff78fc6f86f9" 102 | "VisualizationURL","GET RELATED VISUALIZATION","","dd2adc64-c7bd-4dbf-976b-f0496966817c" 103 | "VisualizationURL","","","731f4e5c-d200-4c56-9daa-e6fad17415ef" 104 | -------------------------------------------------------------------------------- /pyQuARC/schemas/rules_override.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /pyQuARC/schemas/temporalresolutionrange.csv: -------------------------------------------------------------------------------- 1 | "Hits: 20","page_num: 1","page_size: 2000","Keyword Version: 14.3","Revision: 2022-08-26 10:35:31","Timestamp: 2022-09-15 14:44:45","Terms Of Use: https://cdn.earthdata.nasa.gov/conduit/upload/5182/KeywordsCommunityGuide_Baseline_v1_SIGNED_FINAL.pdf","The most up to date XML representations can be found here: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/temporalresolutionrange/?format=xml","Case native" 2 | Temporal_Resolution_Range,UUID 3 | "1 minute - < 1 hour","bca20202-2b06-4657-a425-5b0e416bce0c" 4 | "1 second - < 1 minute","48ff676f-836c-4cff-bc88-4c4cc06b2e1b" 5 | "< 1 second","42a2f639-d1c3-4e82-a8b8-63f0f4a60ac6" 6 | "Annual Climatology","af931dca-9a7d-4ba9-b40f-2a21e31f2d5b" 7 | "Annual","40e09855-fb48-4a7d-9851-d6e809e6c309" 8 | "Climate Normal (30-year climatology)","f308a8db-40ea-4932-a58c-fb0a093959dc" 9 | "Daily - < Weekly","1ac968ef-a90a-4ffc-adbf-ea0c0d69a7f9" 10 | "Daily Climatology","f86e464a-cf9d-4e15-a39b-501855d1dc5a" 11 | "Decadal","3d97e993-dc6a-41ff-8a49-3e837c1fc2b1" 12 | "Diurnal","99ef187e-6940-4c10-8d65-00d4426d493b" 13 | "Hourly - < Daily","31765761-b153-478a-92b3-1088997fd74b" 14 | "Hourly Climatology","027dee16-b361-481e-868d-add966eb5b71" 15 | "Monthly - < Annual","8900c323-8789-4403-91e9-c399de369935" 16 | "Monthly Climatology","8c8c70b1-f6c5-4f34-89b5-510049b8c8ab" 17 | "Pentad Climatology","e0040d4b-e398-4b65-bd42-d39434b5cc95" 18 | "Seasonal","7c5420a6-94e2-40ca-9dff-20309090d327" 19 | "Subannual","7afdb8ba-a504-45b6-b301-730e3c69d23a" 20 | "Weekly - < Monthly","7b2a303c-3cb7-4961-9851-650548964674" 21 | "Weekly Climatology","2de882f0-d84a-471e-8fb5-9f8a1c7913c1" 22 | -------------------------------------------------------------------------------- /pyQuARC/schemas/version.txt: -------------------------------------------------------------------------------- 1 | 2023-04-24 -------------------------------------------------------------------------------- /pyQuARC/schemas/verticalresolutionrange.csv: -------------------------------------------------------------------------------- 1 | "Hits: 8","page_num: 1","page_size: 2000","Keyword Version: 14.3","Revision: 2022-08-26 10:35:31","Timestamp: 2022-09-15 16:21:37","Terms Of Use: 
https://cdn.earthdata.nasa.gov/conduit/upload/5182/KeywordsCommunityGuide_Baseline_v1_SIGNED_FINAL.pdf","The most up to date XML representations can be found here: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/verticalresolutionrange/?format=xml","Case native" 2 | Vertical_Resolution_Range,UUID 3 | "1 meter - < 10 meters","201337ea-fa14-4e58-a538-e92c5ff734a4" 4 | "10 meters - < 30 meters","20505a5b-4df8-4430-83a3-ad7b212c9bfc" 5 | "100 meters - < 1 km","eccf8700-c503-46a3-b6f7-86cf7e48465a" 6 | "30 meters - < 100 meters","a66aa809-5320-408d-9cbe-86ed7940b8ec" 7 | "< 1 meter","cf1f085e-4948-4874-9640-e236fff7bc8d" 8 | "> 1 km","3f0aa4fc-802c-4804-9f9f-8b666f3a2776" 9 | "Point Resolution","0893353d-4e8c-4b31-bcc5-fce552ccfff3" 10 | -------------------------------------------------------------------------------- /pyQuARC/version.txt: -------------------------------------------------------------------------------- 1 | 1.2.8 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | colorama==0.4.4 2 | idna==2.10 3 | jsonschema==4.17.3 4 | lxml==4.9.1 5 | pytest==5.4.3 6 | pytz==2020.1 7 | requests==2.24.0 8 | setuptools==60.8.2 9 | strict-rfc3339==0.7 10 | tqdm==4.48.2 11 | urlextract==1.3.0 12 | xmltodict==0.12.0 13 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from distutils.util import convert_path 3 | 4 | version_path = convert_path("pyQuARC/version.txt") 5 | with open(version_path) as version_file: 6 | __version__ = version_file.read().strip() 7 | 8 | with open("README.md", "r", encoding="utf-8") as readme_file: 9 | long_description = readme_file.read() 10 | 11 | 12 | with open("requirements.txt", "r", encoding="utf-8") as requirement_file: 13 | requirements = requirement_file.readlines() 14 | 15 | 16 | setuptools.setup( 17 | name="pyQuARC", 18 | version=__version__, 19 | author="NASA IMPACT", 20 | author_email="teamimpact@uah.edu", 21 | description="The pyQuARC tool is an open source library for Earth Observation Metadata quality and assessment. The pyQuARC tool reads and evaluates metadata records with a focus on the consistency and robustness of the metadata. pyQuARC flags opportunities to improve or add to contextual metadata information in order to help the user connect to relevant data products. pyQuARC also ensures that information common to both the data product and the file-level metadata is consistent and compatible. pyQuARC frees up human evaluators to make more sophisticated assessments such as whether an abstract accurately describes the data and provides the correct contextual information. The base pyQuARC package assesses descriptive metadata used to catalog Earth observation data products and files.
As open source software, pyQuARC can be adapted and customized by data providers to allow for quality checks that evolve with their needs, including checking metadata not covered in the base package.", 22 | long_description=long_description, 23 | long_description_content_type="text/markdown", 24 | url="https://github.com/NASA-IMPACT/pyQuARC", 25 | packages=setuptools.find_packages(), 26 | classifiers=[ 27 | "Programming Language :: Python :: 3.8", 28 | "License :: OSI Approved :: Apache Software License", 29 | "Operating System :: OS Independent", 30 | ], 31 | keywords="validation metadata cmr quality", 32 | python_requires=">=3.8", 33 | install_requires=requirements, 34 | package_data={"pyQuARC": ["schemas/*", "*.txt"], "tests": ["fixtures/*"]}, 35 | include_package_data=True, 36 | ) 37 | -------------------------------------------------------------------------------- /tests/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/pyQuARC/85ccd62b8b3714f2721fcc67d9a29199c9fbcbb4/tests/.DS_Store -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/pyQuARC/85ccd62b8b3714f2721fcc67d9a29199c9fbcbb4/tests/__init__.py -------------------------------------------------------------------------------- /tests/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .fixtures.common import DUMMY_METADATA_FILE_PATH 4 | 5 | 6 | def read_test_metadata(): 7 | with open(os.path.join(os.getcwd(), DUMMY_METADATA_FILE_PATH), "r") as content_file: 8 | return content_file.read().encode() 9 | -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/pyQuARC/85ccd62b8b3714f2721fcc67d9a29199c9fbcbb4/tests/fixtures/__init__.py
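The read_test_metadata helper in tests/common.py above joins DUMMY_METADATA_FILE_PATH onto os.getcwd(), so the suite has to be invoked from the repository root for the fixture to resolve. A minimal sketch of a test consuming it under that assumption is shown below; the test name and the asserted value are illustrative (the short name is taken from the custom_checker fixture further down), not an actual test from this suite:

from tests.common import read_test_metadata

def test_dummy_metadata_is_readable():
    # the helper returns the raw bytes of tests/fixtures/test_cmr_metadata.echo-c
    content = read_test_metadata()
    # that dummy record describes the ACOS_L2S collection, so its short name
    # should appear verbatim in the payload
    assert b"ACOS_L2S" in content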
-------------------------------------------------------------------------------- /tests/fixtures/bad_syntax_metadata.echo-c: -------------------------------------------------------------------------------- 1 | 2 | ACOS_L2S 3 | 4 | 2016 5 | 2016-04-14T00:00:00.000Z 6 | Not provided 7 | ACOS GOSAT/TANSO-FTS Level 2 Full Physics Standard Product V7.3 (ACOS_L2S) at GES DISC 8 | Version 7.3 is the current version of the data set. Version 3.5 is no longer available and has been superseded by Version 7.3. 9 | 10 | This data set is currently provided by the OCO (Orbiting Carbon Observatory) Project. In expectation of the OCO-2 launch, the algorithm was developed by the Atmospheric CO2 Observations from Space (ACOS) Task as a preparatory project, using GOSAT TANSO-FTS spectra. After the OCO-2 launch, "ACOS" data are still produced and improved, using approaches applied to the OCO-2 spectra. 11 | 12 | The "ACOS" data set contains Carbon Dioxide (CO2) column averaged dry air mole fraction for all soundings for which retrieval was attempted. These are the highest-level products made available by the OCO Project, using TANSO-FTS spectral radiances, and algorithm build version 7.3. 13 | 14 | The GOSAT team at JAXA produces GOSAT TANSO-FTS Level 1B (L1B) data products for internal use and for distribution to collaborative partners, such as ESA and NASA.
These calibrated products are augmented by the OCO Project with additional geolocation information and further corrections. Thus produced Level 1B products (with calibrated radiances and geolocation) are the input to the "ACOS" Level 2 production process. 15 | 16 | Even though the GES DISC is not publicly distributing Level 1B ACOS products, it should be known that changes in this version are affecting both Level 1B and Level 2 data. An important enhancement in Level1B will address the degradation in the number of quality-passed soundings. 17 | 18 | Elimination of many systematic biases, and better agreement with TCCON (Total Carbon Column Observing Network), is expected in Level 2 retrievals. The key changes to the L2 algorithm include scaling the O2-A band spectroscopy (reducing XCO2 bias by 4 or 5 ppm); using interpolation with the instrument lineshape [ ILS ] (reducing XCO2 bias by 1.5 ppm); and fitting a zero level offset to the A-band. Users have to also carefully familiarize themselves with the disclaimer in the new documentation. 19 | 20 | An important element to note are the updates on data screening. Although a Master Quality Flag is provided in the data product, further analysis of a larger set of data has allowed the science team to provide an updated set of screening criteria. These are listed in the data user's guide, and are recommended instead of the Master Quality Flag. 21 | 22 | 23 | Lastly, users should continue to carefully observe and weigh information from three important flags: 24 | 25 | "warn_level" - Provides a value that summarizes each sounding's acceptability to a larger set of quality filters. A high warn level predicts that the sounding would fail most data filters applied to it. A low warn level suggests that the sounding would pass most quality filters that might be applied. 26 | 27 | "sounding_qual_flag" - quality of input data provided to the retrieval processing 28 | 29 | "outcome_flag" - retrieval quality based upon certain internal thresholds (not thoroughly evaluated) 30 | 31 | "master_quality_flag" - four possible values: "Good", "Caution" and "Bad", and "Failed", as determined from other flags in the L2 productThe short name for this data type is ACOS_L2S. 32 | 2019-11-21T14:37:19.000Z 33 | 2 34 | NASA/GSFC/SED/ESD/GCDC/GESDISC 35 | Not provided 36 | COMPLETE 37 | This product have full public access 38 | HDF5 39 | 40 | GLOBAL 41 | 42 | 43 | false 44 | 45 | 2009-04-20T00:00:00.000Z 46 | 47 | 48 | 49 | 50 | ARCHIVER 51 | NASA/GSFC/SED/ESD/GCDC/GESDISC 52 | 53 | 54 | TECHNICAL CONTACT 55 | 56 | 57 | ANDREY 58 | SAVTCHENKO 59 | METADATA AUTHOR 60 | 61 | 62 | 63 | 64 | 65 | 66 | EARTH SCIENCE 67 | ATMOSPHERE 68 | ATMOSPHERIC CHEMISTRY 69 | 70 | CARBON AND HYDROCARBON COMPOUNDS 71 | 72 | CARBON DIOXIDE 73 | 74 | 75 | 76 | 77 | 78 | 79 | GOSAT 80 | Greenhouse Gases Observing Satellite 81 | Earth Observation Satellites 82 | 83 | 84 | TANSO-FTS 85 | Thermal And Near Infrared Sensor For Carbon Observation 86 | 87 | 88 | 89 | 90 | 91 | 92 | OCO 93 | Orbiting Carbon Observatory 94 | 95 | 96 | 97 | 98 | https://oco2.gesdisc.eosdis.nasa.gov/data/GOSAT_TANSO_Level2/ACOS_L2S.7.3/ 99 | Access the data via HTTP. 100 | 101 | 102 | https://search.earthdata.nasa.gov/search?q=ACOS_L2S+7.3 103 | Use the Earthdata Search to find and retrieve data sets across multiple data centers. 104 | 105 | 106 | 107 | 108 | https://disc.gsfc.nasa.gov/datacollection/ACOS_L2S_7.3.html 109 | Access the dataset landing page from the GES DISC website.
110 | CollectionURL : DATA SET LANDING PAGE 111 | 112 | 113 | https://oco2.gesdisc.eosdis.nasa.gov/opendap/ACOS_L2S.7.3/ 114 | Access the data via the OPeNDAP protocol. 115 | USE SERVICE API : OPENDAP DATA 116 | 117 | 118 | http://www.gosat.nies.go.jp/index_e.html 119 | GOSAT site 120 | PublicationURL : VIEW RELATED INFORMATION 121 | 122 | 123 | https://docserver.gesdisc.eosdis.nasa.gov/public/project/OCO/ACOS_v7.3_DataUsersGuide-RevF.pdf 124 | User's Guide 125 | VIEW RELATED INFORMATION : GENERAL DOCUMENTATION 126 | 127 | 128 | https://oco.jpl.nasa.gov/publications/ 129 | Publications from the Science Team 130 | VIEW RELATED INFORMATION : GENERAL DOCUMENTATION 131 | 132 | 133 | https://docserver.gesdisc.eosdis.nasa.gov/public/project/OCO/KnownDataIssues.ACOS.html 134 | ACOS Data Gaps 135 | VIEW RELATED INFORMATION : GENERAL DOCUMENTATION 136 | 137 | 138 | http://www.gosat.nies.go.jp/en/about_%EF%BC%92_observe.html 139 | Instrument Description 140 | VIEW RELATED INFORMATION : GENERAL DOCUMENTATION 141 | 142 | 143 | 144 | 145 | 146 | GEODETIC 147 | 148 | -180 149 | 90 150 | 180 151 | -90 152 | 153 | 154 | 155 | GEODETIC 156 | 157 | 158 | 159 | https://docserver.gesdisc.eosdis.nasa.gov/public/project/OCO/ACOS.L2S.2015_v7.3.png 160 | Monthly maps of the ACOS v7.3 XCO2 data. Each data point contains the average 161 | value for XCO2 estimates in a 2° x 2° bin for that month that passed all pre- and post-screening 162 | filters; the recommend bias correction has been applied. Reproduced from ACOS v7.3 Data Users Guide. 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /tests/fixtures/checker.py: -------------------------------------------------------------------------------- 1 | DUMMY_METADATA_CONTENT = { 2 | "Contacts": { 3 | "Contact": [ 4 | { 5 | "Role": "ARCHIVER", 6 | "OrganizationName": "NASA/GSFC/SED/ESD/GCDC/GESDISC", 7 | "ContactPersons": { 8 | "ContactPerson": { 9 | "FirstName": "SLESA", 10 | "LastName": "OSTRENGA", 11 | "JobPosition": "METADATA AUTHOR", 12 | } 13 | }, 14 | }, 15 | { 16 | "Role": "TECHNICAL CONTACT", 17 | "ContactPersons": { 18 | "ContactPerson": [ 19 | { 20 | "FirstName": "DANA", 21 | "LastName": "OSTRENGA", 22 | "JobPosition": "METADATA AUTHOR", 23 | }, 24 | { 25 | "FirstName": "MICHAEL", 26 | "LastName": "BOSILOVICH", 27 | "JobPosition": "INVESTIGATOR", 28 | }, 29 | { 30 | "blabla": "BOSILOVICH", 31 | }, 32 | ] 33 | }, 34 | }, 35 | ] 36 | }, 37 | } 38 | 39 | 40 | FUNCTION_MAPPING = { 41 | "input": [ 42 | {"datatype": "datetime", "function": "iso_format_check"}, 43 | {"datatype": "datetime", "function": "format_check"}, 44 | ], 45 | "output": [True, False], 46 | } 47 |
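The FUNCTION_MAPPING fixture above pairs a (datatype, function-name) lookup with the expected outcome of that lookup: iso_format_check resolves for the datetime datatype, while format_check does not. A sketch of how a parametrized test might drive it is shown below; the module pyQuARC/code/checker.py exists in the tree, but the Checker class name, its no-argument construction, and the map_to_function method are assumptions made for illustration, not verified against the source:

import pytest

from pyQuARC.code.checker import Checker  # class name assumed
from tests.fixtures.checker import FUNCTION_MAPPING

@pytest.mark.parametrize(
    "case, expected",
    zip(FUNCTION_MAPPING["input"], FUNCTION_MAPPING["output"]),
)
def test_function_mapping(case, expected):
    checker = Checker()  # assumed default constructor
    # a truthy result would mean a validator function was resolved
    # for the given datatype and function name
    result = checker.map_to_function(case["datatype"], case["function"])
    assert bool(result) == expected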
at GES DISC" 16 | ], 17 | ["ARCHIVER", "TECHNICAL CONTACT"], 18 | [ 19 | OrderedDict( 20 | [ 21 | ("ShortName", "TANSO-FTS"), 22 | ( 23 | "LongName", 24 | "Thermal And Near Infrared Sensor For Carbon Observation", 25 | ), 26 | ] 27 | ) 28 | ], 29 | ], 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /tests/fixtures/downloader.py: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /tests/fixtures/no_error_metadata.echo-c: -------------------------------------------------------------------------------- 1 | 2 | ACOS_L2S 3 | 4 | 2016 5 | 2016-04-14T00:00:00.000Z 6 | Not provided 7 | ACOS GOSAT/TANSO-FTS Level 2 Full Physics Standard Product V7.3 (ACOS_L2S) at GES DISC 8 | Version 7.3 is the current version of the data set. Version 3.5 is no longer available and has been superseded by Version 7.3. 9 | 10 | This data set is currently provided by the OCO (Orbiting Carbon Observatory) Project. In expectation of the OCO-2 launch, the algorithm was developed by the Atmospheric CO2 Observations from Space (ACOS) Task as a preparatory project, using GOSAT TANSO-FTS spectra. After the OCO-2 launch, "ACOS" data are still produced and improved, using approaches applied to the OCO-2 spectra. 11 | 12 | The "ACOS" data set contains Carbon Dioxide (CO2) column averaged dry air mole fraction for all soundings for which retrieval was attempted. These are the highest-level products made available by the OCO Project, using TANSO-FTS spectral radiances, and algorithm build version 7.3. 13 | 14 | The GOSAT team at JAXA produces GOSAT TANSO-FTS Level 1B (L1B) data products for internal use and for distribution to collaborative partners, such as ESA and NASA. These calibrated products are augmented by the OCO Project with additional geolocation information and further corrections. Thus produced Level 1B products (with calibrated radiances and geolocation) are the input to the "ACOS" Level 2 production process. 15 | 16 | Even though the GES DISC is not publicly distributing Level 1B ACOS products, it should be known that changes in this version are affecting both Level 1B and Level 2 data. An important enhancement in Level1B will address the degradation in the number of quality-passed soundings. 17 | 18 | Elimination of many systematic biases, and better agreement with TCCON (Total Carbon Column Observing Network), is expected in Level 2 retrievals. The key changes to the L2 algorithm include scaling the O2-A band spectroscopy (reducing XCO2 bias by 4 or 5 ppm); using interpolation with the instrument lineshape [ ILS ] (reducing XCO2 bias by 1.5 ppm); and fitting a zero level offset to the A-band. Users have to also carefully familiarize themselves with the disclaimer in the new documentation. 19 | 20 | An important element to note are the updates on data screening. Although a Master Quality Flag is provided in the data product, further analysis of a larger set of data has allowed the science team to provide an updated set of screening criteria. These are listed in the data user's guide, and are recommended instead of the Master Quality Flag. 21 | 22 | 23 | Lastly, users should continue to carefully observe and weigh information from three important flags: 24 | 25 | "warn_level" - Provides a value that summarizes each sounding's acceptability to a larger set of quality filters. 
A high warn level predicts that the sounding would fail most data filters applied to it. A low warn level suggests that the sounding would pass most quality filters that might be applied. 26 | 27 | "sounding_qual_flag" - quality of input data provided to the retrieval processing 28 | 29 | "outcome_flag" - retrieval quality based upon certain internal thresholds (not thoroughly evaluated) 30 | 31 | "master_quality_flag" - four possible values: "Good", "Caution" and "Bad", and "Failed", as determined from other flags in the L2 productThe short name for this data type is ACOS_L2S. 32 | 2019-11-21T14:37:19.000Z 33 | 2 34 | NASA/GSFC/SED/ESD/GCDC/GESDISC 35 | Not provided 36 | COMPLETE 37 | This product have full public access 38 | HDF5 39 | 40 | GLOBAL 41 | 42 | 43 | false 44 | 45 | 2009-04-20T00:00:00.000Z 46 | 47 | 48 | 49 | 50 | ARCHIVER 51 | NASA/GSFC/SED/ESD/GCDC/GESDISC 52 | 53 | 54 | TECHNICAL CONTACT 55 | 56 | 57 | ANDREY 58 | SAVTCHENKO 59 | METADATA AUTHOR 60 | 61 | 62 | 63 | 64 | 65 | 66 | EARTH SCIENCE 67 | ATMOSPHERE 68 | ATMOSPHERIC CHEMISTRY 69 | 70 | CARBON AND HYDROCARBON COMPOUNDS 71 | 72 | CARBON DIOXIDE 73 | 74 | 75 | 76 | 77 | 78 | 79 | GOSAT 80 | Greenhouse Gases Observing Satellite 81 | Earth Observation Satellites 82 | 83 | 84 | TANSO-FTS 85 | Thermal And Near Infrared Sensor For Carbon Observation 86 | 87 | 88 | 89 | 90 | 91 | 92 | OCO 93 | Orbiting Carbon Observatory 94 | 95 | 96 | 97 | 98 | https://oco2.gesdisc.eosdis.nasa.gov/data/GOSAT_TANSO_Level2/ACOS_L2S.7.3/ 99 | Access the data via HTTP. 100 | 101 | 102 | https://search.earthdata.nasa.gov/search?q=ACOS_L2S+7.3 103 | Use the Earthdata Search to find and retrieve data sets across multiple data centers. 104 | 105 | 106 | 107 | 108 | https://disc.gsfc.nasa.gov/datacollection/ACOS_L2S_7.3.html 109 | Access the dataset landing page from the GES DISC website. 110 | CollectionURL : DATA SET LANDING PAGE 111 | 112 | 113 | https://oco2.gesdisc.eosdis.nasa.gov/opendap/ACOS_L2S.7.3/ 114 | Access the data via the OPeNDAP protocol. 115 | USE SERVICE API : OPENDAP DATA 116 | 117 | 118 | http://www.gosat.nies.go.jp/index_e.html 119 | GOSAT site 120 | PublicationURL : VIEW RELATED INFORMATION 121 | 122 | 123 | https://docserver.gesdisc.eosdis.nasa.gov/public/project/OCO/ACOS_v7.3_DataUsersGuide-RevF.pdf 124 | User's Guide 125 | VIEW RELATED INFORMATION : GENERAL DOCUMENTATION 126 | 127 | 128 | https://oco.jpl.nasa.gov/publications/ 129 | Publications from the Science Team 130 | VIEW RELATED INFORMATION : GENERAL DOCUMENTATION 131 | 132 | 133 | https://docserver.gesdisc.eosdis.nasa.gov/public/project/OCO/KnownDataIssues.ACOS.html 134 | ACOS Data Gaps 135 | VIEW RELATED INFORMATION : GENERAL DOCUMENTATION 136 | 137 | 138 | http://www.gosat.nies.go.jp/en/about_%EF%BC%92_observe.html 139 | Instrument Description 140 | VIEW RELATED INFORMATION : GENERAL DOCUMENTATION 141 | 142 | 143 | 144 | 145 | 146 | GEODETIC 147 | 148 | -180 149 | 90 150 | 180 151 | -90 152 | 153 | 154 | 155 | GEODETIC 156 | 157 | 158 | 159 | https://docserver.gesdisc.eosdis.nasa.gov/public/project/OCO/ACOS.L2S.2015_v7.3.png 160 | Monthly maps of the ACOS v7.3 XCO2 data. Each data point contains the average 161 | value for XCO2 estimates in a 2° x 2° bin for that month that passed all pre- and post-screening 162 | filters; the recommend bias correction has been applied. Reproduced from ACOS v7.3 Data Users Guide. 
163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /tests/fixtures/test_cmr_metadata.echo-g: -------------------------------------------------------------------------------- 1 | 2 | SC:ATL08.005:241695844 3 | 2022-04-15 4 | 2022-04-15T10:27:27.492Z 5 | 6 | ATLAS/ICESat-2 L3A Land and Vegetation Height V005 7 | 8 | 9 | 44.2424182892 10 | ATL08_20220210222256_07731412_005_01.h5 11 | UNSPECIFIED 12 | 2022-04-06T02:30:43.000Z 13 | 14 | 15 | 16 | 2022-02-10T22:22:59.217Z 17 | 2022-02-10T22:26:32.279Z 18 | 19 | 20 | 21 | 22 | 23 | 125.75586345146665 24 | -79 25 | A 26 | -50 27 | A 28 | 29 | 30 | 31 | 32 | 33 | 19005 34 | 125.75586345146665 35 | 2022-02-10T21:09:27.619Z 36 | 37 | 38 | 39 | 40 | https://n5eil01u.ecs.nsidc.org/DP7/ATLAS/ATL08.005/2022.02.10/ATL08_20220210222256_07731412_005_01.h5 41 | application/x-hdfeos 42 | 43 | 44 | 45 | 46 | https://n5eil01u.ecs.nsidc.org/DP7/ATLAS/ATL08.005/2022.02.10/ATL08_20220210222256_07731412_005_01.iso.xml 47 | USER SUPPORT 48 | text/xml 49 | 50 | 51 | 52 | 53 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.default.default1.jpg 54 | image/jpeg 55 | 56 | 57 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.default.default2.jpg 58 | image/jpeg 59 | 60 | 61 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1l.groundtrack.jpg 62 | image/jpeg 63 | 64 | 65 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1l.h_canopy_abs.jpg 66 | image/jpeg 67 | 68 | 69 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1l.h_te_median.jpg 70 | image/jpeg 71 | 72 | 73 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1l.n_ca_photons.jpg 74 | image/jpeg 75 | 76 | 77 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1l.n_te_photons.jpg 78 | image/jpeg 79 | 80 | 81 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1r.groundtrack.jpg 82 | image/jpeg 83 | 84 | 85 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1r.h_canopy_abs.jpg 86 | image/jpeg 87 | 88 | 89 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1r.h_te_median.jpg 90 | image/jpeg 91 | 92 | 93 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1r.n_ca_photons.jpg 94 | image/jpeg 95 | 96 | 97 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt1r.n_te_photons.jpg 98 | image/jpeg 99 | 100 | 101 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2l.groundtrack.jpg 102 | image/jpeg 103 | 104 | 105 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2l.h_canopy_abs.jpg 106 | image/jpeg 107 | 108 | 109 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2l.h_te_median.jpg 110 | image/jpeg 111 | 112 | 113 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2l.n_ca_photons.jpg 114 | 
image/jpeg 115 | 116 | 117 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2l.n_te_photons.jpg 118 | image/jpeg 119 | 120 | 121 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2r.groundtrack.jpg 122 | image/jpeg 123 | 124 | 125 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2r.h_canopy_abs.jpg 126 | image/jpeg 127 | 128 | 129 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2r.h_te_median.jpg 130 | image/jpeg 131 | 132 | 133 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2r.n_ca_photons.jpg 134 | image/jpeg 135 | 136 | 137 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt2r.n_te_photons.jpg 138 | image/jpeg 139 | 140 | 141 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3l.groundtrack.jpg 142 | image/jpeg 143 | 144 | 145 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3l.h_canopy_abs.jpg 146 | image/jpeg 147 | 148 | 149 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3l.h_te_median.jpg 150 | image/jpeg 151 | 152 | 153 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3l.n_ca_photons.jpg 154 | image/jpeg 155 | 156 | 157 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3l.n_te_photons.jpg 158 | image/jpeg 159 | 160 | 161 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3r.groundtrack.jpg 162 | image/jpeg 163 | 164 | 165 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3r.h_canopy_abs.jpg 166 | image/jpeg 167 | 168 | 169 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3r.h_te_median.jpg 170 | image/jpeg 171 | 172 | 173 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3r.n_ca_photons.jpg 174 | image/jpeg 175 | 176 | 177 | https://n5eil01u.ecs.nsidc.org/DP0/BRWS/Browse.001/2022.04.15/ATL08_20220210222256_07731412_005_01_BRW.gt3r.n_te_photons.jpg 178 | image/jpeg 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /tests/fixtures/test_cmr_metadata.umm-g: -------------------------------------------------------------------------------- 1 | { 2 | "GranuleUR": "Unique_Granule_UR_v1.6", 3 | "ProviderDates": [{ 4 | "Date": "2018-07-19T00:00:00Z", 5 | "Type": "Create" 6 | }, { 7 | "Date": "2018-08-19T01:00:00Z", 8 | "Type": "Insert" 9 | }, { 10 | "Date": "2018-09-19T02:00:00Z", 11 | "Type": "Update" 12 | }, { 13 | "Date": "2010-08-19T03:00:00Z", 14 | "Type": "Delete" 15 | }], 16 | "CollectionReference": { 17 | "ShortName": "CollectionShortName", 18 | "Version": "1.6" 19 | }, 20 | "AccessConstraints": { 21 | "Description" : "Public Access", 22 | "Value": 42 23 | }, 24 | "DataGranule": { 25 | "ArchiveAndDistributionInformation": [{ 26 | "Name": "GranuleZipFile", 27 | "SizeInBytes": 23000, 28 | "Size": 23, 29 | "SizeUnit": "KB", 30 | "Format": "ZIP", 31 | "MimeType": "application/zip", 32 | 
"Checksum": { 33 | "Value": "E51569BF48DD0FD0640C6503A46D4753", 34 | "Algorithm": "MD5" 35 | }, 36 | "Files": [{ 37 | "Name": "GranuleFileName1", 38 | "SizeInBytes": 10000, 39 | "Size": 10, 40 | "SizeUnit": "KB", 41 | "Format": "NETCDF-4", 42 | "MimeType": "application/x-netcdf", 43 | "FormatType": "Native", 44 | "Checksum": { 45 | "Value": "E51569BF48DD0FD0640C6503A46D4754", 46 | "Algorithm": "MD5" 47 | } 48 | }, { 49 | "Name": "GranuleFileName2", 50 | "SizeInBytes": 1000, 51 | "Size": 1, 52 | "SizeUnit": "KB", 53 | "Format": "ASCII", 54 | "MimeType": "text/plain", 55 | "FormatType": "NA" 56 | }, { 57 | "Name": "GranuleFileName3", 58 | "SizeInBytes": 1000, 59 | "Size": 1, 60 | "SizeUnit": "KB", 61 | "Format": "DMRPP", 62 | "MimeType": "application/vnd.opendap.dap4.dmrpp+xml", 63 | "FormatType": "NA" 64 | }] 65 | }, { 66 | "Name": "SupportedGranuleFileNotInPackage-WithAVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongName", 67 | "SizeInBytes": 11000, 68 | "Size": 11, 69 | "SizeUnit": "KB", 70 | "Format": "NETCDF-CF", 71 | "FormatType": "Supported", 72 | "MimeType": "application/x-netcdf", 73 | "Checksum": { 74 | "Value": "E51569BF48DD0FD0640C6503A46D4755", 75 | "Algorithm": "MD5" 76 | } 77 | }], 78 | "ReprocessingPlanned": "The Reprocessing Planned Statement Value", 79 | "ReprocessingActual": "The Reprocessing Actual Statement Value", 80 | "DayNightFlag" : "Unspecified", 81 | "ProductionDateTime" : "2018-07-19T12:01:01Z", 82 | "Identifiers": [{ 83 | "Identifier": "SMAP_L3_SM_P_20150407_R13080_001.h5", 84 | "IdentifierType": "ProducerGranuleId" 85 | }, { 86 | "Identifier": "LocalVersionIdValue", 87 | "IdentifierType": "LocalVersionId" 88 | }, { 89 | "Identifier": "FeatureIdValue1", 90 | "IdentifierType": "FeatureId" 91 | }, { 92 | "Identifier": "FeatureIdValue2", 93 | "IdentifierType": "FeatureId" 94 | }, { 95 | "Identifier": "12345678911234567892123456789312345678941234567895123456789612345678971234567898123456789912345678901234567891123456789212345678901234567890", 96 | "IdentifierType": "Other", 97 | "IdentifierName": "SomeIdentifierVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongName" 98 | }, { 99 | "Identifier": "CRIDValue", 100 | "IdentifierType": "CRID" 101 | }] 102 | }, 103 | "PGEVersionClass": { 104 | "PGEName": "A PGE Name", 105 | "PGEVersion": "6.0.27" 106 | }, 107 | "TemporalExtent": { 108 | "RangeDateTime": { 109 | "BeginningDateTime": "2018-07-17T00:00:00.000Z", 110 | "EndingDateTime": "2018-07-17T23:59:59.999Z" 111 | } 112 | }, 113 | "SpatialExtent": { 114 | "GranuleLocalities": ["GranuleLocality1", "GranuleLocality2"], 115 | "HorizontalSpatialDomain": { 116 | "ZoneIdentifier": "ZoneIdentifier 1", 117 | "Geometry": { 118 | "Points": [{ 119 | "Longitude": -77, 120 | "Latitude": 88 121 | }, { 122 | "Longitude":10, 123 | "Latitude": 10 124 | }], 125 | "BoundingRectangles": [{ 126 | "WestBoundingCoordinate": -180, 127 | "NorthBoundingCoordinate": 85.04450225830078, 128 | "EastBoundingCoordinate": 180, 129 | "SouthBoundingCoordinate": -85.04450225830078 130 | }], 131 | "GPolygons": [{ 132 | "Boundary" : { 133 | "Points": [ {"Longitude":-10, "Latitude":-10}, {"Longitude":10, "Latitude":-10}, {"Longitude":10, 
"Latitude":10}, {"Longitude":-10, "Latitude":10}, {"Longitude":-10, "Latitude":-10}] 134 | }, 135 | "ExclusiveZone": { 136 | "Boundaries": [{ 137 | "Points": [{"Longitude":-5, "Latitude":-5}, {"Longitude":-1, "Latitude":-5}, {"Longitude":-1, "Latitude":-1}, {"Longitude":-5, "Latitude":-1}, {"Longitude":-5, "Latitude":-5}] 138 | }, { 139 | "Points": [{"Longitude":0, "Latitude":0}, {"Longitude":5, "Latitude":0}, {"Longitude":5, "Latitude":5}, {"Longitude":0, "Latitude":5}, {"Longitude":0, "Latitude":0}] 140 | }] 141 | } 142 | }], 143 | "Lines": [{ 144 | "Points": [ {"Longitude":-100, "Latitude":-70}, {"Longitude":-88, "Latitude":-66}] 145 | }] 146 | }, 147 | "Track": { 148 | "Cycle": 1, 149 | "Passes": [{ 150 | "Pass": 1, 151 | "Tiles": ["1L","1R","2F"] 152 | }, { 153 | "Pass": 2, 154 | "Tiles": ["3L","3R","4F"] 155 | }] 156 | } 157 | }, 158 | "VerticalSpatialDomains": [{ 159 | "Type": "Atmosphere Layer", 160 | "Value": "Atmosphere Profile" 161 | }, { 162 | "Type": "Pressure", 163 | "Value": "100", 164 | "Unit": "HectoPascals" 165 | }, { 166 | "Type": "Altitude", 167 | "MinimumValue": "10", 168 | "MaximumValue": "100", 169 | "Unit": "Meters" 170 | }] 171 | }, 172 | "OrbitCalculatedSpatialDomains": [{ 173 | "OrbitalModelName": "OrbitalModelName", 174 | "BeginOrbitNumber": 99263, 175 | "EndOrbitNumber": 99263, 176 | "EquatorCrossingLongitude":88.92, 177 | "EquatorCrossingDateTime": "2018-08-16T16:22:21.000Z" 178 | }], 179 | "MeasuredParameters": [{ 180 | "ParameterName": "Parameter Name", 181 | "QAStats": { 182 | "QAPercentMissingData": 10, 183 | "QAPercentOutOfBoundsData": 20, 184 | "QAPercentInterpolatedData": 30, 185 | "QAPercentCloudCover": 40 186 | }, 187 | "QAFlags": { 188 | "AutomaticQualityFlag": "Passed", 189 | "AutomaticQualityFlagExplanation": "Automatic Quality Flag Explanation", 190 | "OperationalQualityFlag": "Passed", 191 | "OperationalQualityFlagExplanation": "Operational Quality Flag Explanation", 192 | "ScienceQualityFlag": "Passed", 193 | "ScienceQualityFlagExplanation": "Science Quality Flag Explanation" 194 | } 195 | }], 196 | "Platforms": [{ 197 | "ShortName": "Aqua", 198 | "Instruments": [{ 199 | "ShortName": "AMSR-E", 200 | "Characteristics": [{ 201 | "Name": "InstrumentCaracteristicName1", 202 | "Value": "150" 203 | }, { 204 | "Name": "InstrumentCaracteristicName2", 205 | "Value": "22F" 206 | }], 207 | "ComposedOf": [{ 208 | "ShortName": "AMSR-E_ChildInstrument", 209 | "Characteristics": [{ 210 | "Name": "ChildInstrumentCharacteristicName3", 211 | "Value": "250" 212 | }], 213 | "OperationalModes": ["Mode3"] 214 | }], 215 | "OperationalModes": ["Mode1", "Mode2"] 216 | }] 217 | }], 218 | "Projects": [{ 219 | "ShortName": "Project1" 220 | }, { 221 | "ShortName": "Project2" 222 | }], 223 | "AdditionalAttributes": [{ 224 | "Name": "AdditionalAttribute1 Name1", 225 | "Values": ["AdditionalAttribute1 Value3", "AdditionalAttribute1 Value4"] 226 | }, { 227 | "Name": "EVI1KM16DAYQCLASSPERCENTAGE", 228 | "Values": ["EVI1KM16DAYQCLASSPERCENTAGE Value5", "EVI1KM16DAYQCLASSPERCENTAGE Value6"] 229 | }, { 230 | "Name": "QAFRACTIONGOODQUALITY", 231 | "Values": ["QAFRACTIONGOODQUALITY Value7", "QAFRACTIONGOODQUALITY Value8"] 232 | }, { 233 | "Name": "QAFRACTIONNOTPRODUCEDCLOUD", 234 | "Values": ["QAFRACTIONNOTPRODUCEDCLOUD Value9", "QAFRACTIONNOTPRODUCEDCLOUD Value10"] 235 | }], 236 | "InputGranules": ["InputGranule1", "InputGranule2"], 237 | "TilingIdentificationSystem": { 238 | "TilingIdentificationSystemName": "MODIS Tile EASE", 239 | "Coordinate1": { 240 | "MinimumValue": -100, 241 
| "MaximumValue": -50 242 | }, 243 | "Coordinate2": { 244 | "MinimumValue": 50, 245 | "MaximumValue": 100 246 | } 247 | }, 248 | "CloudCover": 60, 249 | "RelatedUrls": [{ 250 | "URL": "https://daac.ornl.gov/daacdata/islscp_ii/vegetation/erbe_albedo_monthly_xdeg/data/erbe_albedo_1deg_1986.zip", 251 | "Type": "GET DATA", 252 | "Description": "This link provides direct download access to the granule.", 253 | "Format": "ZIP", 254 | "MimeType": "application/zip", 255 | "Size": 395.673, 256 | "SizeUnit": "KB" 257 | }, { 258 | "URL": "https://daac.ornl.gov/ISLSCP_II/guides/erbe_albedo_monthly_xdeg.html", 259 | "Type": "VIEW RELATED INFORMATION", 260 | "Subtype": "USER'S GUIDE", 261 | "Description": "ORNL DAAC Data Set Documentation", 262 | "Format": "HTML", 263 | "MimeType": "text/html" 264 | }, { 265 | "URL": "https://webmap.ornl.gov/sdat/pimg/957_1.png", 266 | "Type": "GET RELATED VISUALIZATION", 267 | "Description": "ISLSCP II EARTH RADIATION BUDGET EXPERIMENT (ERBE) MONTHLY ALBEDO, 1986-1990", 268 | "Format": "PNG", 269 | "MimeType": "image/png", 270 | "Size": 10, 271 | "SizeUnit": "MB" 272 | }], 273 | "NativeProjectionNames": ["MODIS Sinusoidal System", "Sinusoidal"], 274 | "GridMappingNames": ["Sinusoidal", "Lambert Azimuthal Equal-Area"], 275 | "MetadataSpecification": { 276 | "URL": "https://cdn.earthdata.nasa.gov/umm/granule/v1.6.4", 277 | "Name": "UMM-G", 278 | "Version": "1.6.4" 279 | } 280 | } 281 | -------------------------------------------------------------------------------- /tests/fixtures/test_cmr_metadata_echo-c.json: -------------------------------------------------------------------------------- 1 | { 2 | "Collection": { 3 | "ShortName": "MATMNXSLV", 4 | "VersionId": "5.2.0", 5 | "InsertTime": "2019-06-14T00:00:00.000Z", 6 | "LastUpdate": "2018-07-07T00:00:00.000Z", 7 | "DeleteTime": "2018-07-07T00:00:00.000Z", 8 | "LongName": "Not provided", 9 | "DataSetId": "tavgM_2d_slv_Nx: MERRA 2D IAU Diagnostic, Single Level Meteorology, Monthly Mean 0.667 x 0.5 degree V5.2.0 (MATMNXSLV) at GES DISC", 10 | "Description": "The MATMNXSLV or tavgM_2d_slv_Nx data product is the https://impact.earthdata.nasa.gov/32432 http://admg.nasa-impact.net/authenticate/token/ MERRA Data Assimilation System 2-Dimensional http://randomurl1.com atmospheric single-level google.com diagnostics that is time averaged single-level at the native resolution. It is a history file that is produced from the GCM during the corrector segment of the IAU cycle. All collections from this group are at reduced horizontal resolution. MERRA, or the Modern Era Retrospective-analysis for Research and Application, is a NASA reanalysis for the satellite era (30 years 1979-current) using the Goddard Earth Observing System Data Assimilation System Version 5 (GEOS-5 DAS).\n\nThis data product contains 2-dimensional fields that do not vary during the reanalysis.\nThe data are on the GEOS-5 native 540 x 361 grid with 0.667 degree longitude x 0.5 degree latitude resolution. The files contain the monthly mean. Data are archived in the HDF-EOS2 (Grid) format, based on HDF4.\n\nSign Up for the MERRA-2 Mailing List\n\nSign up for the MERRA-2 listserv to receive announcements on the latest data information, tools and services that become available, data announcements from GMAO and more! Contact the GES DISC User Services (gsfc-help-disc@lists.nasa.gov) to be added to the list.\n\nMERRA-2 Science Data and Data Processing Questions\n\nDo you have a question about MERRA/MERRA-2? 
Take a look at the File Specification Document and if that doesn't answer your question, users can contact staff with questions on the data, data processing and science. Send questions to merra-questions@lists.nasa.gov.", 11 | "DOI": { 12 | "DOI": "10.5067/W3UEUC5V7M9M", 13 | "Authority": "https://doi.org" 14 | }, 15 | "CollectionDataType": "SCIENCE", 16 | "RevisionDate": "2015-07-07T00:00:00.000Z", 17 | "ProcessingLevelId": 4, 18 | "ArchiveCenter": "NASA/GSFC/SED/ESD/GCDC/GESDISC", 19 | "VersionDescription": "The GEOS-5 model version used for processing MERRA", 20 | "CollectionState": "COMPLETE", 21 | "DataFormat": "HDF-EOS", 22 | "SpatialKeywords": { 23 | "Keyword": "GLOBAL" 24 | }, 25 | "Temporal": { 26 | "EndsAtPresentFlag": "false", 27 | "RangeDateTime": { 28 | "BeginningDateTime": "1979-01-01", 29 | "EndingDateTime": "2016-02-29T23:59:5900:00" 30 | } 31 | }, 32 | "Contacts": { 33 | "Contact": [ 34 | { 35 | "Role": "ARCHIVER", 36 | "OrganizationName": "NASA/GSFC/SED/ESD/GCDC/GESDISC" 37 | }, 38 | { 39 | "Role": "TECHNICAL CONTACT", 40 | "ContactPersons": { 41 | "ContactPerson": [ 42 | { 43 | "FirstName": "DANA", 44 | "LastName": "OSTRENGA", 45 | "JobPosition": "METADATA AUTHOR" 46 | }, 47 | { 48 | "FirstName": "MICHAEL", 49 | "LastName": "BOSILOVICH", 50 | "JobPosition": "INVESTIGATOR" 51 | } 52 | ] 53 | } 54 | } 55 | ] 56 | }, 57 | "ScienceKeywords": { 58 | "ScienceKeyword": [ 59 | { 60 | "CategoryKeyword": "EARTH SCIENCE", 61 | "TopicKeyword": "ATMOSPHERE", 62 | "TermKeyword": "ALTITUDE", 63 | "VariableLevel1Keyword": { 64 | "Value": "GEOPOTENTIAL HEIGHT" 65 | } 66 | }, 67 | { 68 | "CategoryKeyword": "EARTH SCIENCE", 69 | "TopicKeyword": "ATMOSPHERE", 70 | "TermKeyword": "ALTITUDE", 71 | "VariableLevel1Keyword": { 72 | "Value": "TROPOPAUSE" 73 | } 74 | }, 75 | { 76 | "CategoryKeyword": "EARTH SCIENCE", 77 | "TopicKeyword": "ATMOSPHERE", 78 | "TermKeyword": "ATMOSPHERIC PRESSURE", 79 | "VariableLevel1Keyword": { 80 | "Value": "SURFACE PRESSURE" 81 | } 82 | }, 83 | { 84 | "CategoryKeyword": "EARTH SCIENCE", 85 | "TopicKeyword": "ATMOSPHERE", 86 | "TermKeyword": "ATMOSPHERIC TEMPERATURE", 87 | "VariableLevel1Keyword": { 88 | "Value": "SURFACE TEMPERATURE" 89 | } 90 | }, 91 | { 92 | "CategoryKeyword": "EARTH SCIENCE", 93 | "TopicKeyword": "ATMOSPHERE", 94 | "TermKeyword": "ATMOSPHERIC WATER VAPOR" 95 | } 96 | ] 97 | }, 98 | "Platforms": { 99 | "Platform": { 100 | "ShortName": "MERRA", 101 | "LongName": "Modern-Era Retrospective Analysis for Research and Applications", 102 | "Type": "Models/Analyses", 103 | "Instruments": { 104 | "Instrument": { 105 | "ShortName": "NOT APPLICABLE" 106 | } 107 | } 108 | } 109 | }, 110 | "Campaigns": { 111 | "Campaign": { 112 | "ShortName": "MERRA", 113 | "LongName": "Modern Era Retrospective-analysis for Research and Applications" 114 | } 115 | }, 116 | "OnlineAccessURLs": { 117 | "OnlineAccessURL": [ 118 | { 119 | "URL": "https://goldsmr2.gesdisc.eosdis.nasa.gov/data/MERRA_MONTHLY/MATMNXSLV.5.2.0/", 120 | "URLDescription": "Access the data via HTTP." 121 | }, 122 | { 123 | "URL": "https://search.earthdata.nasa.gov/search?q=MATMNXSLV", 124 | "URLDescription": "Use the Earthdata Search to find and retrieve data sets across multiple data centers." 
125 | } 126 | ] 127 | }, 128 | "OnlineResources": { 129 | "OnlineResource": [ 130 | { 131 | "URL": "https://disc.gsfc.nasa.gov/information/howto?tags=MERRA", 132 | "Description": "How to read and plot the data.", 133 | "Type": "VIEW RELATED INFORMATION : HOW-TO" 134 | }, 135 | { 136 | "URL": "https://disc.gsfc.nasa.gov/datacollection/MATMNXSLV_5.2.0.html", 137 | "Description": "Access the dataset landing page from the GES DISC website.", 138 | "Type": "CollectionURL : DATA SET LANDING PAGE" 139 | }, 140 | { 141 | "URL": "https://disc.gsfc.nasa.gov/SSW/", 142 | "Description": "Use the Simple Subset Wizard (SSW) to submit subset requests for data sets across multiple data centers from a single unified interface.", 143 | "Type": "GOTO WEB TOOL : SIMPLE SUBSET WIZARD (SSW)" 144 | }, 145 | { 146 | "URL": "https://goldsmr2.gesdisc.eosdis.nasa.gov/dods/MATMNXSLV.info", 147 | "Description": "The GrADS Data Server (GDS) is another form of OPeNDAP that provides subsetting and some analysis services across the Internet.", 148 | "Type": "USE SERVICE API : GRADS DATA SERVER (GDS)" 149 | }, 150 | { 151 | "URL": "https://goldsmr2.gesdisc.eosdis.nasa.gov/opendap/MERRA_MONTHLY/MATMNXSLV.5.2.0/contents.html", 152 | "Description": "Access the data via the OPeNDAP protocol.", 153 | "Type": "USE SERVICE API : OPENDAP DATA" 154 | }, 155 | { 156 | "URL": "https://gmao.gsfc.nasa.gov/", 157 | "Description": "The GMAO home page", 158 | "Type": "CollectionURL : PROJECT HOME PAGE" 159 | }, 160 | { 161 | "URL": "https://gmao.gsfc.nasa.gov/products/documents/MERRA_File_Specification.pdf", 162 | "Description": "MERRA File Specification Document", 163 | "Type": "VIEW RELATED INFORMATION : PI DOCUMENTATION" 164 | }, 165 | { 166 | "URL": "https://goldsmr2.gesdisc.eosdis.nasa.gov/data/MERRA_MONTHLY/MATMNXSLV.5.2.0/doc/MERRA.README.pdf", 167 | "Description": "README Document", 168 | "Type": "VIEW RELATED INFORMATION : READ-ME" 169 | } 170 | ] 171 | }, 172 | "Spatial": { 173 | "HorizontalSpatialDomain": { 174 | "Geometry": { 175 | "CoordinateSystem": "CARTESIAN", 176 | "BoundingRectangle": { 177 | "WestBoundingCoordinate": "-180", 178 | "NorthBoundingCoordinate": "90", 179 | "EastBoundingCoordinate": "180", 180 | "SouthBoundingCoordinate": "-90" 181 | } 182 | } 183 | }, 184 | "GranuleSpatialRepresentation": "CARTESIAN" 185 | }, 186 | "AssociatedBrowseImageUrls": { 187 | "ProviderBrowseUrl": [ 188 | { 189 | "URL": "https://docserver.gesdisc.eosdis.nasa.gov/public/project/Images/MATMNXSLV_5.2.0.png", 190 | "Description": "MATMNXSLV variable" 191 | }, 192 | { 193 | "URL": "https://giovanni.gsfc.nasa.gov/giovanni/#variableFacets=dataProductPlatformInstrument%3AMERRA%20Model%3B", 194 | "Description": "The GES-DISC Interactive Online Visualization ANd aNalysis Interface (Giovanni) is a web-based tool that allows users to interactively visualize and analyze data." 
195 | } 196 | ] 197 | } 198 | } 199 | } -------------------------------------------------------------------------------- /tests/fixtures/validator.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | WORKING_DIR = os.getcwd() 4 | FIXTURE_PATH = os.path.join(WORKING_DIR, "tests/fixtures/") 5 | 6 | INPUT_OUTPUT = { 7 | "date_datetime_iso_format_check": [ 8 | { 9 | "input": "2016-06-14T00:00:00.000Z", 10 | "output": True, 11 | }, 12 | { 13 | "input": "2016-06-1400:00:00.000", 14 | "output": False, 15 | }, 16 | ], 17 | "get_path_value": [ 18 | { 19 | "input": "Contacts/Contact/ContactPersons/ContactPerson/glabb", 20 | "output": set(), 21 | }, 22 | { 23 | "input": "Contacts/Contact/ContactPersons/ContactPerson/blabla", 24 | "output": {"BOSILOVICH"}, 25 | }, 26 | { 27 | "input": "Contacts/Contact/ContactPersons/ContactPerson/FirstName", 28 | "output": {"DANA", "SLESA", "MICHAEL"}, 29 | }, 30 | ], 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_checker.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures.checker import FUNCTION_MAPPING 2 | from pyQuARC.code.checker import Checker 3 | from tests.common import read_test_metadata 4 | 5 | 6 | class TestChecker: 7 | """ 8 | Test cases for the Checker script in checker.py 9 | """ 10 | 11 | def setup_method(self): 12 | self.checker = Checker() 13 | self.test_metadata = read_test_metadata() 14 | 15 | def test_run(self): 16 | result = self.checker.run(self.test_metadata) 17 | assert result 18 | 19 | def test_map_to_function(self): 20 | for in_, out_ in zip(FUNCTION_MAPPING["input"], FUNCTION_MAPPING["output"]): 21 | result = self.checker.map_to_function(in_["datatype"], in_["function"]) 22 | assert bool(callable(result)) == out_ 23 | -------------------------------------------------------------------------------- /tests/test_custom_checker.py: -------------------------------------------------------------------------------- 1 | from xmltodict import parse 2 | from pyQuARC.code.custom_checker import CustomChecker 3 | from tests.fixtures.custom_checker import INPUT_OUTPUT 4 | from tests.common import read_test_metadata 5 | 6 | 7 | class TestCustomChecker: 8 | """ 9 | Test cases for the CustomChecker script in custom_checker.py 10 | """ 11 | 12 | def setup_method(self): 13 | self.custom_checker = CustomChecker() 14 | self.dummy_metadata = parse(read_test_metadata()) 15 | 16 | def test_get_path_value(self): 17 | in_out = INPUT_OUTPUT["get_path_value"] 18 | for _in, _out in zip(in_out["input"], in_out["output"]): 19 | assert CustomChecker._get_path_value(self.dummy_metadata, _in) == _out 20 | 21 | dummy_dif_metadata = { 22 | "CollectionCitations": [ 23 | { 24 | "Creator": "Kamel Didan", 25 | "OnlineResource": { 26 | "Linkage": "https://doi.org/10.5067/MODIS/MOD13Q1.061", 27 | "Name": "DOI Landing Page", 28 | }, 29 | "OtherCitationDetails": "The DOI landing page provides citations in APA and Chicago styles.", 30 | "Publisher": "NASA EOSDIS Land Processes DAAC", 31 | "ReleaseDate": "2021-02-16", 32 | "SeriesName": "MOD13Q1.061", 33 | "Title": "MODIS/Terra Vegetation Indices 16-Day L3 Global 250m SIN Grid V061", 34 | } 35 | ], 36 | "MetadataDates": [ 37 | {"Type": "CREATE", "Date": "2021-09-15T15:54:00.000Z"}, 38 | {"Type": "UPDATE", "Date": "2021-09-30T15:54:00.000Z"}, 39 | ], 40 | "DOI": {"Authority": "https://doi.org", "DOI": "10.5067/MODIS/MOD13Q1.061"}, 41 | "SpatialExtent": { 42 | 
"GranuleSpatialRepresentation": "GEODETIC", 43 | "HorizontalSpatialDomain": { 44 | "Geometry": { 45 | "BoundingRectangles": [ 46 | { 47 | "EastBoundingCoordinate": 180.0, 48 | "NorthBoundingCoordinate": 85, 49 | "SouthBoundingCoordinate": 89, 50 | "WestBoundingCoordinate": -180.0, 51 | } 52 | ], 53 | "CoordinateSystem": "CARTESIAN", 54 | }, 55 | "ResolutionAndCoordinateSystem": { 56 | "HorizontalDataResolution": { 57 | "GriddedResolutions": [ 58 | { 59 | "Unit": "Meters", 60 | "XDimension": 250.0, 61 | "YDimension": 250.0, 62 | } 63 | ] 64 | } 65 | }, 66 | "ZoneIdentifier": "MODIS Sinusoidal Tiling System", 67 | }, 68 | "SpatialCoverageType": "HORIZONTAL", 69 | }, 70 | } 71 | 72 | assert CustomChecker._get_path_value( 73 | dummy_dif_metadata, "CollectionCitations/Creator" 74 | ) == ["Kamel Didan"] 75 | 76 | assert CustomChecker._get_path_value( 77 | dummy_dif_metadata, "CollectionCitations/OnlineResource/Name" 78 | ) == ["DOI Landing Page"] 79 | 80 | assert CustomChecker._get_path_value( 81 | dummy_dif_metadata, "MetadataDates/Date?Type=UPDATE" 82 | ) == ["2021-09-30T15:54:00.000Z"] 83 | 84 | assert CustomChecker._get_path_value( 85 | dummy_dif_metadata, "MetadataDates/Date?Type=CREATE" 86 | ) == ["2021-09-15T15:54:00.000Z"] 87 | 88 | assert CustomChecker._get_path_value(dummy_dif_metadata, "DOI/DOI") == [ 89 | "10.5067/MODIS/MOD13Q1.061" 90 | ] 91 | 92 | assert CustomChecker._get_path_value( 93 | dummy_dif_metadata, 94 | "SpatialExtent/HorizontalSpatialDomain/Geometry/BoundingRectangles/WestBoundingCoordinate", 95 | ) == [-180.0] 96 | 97 | assert CustomChecker._get_path_value( 98 | dummy_dif_metadata, "SpatialExtent/GranuleSpatialRepresentation" 99 | ) == ["GEODETIC"] 100 | -------------------------------------------------------------------------------- /tests/test_datetime_validator.py: -------------------------------------------------------------------------------- 1 | from pyQuARC.code.datetime_validator import DatetimeValidator 2 | from tests.fixtures.validator import INPUT_OUTPUT 3 | 4 | 5 | class TestValidator: 6 | """ 7 | Test cases for the validator script in validator.py 8 | """ 9 | 10 | def setup_method(self): 11 | pass 12 | 13 | def test_datetime_iso_format_check(self): 14 | for input_output in INPUT_OUTPUT["date_datetime_iso_format_check"]: 15 | assert ( 16 | DatetimeValidator.iso_format_check(input_output["input"])["valid"] 17 | ) == input_output["output"] 18 | 19 | def test_datetime_compare(self): 20 | pass 21 | -------------------------------------------------------------------------------- /tests/test_downloader.py: -------------------------------------------------------------------------------- 1 | from pyQuARC.code.downloader import Downloader 2 | 3 | 4 | class TestDownloader: 5 | """ 6 | Test cases for the methods in Downloader class in downloder.py 7 | """ 8 | 9 | def setup_method(self): 10 | self.concept_ids = { 11 | "collection": { 12 | "real": "C1339230297-GES_DISC", 13 | "dummy": "C123456-LPDAAC_ECS", 14 | }, 15 | "granule": { 16 | "real": "G1370895082-GES_DISC", 17 | "dummy": "G1000000002-CMR_PROV", 18 | }, 19 | "invalid": "asdfasdf", 20 | } 21 | 22 | def test_download(self): 23 | # self.assertEqual() 24 | # this should return a status_code that must be 200 or 404 25 | # hitting the URL we are supposed to hit 26 | # getting results from that in proper format (asking for echo10, we get echo10) 27 | # store the content in a variable or fill up the error list 28 | # if the concept ID is a collection concept id, it downloads collection and not granule. 
and vice versa for granules
29 | # for now this test is a no-op
30 | pass
31 | 
32 | # def test_validate(self):
33 | # pass
34 | 
35 | def test_concept_id_type_collection(self):
36 | assert (
37 | Downloader._concept_id_type(self.concept_ids["collection"]["dummy"])
38 | == Downloader.COLLECTION
39 | )
40 | 
41 | def test_concept_id_type_granule(self):
42 | assert (
43 | Downloader._concept_id_type(self.concept_ids["granule"]["dummy"])
44 | == Downloader.GRANULE
45 | )
46 | 
47 | def test_concept_id_type_invalid(self):
48 | assert (
49 | Downloader._concept_id_type(self.concept_ids["invalid"])
50 | == Downloader.INVALID
51 | )
52 | 
53 | def test_construct_url_collection(self):
54 | real_collection = self.concept_ids["collection"]["real"]
55 | downloader = Downloader(real_collection, "echo-c")
56 | assert (
57 | downloader._construct_url()
58 | == f"https://cmr.earthdata.nasa.gov/search/concepts/{real_collection}.echo10"
59 | )
60 | 
61 | def test_construct_url_granule(self):
62 | real_granule = self.concept_ids["granule"]["real"]
63 | downloader = Downloader(real_granule, "echo-g")
64 | 
65 | assert (
66 | downloader._construct_url()
67 | == f"https://cmr.earthdata.nasa.gov/search/concepts/{real_granule}.echo10"
68 | )
69 | 
70 | def test_log_error(self):
71 | # create a downloader for a dummy granule
72 | dummy_granule = self.concept_ids["granule"]["dummy"]
73 | downloader = Downloader(dummy_granule, "echo-g")
74 | 
75 | downloader.log_error("invalid_concept_id", {"concept_id": dummy_granule})
76 | 
77 | downloader.log_error(
78 | "request_failed",
79 | {
80 | "concept_id": dummy_granule,
81 | "url": "https://dummy.url",
82 | "status_code": 404,
83 | },
84 | )
85 | 
86 | assert downloader.errors == [
87 | {"type": "invalid_concept_id", "details": {"concept_id": dummy_granule}},
88 | {
89 | "type": "request_failed",
90 | "details": {
91 | "concept_id": dummy_granule,
92 | "url": "https://dummy.url",
93 | "status_code": 404,
94 | },
95 | },
96 | ]
97 | 
98 | def test_download_invalid_concept_id(self):
99 | invalid_concept_id = self.concept_ids["invalid"]
100 | downloader = Downloader(invalid_concept_id, "echo-c")
101 | 
102 | downloader.download()
103 | 
104 | assert len(downloader.errors) == 1
105 | assert downloader.errors == [
106 | {
107 | "type": "invalid_concept_id",
108 | "details": {"concept_id": self.concept_ids["invalid"]},
109 | }
110 | ]
111 | 
112 | def test_download_dummy_collection_request_failed(self):
113 | dummy_collection = self.concept_ids["collection"]["dummy"]
114 | downloader = Downloader(dummy_collection, "echo-c")
115 | 
116 | downloader.download()
117 | 
118 | assert len(downloader.errors) == 1
119 | assert downloader.errors == [
120 | {
121 | "type": "request_failed",
122 | "details": {
123 | "concept_id": dummy_collection,
124 | "url": f"https://cmr.earthdata.nasa.gov/search/concepts/{dummy_collection}.echo10",
125 | "status_code": 404,
126 | "message": "Something went wrong while downloading the requested metadata. Make sure all the inputs are correct.",
127 | "details": "N/A",
128 | },
129 | }
130 | ]
131 | 
132 | def test_download_real_collection_no_errors(self):
133 | real_collection = self.concept_ids["collection"]["real"]
134 | downloader = Downloader(real_collection, "echo-c")
135 | 
136 | downloader.download()
137 | 
138 | # the concept ID is valid, so the request should go through without errors
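# NOTE: this test and the real-granule variant below issue a live request to cmr.earthdata.nasa.gov, so they require network access and may start failing if these concept IDs are ever retired from CMR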
139 | assert downloader.errors == []
140 | 
141 | def test_download_dummy_granule_request_failed(self):
142 | dummy_granule = self.concept_ids["granule"]["dummy"]
143 | downloader = Downloader(dummy_granule, "echo-g")
144 | 
145 | downloader.download()
146 | 
147 | assert len(downloader.errors) == 1
148 | assert downloader.errors == [
149 | {
150 | "type": "request_failed",
151 | "details": {
152 | "concept_id": dummy_granule,
153 | "url": f"https://cmr.earthdata.nasa.gov/search/concepts/{dummy_granule}.echo10",
154 | "status_code": 404,
155 | "message": "Something went wrong while downloading the requested metadata. Make sure all the inputs are correct.",
156 | "details": "N/A",
157 | },
158 | }
159 | ]
160 | 
161 | def test_download_real_granule_no_errors(self):
162 | real_granule = self.concept_ids["granule"]["real"]
163 | downloader = Downloader(real_granule, "echo-g")
164 | 
165 | downloader.download()
166 | 
167 | # the concept ID is valid, so the request should go through without errors
168 | assert downloader.errors == []
169 | 
-------------------------------------------------------------------------------- /tests/test_schema_validator.py: --------------------------------------------------------------------------------
1 | import os
2 | from pyQuARC.code.schema_validator import SchemaValidator
3 | 
4 | KEYS = ["no_error_metadata", "bad_syntax_metadata", "test_cmr_metadata"]
5 | 
6 | 
7 | class TestSchemaValidator:
8 | def setup_method(self):
9 | self.data = self.read_data()
10 | self.schema_validator = SchemaValidator(None)
11 | 
12 | def read_data(self):
13 | result = {}
14 | for data_key in KEYS:
15 | # read each fixture file and encode it to bytes, as expected by the XML validator
16 | with open(
17 | os.path.join(os.getcwd(), f"tests/fixtures/{data_key}.echo-c"), "r"
18 | ) as myfile:
19 | result[data_key] = myfile.read().encode()
20 | return result
21 | 
22 | def test_xml_validator(self):
23 | for data_key in KEYS:
24 | assert self.schema_validator.run_xml_validator(self.data[data_key])
25 | 
-------------------------------------------------------------------------------- /tests/test_string_validator.py: --------------------------------------------------------------------------------
1 | from pyQuARC.code.string_validator import StringValidator
2 | 
3 | 
4 | class TestStringValidator:
5 | """
6 | Test cases for the StringValidator class in string_validator.py
7 | """
8 | 
9 | def setup_method(self):
10 | pass
11 | 
12 | def test_string_length_check(self):
13 | pass
14 | 
15 | def test_string_compare(self):
16 | pass
17 | 
18 | def test_processing_level_id_check(self):
19 | pass
20 | 
21 | def test_url_health_check(self):
22 | pass
23 | 
24 | def test_doi_check(self):
25 | pass
26 | 
--------------------------------------------------------------------------------